bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4
by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry. |
1 |
# Copyright (C) 2006-2010 Canonical Ltd
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
16 |
|
6379.6.7
by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear. |
17 |
"""A collection of functions for handling URL operations.""" |
|
18 |
||
6379.6.3
by Jelmer Vernooij
Use absolute_import. |
19 |
from __future__ import absolute_import |
20 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
21 |
import os |
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
22 |
import re |
23 |
import sys |
|
1996.3.12
by John Arbash Meinel
Change how 'revision' is imported to avoid problems later |
24 |
|
6621.2.26
by Martin
Misc set of changes to get started with selftest on Python 3 |
25 |
try: |
26 |
import urlparse |
|
27 |
except ImportError: |
|
28 |
from urllib import parse as urlparse |
|
29 |
||
6729.6.1
by Jelmer Vernooij
Move urlutils errors. |
30 |
from . import ( |
31 |
errors, |
|
32 |
osutils, |
|
33 |
)
|
|
34 |
||
6624
by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes') |
35 |
from .lazy_import import lazy_import |
1996.3.12
by John Arbash Meinel
Change how 'revision' is imported to avoid problems later |
36 |
lazy_import(globals(), """ |
6015.39.2
by Florian Vichot
Fixed an infinite loop when creating a repo at the root of the filesystem, |
37 |
from posixpath import split as _posix_split
|
1996.3.12
by John Arbash Meinel
Change how 'revision' is imported to avoid problems later |
38 |
""") |
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
39 |
|
6624
by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes') |
40 |
from .sixish import ( |
7078.15.1
by Jelmer Vernooij
Fix some more tests. |
41 |
int2byte, |
6677.1.1
by Martin
Go back to native str for urls and many other py3 changes |
42 |
PY3, |
6621.2.26
by Martin
Misc set of changes to get started with selftest on Python 3 |
43 |
text_type, |
7143.15.5
by Jelmer Vernooij
More PEP8 fixes. |
44 |
unichr, |
6621.2.26
by Martin
Misc set of changes to get started with selftest on Python 3 |
45 |
)
|
46 |
||
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
47 |
|
6729.6.1
by Jelmer Vernooij
Move urlutils errors. |
48 |
class InvalidURL(errors.PathError):
    """Error for a URL that is not acceptable to a transport.

    The message template interpolates the ``path`` and ``extra`` fields.
    """

    _fmt = 'Invalid url supplied to transport: "%(path)s"%(extra)s'
|
51 |
||
52 |
||
53 |
class InvalidURLJoin(errors.PathError):
    """Error for a request to join URL pieces that cannot be combined.

    :ivar reason: Short explanation of why the join was rejected.
    :ivar base: The base the arguments were being joined onto.
    :ivar join_args: The arguments that were being joined.
    """

    _fmt = "Invalid URL join request: %(reason)s: %(base)r + %(join_args)r"

    def __init__(self, reason, base, join_args):
        # Record the fields used by _fmt before delegating to the base class,
        # which receives the base as its path and the reason as its extra.
        self.reason, self.base, self.join_args = reason, base, join_args
        errors.PathError.__init__(self, base, reason)
|
62 |
||
63 |
||
64 |
class InvalidRebaseURLs(errors.PathError):
    """Error for two URLs that differ by more than their path.

    :ivar from_: The first URL.
    :ivar to: The second URL.
    """

    _fmt = "URLs differ by more than path: %(from_)r and %(to)r"

    def __init__(self, from_, to):
        # Record the fields used by _fmt before delegating to the base class.
        self.from_, self.to = from_, to
        explanation = 'URLs differ by more than path.'
        errors.PathError.__init__(self, from_, explanation)
|
6729.6.1
by Jelmer Vernooij
Move urlutils errors. |
73 |
|
74 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
75 |
def basename(url, exclude_trailing_slash=True):
    """Return the final component of a URL.

    :param url: The URL in question
    :param exclude_trailing_slash: If the url looks like "path/to/foo/"
        ignore the final slash and return 'foo' rather than ''
    :return: Only the last component of the URL. This can be '' when
        exclude_trailing_slash is false, or when the URL is at its root.
    """
    pieces = split(url, exclude_trailing_slash=exclude_trailing_slash)
    # split() yields (parent, last component); we want the latter.
    return pieces[1]
|
86 |
||
87 |
||
88 |
def dirname(url, exclude_trailing_slash=True):
    """Return the parent directory of the given path.

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Remove a final slash
        (treat http://host/foo/ as http://host/foo, but
        http://host/ stays http://host/)
    :return: Everything in the URL except the last path chunk
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    pieces = split(url, exclude_trailing_slash=exclude_trailing_slash)
    # split() yields (parent, last component); we want the former.
    return pieces[0]
|
100 |
||
101 |
||
7045.3.1
by Jelmer Vernooij
Fix another ~500 tests. |
102 |
if PY3:
    # On Python 3 the standard library helpers are used directly.
    quote = urlparse.quote
    quote_from_bytes = urlparse.quote_from_bytes
    unquote_to_bytes = urlparse.unquote_to_bytes
else:
    # Private copies of quote and unquote, copied from Python's urllib module
    # because urllib unconditionally imports socket, which imports ssl.

    always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                   'abcdefghijklmnopqrstuvwxyz'
                   '0123456789' '_.-')
    # Maps each of the 256 single-byte characters either to itself (when it
    # never needs quoting) or to its '%XX' escape.
    _safe_map = {}
    for i in range(256):
        c = chr(i)
        if i < 128 and c in always_safe:
            _safe_map[c] = c
        else:
            _safe_map[c] = '%{0:02X}'.format(i)
    # Cache of (quoter, unquoted characters) keyed by (safe, always_safe).
    _safe_quoters = {}

    def quote_from_bytes(s, safe='/'):
        """quote('abc def') -> 'abc%20def'

        Percent-encode the characters of ``s`` that are neither in
        ``always_safe`` nor in ``safe``.

        RFC 2396 (Uniform Resource Identifiers: Generic Syntax) lists these
        reserved characters:

        reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                      "$" | ","

        Each is reserved in some component of a URL, but not necessarily in
        all of them, so which ones must be quoted depends on the component.
        The default ``safe`` value targets the path section of a URL: '/' is
        left alone because in a path the existing slashes act as separators.

        :param s: The string to quote.
        :param safe: Extra characters that must not be quoted.
        :return: ``s`` itself when nothing needs quoting, otherwise a new
            quoted string.
        :raises TypeError: If ``s`` is None.
        """
        if s is None:
            raise TypeError('None object cannot be quoted')
        if not s:
            # fastpath: nothing to quote in an empty value
            return s
        cachekey = (safe, always_safe)
        cached = _safe_quoters.get(cachekey)
        if cached is None:
            # First use of this `safe` set: build and remember its table.
            table = dict(_safe_map)
            for ch in safe:
                table[ch] = ch
            cached = (table.__getitem__, always_safe + safe)
            _safe_quoters[cachekey] = cached
        quoter, unquoted = cached
        if not s.rstrip(unquoted):
            # Every character is already safe.
            return s
        return ''.join(map(quoter, s))

    quote = quote_from_bytes
    unquote_to_bytes = urlparse.unquote


unquote = urlparse.unquote
|
6379.4.2
by Jelmer Vernooij
Add urlutils.quote / urlutils.unquote. |
163 |
|
164 |
||
7141.8.1
by Jelmer Vernooij
Read parent branch properly from git config. |
165 |
def escape(relpath, safe='/~'):
    """Escape relpath to be a valid url.

    :param relpath: The relative path to escape.
    :param safe: Characters passed through to quote() as not needing
        escaping.
    :return: The result of quote() on the (possibly utf-8 encoded) path.
    """
    # Only on Python 2 is a non-str value encoded to utf-8 before quoting.
    on_python2 = sys.version_info[0] == 2
    if on_python2 and not isinstance(relpath, str):
        relpath = relpath.encode('utf-8')
    return quote(relpath, safe=safe)
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
170 |
|
171 |
||
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
172 |
def file_relpath(base, path):
    """Compute just the relative sub-portion of a url

    This assumes that both paths are already fully specified file:// URLs.

    :param base: The file:// URL to compute the result relative to.
    :param path: The file:// URL to express relative to base.
    :return: The escaped relative path from base to path.
    :raises ValueError: If base is shorter than MIN_ABS_FILEURL_LENGTH.
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        message = ('Length of base (%r) must equal or'
                   ' exceed the platform minimum url length (which is %d)')
        raise ValueError(message % (base, MIN_ABS_FILEURL_LENGTH))
    # Convert both URLs to normalised local paths, compute the relative
    # path on the filesystem side, then escape it back into URL form.
    local_base = osutils.normpath(local_path_from_url(base))
    local_target = osutils.normpath(local_path_from_url(path))
    return escape(osutils.relpath(local_base, local_target))
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
184 |
|
185 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
186 |
def _find_scheme_and_separator(url):
    """Find the scheme separator (://) and the first path separator

    This is just a helper function for other path utilities.
    It could probably be replaced by urlparse

    :param url: The URL to inspect.
    :return: A 2-tuple. (None, None) if url does not match the scheme
        pattern. Otherwise (length of the scheme, index in url of the first
        '/' after the scheme separator), where the second item is None if
        there is no such slash.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_length = len(match.group('scheme'))
    # Find the path separating slash
    # (first slash after the ://)
    slash_offset = match.group('path').find('/')
    if slash_offset == -1:
        return scheme_length, None
    # Translate the offset within the path group into an index into url.
    return scheme_length, match.start('path') + slash_offset
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
205 |
|
206 |
||
5254.2.1
by Gordon Tyler
Fixed how get_transport's convert_path_to_url tests whether a path is actually a URL. |
207 |
def is_url(url):
    """Tests whether a URL is in actual fact a URL.

    :param url: The string to check.
    :return: True if the string matches the URL scheme pattern, else False.
    """
    return bool(_url_scheme_re.match(url))
|
210 |
||
211 |
||
1685.1.55
by John Arbash Meinel
Adding bzrlib.urlutils.join() to handle joining URLs |
212 |
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it will be treated correctly.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The URL (or path) to start from.
    :param args: Sections to join onto base, in order.
    :return: The joined URL; base itself when no sections are given.
    """
    if not args:
        return base

    def locate(url):
        # Return (scheme_end, path_start) with path_start always usable as
        # a slice index: 0 when there is no scheme at all, len(url) when
        # there is a scheme but no path slash.
        scheme_stop, offset = _find_scheme_and_separator(url)
        if offset is None:
            offset = 0 if scheme_stop is None else len(url)
        return scheme_stop, offset

    _, path_start = locate(base)
    path = base[path_start:]
    for arg in args:
        arg_scheme_end, arg_path_start = locate(arg)
        if arg_scheme_end is None:
            # No scheme: a plain path section, merge it into the path.
            path = joinpath(path, arg)
            continue
        # The section carries its own scheme, so it replaces the base.
        base = arg
        path_start = arg_path_start
        path = arg[arg_path_start:]
    return base[:path_start] + path
1685.1.55
by John Arbash Meinel
Adding bzrlib.urlutils.join() to handle joining URLs |
246 |
|
247 |
||
2018.5.46
by Andrew Bennetts
Fix ChrootTransportDecorator's clone to pass less surprising offsets to the decorated transport's clone. |
248 |
def joinpath(base, *args):
    """Join URL path segments to a URL path segment.

    This is somewhat like osutils.joinpath, but intended for URLs.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The path to start from.
    :param args: Path sections to apply in order. A section starting with
        '/' restarts from the root; '.' chunks are skipped and '..' chunks
        remove the previous chunk.
    :return: The combined path ('/' when only the root remains).
    :raises InvalidURLJoin: If a '..' chunk would go above the root of an
        absolute path, or above the start of a relative one.
    """
    segments = base.split('/')
    if len(segments) > 1 and segments[-1] == '':
        # If the path ends in a trailing /, remove it.
        segments.pop()
    for arg in args:
        if arg.startswith('/'):
            # Absolute section: discard everything accumulated so far.
            segments = []
        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            if chunk == '..':
                # [''] is the root of an absolute path; [] means a relative
                # path has already been consumed entirely. Popping from the
                # latter used to escape as a bare IndexError.
                if not segments or segments == ['']:
                    raise InvalidURLJoin('Cannot go above root',
                                         base, args)
                segments.pop()
            else:
                segments.append(chunk)
    if segments == ['']:
        return '/'
    return '/'.join(segments)
2018.5.46
by Andrew Bennetts
Fix ChrootTransportDecorator's clone to pass less surprising offsets to the decorated transport's clone. |
279 |
|
280 |
||
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
281 |
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
|
282 |
def _posix_local_path_from_url(url):
    """Convert a url like file:///path/to/foo into /path/to/foo

    Segment parameters are split off the URL before conversion.

    :param url: A URL starting with file:/// or file://localhost/
    :return: The unescaped local path.
    :raises InvalidURL: If the URL has neither of the accepted prefixes.
    """
    url = split_segment_parameters_raw(url)[0]
    localhost_prefix = 'file://localhost/'
    if url.startswith(localhost_prefix):
        # Keep the prefix's final slash as the leading slash of the path.
        return unescape(url[len(localhost_prefix) - 1:])
    if url.startswith('file:///'):
        # We only strip off 2 slashes
        return unescape(url[len('file://'):])
    raise InvalidURL(
        url, 'local urls must start with file:/// or file://localhost/')
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
295 |
|
296 |
||
297 |
def _posix_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///path/to/foo

    This also handles transforming escaping unicode characters, etc.

    :param path: The local path to convert.
    :return: 'file://' followed by the escaped absolute path.
    """
    # importing directly from posixpath allows us to test this
    # on non-posix platforms
    absolute_path = osutils._posix_abspath(path)
    return 'file://' + escape(absolute_path)
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
305 |
|
306 |
||
307 |
def _win32_local_path_from_url(url):
    """Convert a url like file:///C:/path/to/foo into C:/path/to/foo

    Three forms are accepted after segment parameters are split off:

    * file://HOST/path     -> UNC path //HOST/path
    * file:///             -> '/' (so that all roots can be served)
    * file:///x:/path (or file:///x|/path) -> X:/path

    :param url: The file URL to convert.
    :return: The unescaped local path.
    :raises InvalidURL: If the URL does not start with file://, or matches
        none of the forms above (including URLs too short to hold a host).
    """
    if not url.startswith('file://'):
        raise InvalidURL(url, 'local urls must start with file:///, '
                         'UNC path urls must start with file://')
    url = split_segment_parameters_raw(url)[0]
    # We strip off all 3 slashes
    win32_url = url[len('file:'):]
    # check for UNC path: //HOST/path
    if not win32_url.startswith('///'):
        # The length check keeps malformed short URLs (e.g. 'file://')
        # from escaping as IndexError on the index lookups below.
        if (len(win32_url) < 4
                or win32_url[2] == '/'
                or win32_url[3] in '|:'):
            raise InvalidURL(url, 'Win32 UNC path urls'
                             ' have form file://HOST/path')
        return unescape(win32_url)

    # allow empty paths so we can serve all roots
    if win32_url == '///':
        return '/'

    # usual local path with drive letter
    drive_letters = ('abcdefghijklmnopqrstuvwxyz'
                     'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    if (len(win32_url) < 6
            or win32_url[3] not in drive_letters
            or win32_url[4] not in '|:'
            or win32_url[5] != '/'):
        raise InvalidURL(url, 'Win32 file urls start with'
                         ' file:///x:/, where x is a valid drive letter')
    return win32_url[3].upper() + u':' + unescape(win32_url[5:])
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
336 |
|
337 |
||
338 |
def _win32_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///C:/path/to/foo

    This also handles transforming escaping unicode characters, etc.

    :param path: The local path to convert. '/' maps to 'file:///'.
    :return: A file URL; UNC paths (//HOST/path) become file://HOST/path.
    """
    # importing directly from ntpath allows us to test this
    # on non-win32 platform
    # FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
    #       which actually strips trailing space characters.
    #       The worst part is that on linux ntpath.abspath has different
    #       semantics, since 'nt' is not an available module.
    if path == '/':
        return 'file:///'

    absolute = osutils._win32_abspath(path)
    # check for UNC path \\HOST\path
    if absolute.startswith('//'):
        return 'file:' + escape(absolute)
    # Drive-letter path: upper-case the drive, escape what follows the colon.
    drive = str(absolute[0].upper())
    return 'file:///' + drive + ':' + escape(absolute[2:])
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
358 |
|
359 |
||
360 |
# Shortest absolute file URL on each platform family.
MIN_ABS_FILEURL_LENGTH = len('file:///')
WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')

# Select the platform-specific path <-> URL converters.
if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
else:
    local_path_to_url = _posix_local_path_to_url
    local_path_from_url = _posix_local_path_from_url
1685.1.48
by John Arbash Meinel
Updated strip_trailing_slash to support lots more url stuff, added tests |
370 |
|
371 |
||
6677.1.1
by Martin
Go back to native str for urls and many other py3 changes |
372 |
_url_scheme_re = re.compile('^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$') |
373 |
_url_hex_escapes_re = re.compile('(%[0-9a-fA-F]{2})') |
|
2208.4.1
by Andrew Bennetts
normalize_url should normalise escaping of unreserved characters, like '~'. |
374 |
|
375 |
||
376 |
def _unescape_safe_chars(matchobj):
    """re.sub callback to convert hex-escapes to plain characters (if safe).

    e.g. '%7E' will be converted to '~'.

    :param matchobj: A match whose whole text is a '%XX' escape.
    :return: The decoded character when it is one that need not be
        escaped; otherwise the escape itself, upper-cased.
    """
    escape_text = matchobj.group(0)
    decoded = chr(int(escape_text[1:], 16))
    if decoded in _url_dont_escape_characters:
        return decoded
    return escape_text.upper()
|
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
387 |
|
388 |
||
389 |
def normalize_url(url):
    """Make sure that a path string is in fully normalized URL form.

    This handles URLs which have unicode characters, spaces,
    special characters, etc.

    It has two basic modes of operation, depending on whether the
    supplied string starts with a url specifier (scheme://) or not.
    If it does not have a specifier it is considered a local path,
    and will be converted into a file:/// url. Non-ascii characters
    will be encoded using utf-8.
    If it does have a url specifier, it will be treated as a "hybrid"
    URL. Basically, a URL that should have URL special characters already
    escaped (like +?&# etc), but may have unicode characters, etc
    which would not be valid in a real URL.

    Only the part of the URL from the first path slash onwards is
    rewritten; a URL with a scheme but no path slash (e.g. 'http://host')
    is returned unchanged.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL which only includes 7-bit ASCII characters.
    :raises InvalidURL: If a non-text URL contains a character outside the
        set of URL-safe characters.
    """
    scheme_end, path_start = _find_scheme_and_separator(url)
    if scheme_end is None:
        return local_path_to_url(url)
    if path_start is None:
        # A scheme but no path separator. Slicing with None would make both
        # prefix and path the entire URL, doubling it in the result, so
        # treat the path as empty (the same convention join() uses).
        path_start = len(url)
    prefix = url[:path_start]
    path = url[path_start:]
    if not isinstance(url, text_type):
        for c in url:
            if c not in _url_safe_characters:
                raise InvalidURL(url, 'URLs can only contain specific'
                                 ' safe characters (not %r)' % c)
        path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
        return str(prefix + path)

    # We have a unicode (hybrid) url: percent-encode the utf-8 bytes of
    # every character that is not URL-safe.
    path_chars = [
        ch if ch in _url_safe_characters
        else ''.join('%%%02X' % b for b in bytearray(ch.encode('utf-8')))
        for ch in path]
    path = ''.join(path_chars)
    # Normalise existing escapes: decode the ones that need no escaping and
    # upper-case the rest.
    path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
    return str(prefix + path)
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
431 |
|
432 |
||
1685.1.70
by Wouter van Heyst
working on get_parent, set_parent and relative urls, broken |
433 |
def relative_url(base, other):
    """Express ``other`` as a path relative to ``base``.

    Both URLs are cut at the slash that starts their path, as located by
    _find_scheme_and_separator.  ``other`` is handed back untouched when
    either URL has no such slash, or when the text in front of that slash
    (scheme and host) differs between the two.  On win32, file:// URLs
    whose two characters after that slash differ (another drive) are also
    handed back untouched.

    ``base`` is treated as a directory; one trailing slash on it is
    ignored.  This assumes no symlinks as part of the url.

    :param base: The URL to start from.
    :param other: The URL to reach.
    :return: ``other`` itself if it is unrelated to base, otherwise a
        '/'-joined relative path ('.' when nothing is left to join).
    """
    base_sep = _find_scheme_and_separator(base)[1]
    if base_sep is None:
        return other

    other_sep = _find_scheme_and_separator(other)[1]
    if other_sep is None:
        return other

    # Differing schemes or hosts can never be related.
    prefix = base[:base_sep]
    if prefix != other[:other_sep]:
        return other
    if sys.platform == 'win32' and prefix == 'file://':
        # Local paths on separate drives cannot be expressed relatively.
        if (base[base_sep + 1:base_sep + 3]
                != other[other_sep + 1:other_sep + 3]):
            return other

    base_path = base[base_sep + 1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]
    other_path = other[other_sep + 1:]

    # An empty path has no sections at all (rather than one empty section).
    base_parts = base_path.split('/') if base_path else []
    other_parts = other_path.split('/') if other_path else []

    # Count how many leading sections the two paths share.
    shared = 0
    for base_part, other_part in zip(base_parts, other_parts):
        if base_part != other_part:
            break
        shared += 1

    # Climb out of what is left of base, then descend into other.
    steps = ['..'] * (len(base_parts) - shared)
    steps.extend(other_parts[shared:])

    return "/".join(steps) or "."
|
483 |
||
484 |
||
1711.2.43
by John Arbash Meinel
Split out win32 specific code so that it can be tested on all platforms. |
485 |
def _win32_extract_drive_letter(url_base, path): |
486 |
"""On win32 the drive letter needs to be added to the url base.""" |
|
487 |
# Strip off the drive letter
|
|
488 |
# path is currently /C:/foo
|
|
6123.3.2
by Martin
Treat file:///C: as invalid on windows instead of throwing an IndexError |
489 |
if len(path) < 4 or path[2] not in ':|' or path[3] != '/': |
6729.6.1
by Jelmer Vernooij
Move urlutils errors. |
490 |
raise InvalidURL(url_base + path, |
7143.15.2
by Jelmer Vernooij
Run autopep8. |
491 |
'win32 file:/// paths need a drive letter') |
492 |
url_base += path[0:3] # file:// + /C: |
|
493 |
path = path[3:] # /foo |
|
1711.2.43
by John Arbash Meinel
Split out win32 specific code so that it can be tested on all platforms. |
494 |
return url_base, path |
495 |
||
496 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
497 |
def split(url, exclude_trailing_slash=True):
    """Break a URL into the URL of its parent and its final path element.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Drop a final '/' when it belongs to the
        path (never when it is part of the protocol specification)

    :return: (parent_url, child_dir). child_dir may be the empty string if
        we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None and scheme_loc is not None:
        # A scheme with no path: there is no child to split off.
        return url, ''
    if first_path_slash is None:
        # No scheme either, so this is a plain relative path.
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # Fully defined URL: e.g. 'http://host' or 'file://' plus '/file/foo'.
    url_base, path = url[:first_path_slash], url[first_path_slash:]

    if sys.platform == 'win32' and url.startswith('file:///'):
        # 'file://' + '/C:/foo' becomes 'file:///C:' + '/foo'.
        url_base, path = _win32_extract_drive_letter(url_base, path)

    # A lone '/' is the root and has to survive.
    if exclude_trailing_slash and path.endswith('/') and len(path) > 1:
        path = path[:-1]
    parent, child = _posix_split(path)
    return url_base + parent, child
|
535 |
||
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
536 |
|
5163.2.5
by Jelmer Vernooij
rename {split,join}_subsegments -> {split,join}_segment_parameters_raw and add more tests. |
537 |
def split_segment_parameters_raw(url):
    """Separate the raw subsegments from the last segment of a URL.

    :param url: A relative or absolute URL
    :return: (url, subsegments).  When the last segment holds no ',' the
        URL is returned exactly as given together with an empty list;
        otherwise the URL part has had strip_trailing_slash applied.
    """
    # GZ 2011-11-18: Dodgy removing the terminal slash like this, function
    #                operates on urls not url+segments, and Transport classes
    #                should not be blindly adding slashes in the first place.
    stripped = strip_trailing_slash(url)
    # Subsegments start at the first comma that follows the last '/'
    # (or at the first comma overall when there is no '/').
    last_slash = stripped.rfind("/")
    first_comma = stripped.find(",", last_slash + 1)
    if first_comma == -1:
        return (url, [])
    head = stripped[:first_comma]
    tail = stripped[first_comma + 1:]
    return (head, [str(piece) for piece in tail.split(",")])
|
5163.2.1
by Jelmer Vernooij
Add urlutils.split_subsegments. |
553 |
|
554 |
||
5163.2.3
by Jelmer Vernooij
Add join_segment_parameters / split_segment_parameters. |
555 |
def split_segment_parameters(url):
    """Parse the key=value parameters attached to a URL's last segment.

    :param url: A relative or absolute URL
    :return: (url, segment_parameters) where segment_parameters is a dict
        built from the subsegments found by split_segment_parameters_raw
    :raises InvalidURL: if a subsegment contains no '='
    :raises TypeError: if a parsed key or value is not a str
    """
    base_url, raw_parameters = split_segment_parameters_raw(url)
    parsed = {}
    for raw in raw_parameters:
        try:
            # Only the first '=' separates; later ones belong to the value.
            name, setting = raw.split("=", 1)
        except ValueError:
            raise InvalidURL(url, "missing = in subsegment")
        for part in (name, setting):
            if not isinstance(part, str):
                raise TypeError(part)
        parsed[name] = setting
    return (base_url, parsed)
|
574 |
||
575 |
||
5163.2.5
by Jelmer Vernooij
rename {split,join}_subsegments -> {split,join}_segment_parameters_raw and add more tests. |
576 |
def join_segment_parameters_raw(base, *subsegments):
    """Create a new URL by adding subsegments to an existing one.

    The subsegments are appended, comma-separated, to the end of the
    specified base URL.

    :note: You probably want to use join_segment_parameters instead.

    :param base: The URL to extend.
    :param subsegments: Native ``str`` subsegments, none of which may
        contain a ','.
    :return: ``base`` unchanged when no subsegments are given, otherwise
        base and all subsegments joined with ','.
    :raises TypeError: if a subsegment is not a ``str``.
    :raises InvalidURLJoin: if a subsegment contains a ','.
    """
    if not subsegments:
        return base
    for subsegment in subsegments:
        if not isinstance(subsegment, str):
            # Wrap the operand in a 1-tuple: a bare tuple would be unpacked
            # as the format arguments, garbling the message or raising an
            # unrelated formatting error.
            raise TypeError(
                "Subsegment %r is not a bytestring" % (subsegment,))
        if "," in subsegment:
            raise InvalidURLJoin(", exists in subsegments",
                                 base, subsegments)
    return ",".join((base,) + subsegments)
593 |
||
594 |
||
5163.2.3
by Jelmer Vernooij
Add join_segment_parameters / split_segment_parameters. |
595 |
def join_segment_parameters(url, parameters):
    """Create a new URL by adding segment parameters to an existing one.

    The parameters of the last segment in the URL will be updated; if a
    parameter with the same key already exists it will be overwritten.
    The resulting parameters are written out sorted by key.

    :param url: A URL, as string
    :param parameters: Dictionary of parameters, keys and values as native
        ``str``
    :return: The URL with the merged ``key=value`` parameters appended.
    :raises TypeError: if a key or value in parameters is not a ``str``.
    :raises InvalidURLJoin: if a key in parameters contains '='.
    """
    (base, existing_parameters) = split_segment_parameters(url)
    new_parameters = dict(existing_parameters)
    for key, value in parameters.items():
        if not isinstance(key, str):
            # Wrap the operand in a 1-tuple so that a tuple key is shown
            # as-is instead of being unpacked as the format arguments.
            raise TypeError("parameter key %r is not a str" % (key,))
        if not isinstance(value, str):
            raise TypeError("parameter value %r for %r is not a str" %
                            (value, key))
        if "=" in key:
            # A '=' inside the key could not be told apart from the
            # key/value separator when the URL is split again.
            raise InvalidURLJoin("= exists in parameter key", url,
                                 parameters)
        new_parameters[key] = value
    return join_segment_parameters_raw(
        base, *["%s=%s" % item for item in sorted(new_parameters.items())])
|
5163.2.3
by Jelmer Vernooij
Add join_segment_parameters / split_segment_parameters. |
619 |
|
620 |
||
1711.2.44
by John Arbash Meinel
Factor out another win32 special case and add platform independent tests for it. |
621 |
def _win32_strip_local_trailing_slash(url):
    """Drop the last character of a win32 file URL unless it is too short.

    URLs no longer than WIN32_MIN_ABS_FILEURL_LENGTH are returned as they
    are, so that the slash after the drive letter is kept.

    :param url: A file:// URL; the caller has already checked that it ends
        with '/'.
    :return: The URL, possibly without its final character.
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
|
627 |
||
628 |
||
1685.1.47
by John Arbash Meinel
s comes before u |
629 |
def strip_trailing_slash(url):
    """Remove one trailing slash from a URL, leaving root paths intact.

    What counts as a 'root path' is platform-dependent: on win32, file://
    URLs are handed to _win32_strip_local_trailing_slash.

    Everything else is assumed to be either a relative path or a netloc
    style URL of the form scheme://host/path.  The slash that starts the
    path is never removed.

    Examples::

        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo

    :param url: A relative path or absolute URL.
    :return: The URL without its trailing slash, or unchanged.
    """
    if not url.endswith('/'):
        # Nothing to do
        return url
    if sys.platform == 'win32' and url.startswith('file://'):
        return _win32_strip_local_trailing_slash(url)

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    # With a scheme present, the final slash must stay when it is the
    # slash separating host from path (or when no such slash was found).
    # A path without scheme is relative and always loses its last slash.
    keep_slash = scheme_loc is not None and (
        first_path_slash is None or first_path_slash == len(url) - 1)
    return url if keep_slash else url[:-1]
668 |
||
1685.1.47
by John Arbash Meinel
s comes before u |
669 |
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
670 |
def unescape(url):
    """Undo URL quoting to obtain a relpath.

    This returns a Unicode path from a URL

    :param url: The URL (or relpath) to unquote; text input has to be
        plain ASCII.
    :return: The unquoted path.
    :raises InvalidURL: if a text url contains non-ASCII characters or,
        on Python 2, if the unquoted bytes are not valid utf-8.
    """
    # jam 20060427 URLs are supposed to be ASCII only strings
    #       If they are passed in as unicode, unquote
    #       will return a UNICODE string, which actually contains
    #       utf-8 bytes. So we have to ensure that they are
    #       plain ASCII strings, or the final .decode will
    #       try to encode the UNICODE => ASCII, and then decode
    #       it into utf-8.
    if isinstance(url, text_type):
        try:
            ascii_url = url.encode("ascii")
        except UnicodeError as e:
            raise InvalidURL(
                url, 'URL was not a plain ASCII url: %s' % (e,))
        if not PY3:
            # Python 2 carries on with the byte string form.
            url = ascii_url

    if PY3:
        return urlparse.unquote(url)

    unquoted = unquote(url)
    try:
        return unquoted.decode('utf-8')
    except UnicodeError as e:
        raise InvalidURL(
            url, 'Unable to encode the URL as utf-8: %s' % (e,))
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
705 |
|
706 |
||
707 |
# Characters which, when found percent-escaped, must remain escaped.
_no_decode_chars = ';/?:@&=+$,#'
_no_decode_ords = list(map(ord, _no_decode_chars))
# Two-digit hex spelling of each of them: all lowercase forms first, then
# all uppercase forms.
_no_decode_hex = list(
    template % code
    for template in ('%02x', '%02X')
    for code in _no_decode_ords)
# Every two-digit hex pair (lowercase and uppercase spelling) mapped to the
# byte it stands for.
_hex_display_map = dict(
    (template % code, int2byte(code))
    for template in ('%02x', '%02X')
    for code in range(256))
# ...except for the pairs above, which map back to their own escaped form.
_hex_display_map.update(
    (digits, b'%' + digits.encode('ascii')) for digits in _no_decode_hex)

# Characters that never need percent-encoding; decoding them is always
# safe when they do turn up encoded.
_url_dont_escape_characters = set("".join((
    "abcdefghijklmnopqrstuvwxyz",  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",  # Uppercase alpha
    "0123456789",  # Numbers
    "-._~",  # Unreserved characters
)))

# Characters that are allowed to appear in a URL without being escaped.
_url_safe_characters = set("".join((
    "abcdefghijklmnopqrstuvwxyz",  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",  # Uppercase alpha
    "0123456789",  # Numbers
    "_.-!~*'()",  # Unreserved characters
    "/;?:@&=+$,",  # Reserved characters
    "%#",  # Extra reserved characters
)))
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
736 |
|
7078.15.1
by Jelmer Vernooij
Fix some more tests. |
737 |
|
738 |
def _unescape_segment_for_display(segment, encoding):
    """Unescape a segment for display.

    Helper for unescape_for_display

    Percent escapes are replaced through _hex_display_map, so the reserved
    characters listed in _no_decode_chars stay escaped.  The result is only
    used when it is valid utf-8 and can be represented in ``encoding``;
    otherwise the segment is returned exactly as it was passed in.

    :param segment: One '/'-separated piece of a 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
        specified encoding, or the original segment if unescaping it
        would not give such a string.
    """
    # Every chunk after the first one followed a '%' in the segment.
    escaped_chunks = segment.split('%')
    # The text before the first '%' is literal; turn it into bytes so that
    # all chunks can be joined as bytes below.
    escaped_chunks[0] = escaped_chunks[0].encode('utf-8')
    for j in range(1, len(escaped_chunks)):
        item = escaped_chunks[j]
        try:
            # The first two characters are the candidate hex digits.
            escaped_chunks[j] = _hex_display_map[item[:2]]
        except KeyError:
            # No entry in _hex_display_map, so this was not an escape we
            # know how to decode.
            # Put back the percent symbol
            escaped_chunks[j] = b'%' + \
                (item[:2].encode('utf-8') if PY3 else item[:2])
        except UnicodeDecodeError:
            # NOTE(review): presumably only reachable on Python 2, where
            # comparing a unicode key with byte-string keys can fail to
            # decode -- confirm before relying on or removing this branch.
            escaped_chunks[j] = unichr(int(item[:2], 16)).encode('utf-8')
        # Whatever followed the two hex digits is literal text.
        escaped_chunks[j] += (item[2:].encode('utf-8') if PY3 else item[2:])
    unescaped = b''.join(escaped_chunks)
    try:
        decoded = unescaped.decode('utf-8')
    except UnicodeDecodeError:
        # If this path segment cannot be properly utf-8 decoded
        # after doing unescaping we will just leave it alone
        return segment
    else:
        try:
            # Only a check: the encoded result itself is discarded.
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # If this chunk cannot be encoded in the local
            # encoding, then we should leave it alone
            return segment
        else:
            # Otherwise take the url decoded one
            return decoded
|
779 |
||
780 |
||
1685.1.54
by John Arbash Meinel
url_for_display now makes sure output can be properly encoded. |
781 |
def unescape_for_display(url, encoding):
    """Make a URL pleasant to read by decoding whatever can be decoded.

    file:// urls are turned into local paths; for any other url each
    '/'-separated section after the first is unescaped on its own.

    Any sections of the URL which can't be represented in the encoding or
    need to stay as escapes are left alone.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
        specified encoding.
    :raises ValueError: if encoding is None
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')

    if url.startswith('file://'):
        try:
            local_path = local_path_from_url(url)
            # Encoded only to check that the path is representable.
            local_path.encode(encoding)
        except UnicodeError:
            return url
        else:
            return local_path

    # Handle the sections one by one so that a single undecodable section
    # does not stop the others from being decoded.  The first section
    # (everything before the first '/') is kept verbatim.
    sections = url.split('/')
    shown = sections[:1]
    shown.extend(
        [_unescape_segment_for_display(section, encoding)
         for section in sections[1:]])
    return u'/'.join(shown)
2512.4.1
by Ian Clatworthy
Fixes #115491 - 'branch lp:projname' now creates ./projname as exected |
811 |
|
812 |
||
813 |
def derive_to_location(from_location):
    """Work out a TO_LOCATION for a given FROM_LOCATION.

    Segment parameters are removed first.  Usually the result is the last
    path element, e.g. http://foo/bar => bar.

    Some logical locations contain no / at all.  For those the result is
    everything after the scheme indicator, e.g. lp:foo-bar => foo-bar; the
    same goes for a Windows drive used without a path, e.g.
    c:foo-bar => foo-bar.

    If no /, path separator or : is found, the from_location is returned.

    :param from_location: The location being branched from.
    :return: The derived name for the destination.
    """
    from_location, unused_params = split_segment_parameters(from_location)
    if "/" in from_location or os.sep in from_location:
        return os.path.basename(from_location.rstrip("/\\"))
    colon = from_location.find(":")
    if colon > 0:
        return from_location[colon + 1:]
    return from_location
|
3242.3.26
by Aaron Bentley
Implement rebase_url |
833 |
|
3242.3.35
by Aaron Bentley
Cleanups and documentation |
834 |
|
3242.3.26
by Aaron Bentley
Implement rebase_url |
835 |
def _is_absolute(url):
    """Tell whether joining ``url`` onto a base path discards the base.

    NOTE(review): presumably osutils.pathjoin returns its last argument
    unchanged exactly when that argument is an absolute path -- confirm
    against osutils.

    :param url: The path to test.
    :return: True if osutils.pathjoin('/foo', url) equals url.
    """
    rejoined = osutils.pathjoin('/foo', url)
    return rejoined == url
|
837 |
||
3242.3.35
by Aaron Bentley
Cleanups and documentation |
838 |
|
3242.3.26
by Aaron Bentley
Implement rebase_url |
839 |
def rebase_url(url, old_base, new_base):
    """Re-express a path relative to old_base as relative to new_base.

    The result will be a relative path.
    Absolute paths and full URLs are returned unaltered.

    :param url: The relative path to convert.
    :param old_base: The base URL that url is currently relative to.
    :param new_base: The base URL the result should be relative to.
    :return: The converted relative path, or url itself if it has a scheme
        or is absolute.
    :raises InvalidRebaseURLs: if the scheme or network location of the
        two bases differ.
    """
    scheme, separator = _find_scheme_and_separator(url)
    if scheme is not None or _is_absolute(url):
        return url

    old_parts = urlparse.urlparse(old_base)
    new_parts = urlparse.urlparse(new_base)
    # The first two fields are the scheme and the network location.
    if old_parts[:2] != new_parts[:2]:
        raise InvalidRebaseURLs(old_base, new_base)

    # The third field is the path; join() resolves url against the old one.
    new_path = new_parts[2]
    target_path = join(old_parts[2], url)
    return determine_relative_path(new_path, target_path)
3242.3.26
by Aaron Bentley
Implement rebase_url |
856 |
|
857 |
||
858 |
def determine_relative_path(from_path, to_path):
    """Work out the relative path leading from from_path to to_path.

    Both paths are broken up with osutils.splitpath.  The leading segments
    they share are dropped, each remaining segment of from_path becomes a
    '..', and the remaining segments of to_path are appended.

    :param from_path: The path to start from.
    :param to_path: The path to reach.
    :return: The relative path, or '.' if no segments remain.
    """
    from_segments = osutils.splitpath(from_path)
    to_segments = osutils.splitpath(to_path)

    # Length of the common leading run of segments.
    common = 0
    for from_element, to_element in zip(from_segments, to_segments):
        if from_element != to_element:
            break
        common += 1

    climb = ['..'] * len(from_segments[common:])
    segments = climb + to_segments[common:]
    if not segments:
        return '.'
    return osutils.pathjoin(*segments)
|
3873.3.1
by Martin Pool
Move Transport._split_url to urlutils, and ad a simple test |
875 |
|
876 |
||
6055.2.7
by Jelmer Vernooij
Change parse_url to URL.from_string. |
877 |
class URL(object):
    """Parsed URL.

    Holds the scheme and port of a URL together with both the quoted (as
    given) and unquoted forms of its user, password, host and path.
    """

    def __init__(self, scheme, quoted_user, quoted_password, quoted_host,
                 port, quoted_path):
        """Create a URL from already-split, still-quoted components.

        :param scheme: URL scheme, stored unchanged.
        :param quoted_user: quoted user name, or None.
        :param quoted_password: quoted password, or None.
        :param quoted_host: quoted host name.
        :param port: port, stored unchanged (``from_string`` passes an int
            or None).
        :param quoted_path: quoted path.
        """
        self.scheme = scheme
        self.quoted_host = quoted_host
        self.host = unquote(self.quoted_host)
        self.quoted_user = quoted_user
        if self.quoted_user is not None:
            self.user = unquote(self.quoted_user)
        else:
            self.user = None
        self.quoted_password = quoted_password
        if self.quoted_password is not None:
            self.password = unquote(self.quoted_password)
        else:
            self.password = None
        self.port = port
        # NOTE(review): presumably this rewrites %XX escapes of characters
        # that do not need escaping, so equivalent paths share one quoted
        # form -- confirm against _unescape_safe_chars.
        self.quoted_path = _url_hex_escapes_re.sub(
            _unescape_safe_chars, quoted_path)
        self.path = unquote(self.quoted_path)

    def __eq__(self, other):
        """Return True if other is the same kind of URL with equal parts.

        The unquoted scheme, host, port, user, password and path are all
        compared; URLs that differ only in port are different URLs.
        """
        return (isinstance(other, self.__class__) and
                self.scheme == other.scheme and
                self.host == other.host and
                self.port == other.port and
                self.user == other.user and
                self.password == other.password and
                self.path == other.path)

    def __ne__(self, other):
        """Return the inverse of ``__eq__``.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``.
        """
        return not self == other

    def __repr__(self):
        return "<%s(%r, %r, %r, %r, %r, %r)>" % (
            self.__class__.__name__,
            self.scheme, self.quoted_user, self.quoted_password,
            self.quoted_host, self.port, self.quoted_path)

    @classmethod
    def from_string(cls, url):
        """Create a URL object from a string.

        :param url: URL as a native string; a ``text_type`` value is also
            accepted and is encoded with the default codec first.
        :return: a new instance of ``cls``.
        :raises InvalidURL: if ``url`` is of another type, cannot be
            encoded, or contains a port that is not an integer.
        """
        # GZ 2017-06-09: Actually validate ascii-ness
        # pad.lv/1696545: For the moment, accept both native strings and
        # unicode.
        if isinstance(url, str):
            pass
        elif isinstance(url, text_type):
            try:
                url = url.encode()
            except UnicodeEncodeError:
                raise InvalidURL(url)
        else:
            raise InvalidURL(url)
        (scheme, netloc, path, params,
         query, fragment) = urlparse.urlparse(url, allow_fragments=False)
        user = password = host = port = None
        if '@' in netloc:
            # Split on the last '@' so the credentials part may itself
            # contain '@'.
            user, host = netloc.rsplit('@', 1)
            if ':' in user:
                user, password = user.split(':', 1)
        else:
            host = netloc

        # A ':' inside a fully bracketed host belongs to an IPv6 literal,
        # not to a port separator.
        if ':' in host and not (host[0] == '[' and host[-1] == ']'):
            # there *is* port
            host, port = host.rsplit(':', 1)
            if port:
                try:
                    port = int(port)
                except ValueError:
                    raise InvalidURL('invalid port number %s in url:\n%s' %
                                     (port, url))
            else:
                # "host:" with nothing after the colon means no port.
                port = None
        if host != "" and host[0] == '[' and host[-1] == ']':  # IPv6
            host = host[1:-1]

        return cls(scheme, user, password, host, port, path)

    def __str__(self):
        """Return the URL as a string, without the password."""
        netloc = self.quoted_host
        if ":" in netloc:
            # Hosts containing ':' are written back in brackets.
            netloc = "[%s]" % netloc
        if self.quoted_user is not None:
            # Note that we don't put the password back even if we
            # have one so that it doesn't get accidentally
            # exposed.
            netloc = '%s@%s' % (self.quoted_user, netloc)
        if self.port is not None:
            netloc = '%s:%d' % (netloc, self.port)
        return urlparse.urlunparse(
            (self.scheme, netloc, self.quoted_path, None, None, None))

    @staticmethod
    def _combine_paths(base_path, relpath):
        """Transform a Transport-relative path to a remote absolute path.

        This does not handle substitution of ~ but does handle '..' and '.'
        components.

        Examples::

            t._combine_paths('/home/sarah', 'project/foo')
                => '/home/sarah/project/foo'
            t._combine_paths('/home/sarah', '../../etc')
                => '/etc'
            t._combine_paths('/home/sarah', '/etc')
                => '/etc'

        :param base_path: base path
        :param relpath: relative url string for relative part of remote path.
        :return: urlencoded string for final path.
        :raises InvalidURL: if ``relpath`` is neither a native string nor
            ``text_type``, or cannot be encoded.
        """
        # pad.lv/1696545: For the moment, accept both native strings and
        # unicode.
        if isinstance(relpath, str):
            pass
        elif isinstance(relpath, text_type):
            try:
                relpath = relpath.encode()
            except UnicodeEncodeError:
                raise InvalidURL(relpath)
        else:
            raise InvalidURL(relpath)
        relpath = _url_hex_escapes_re.sub(_unescape_safe_chars, relpath)
        if relpath.startswith('/'):
            # An absolute relpath replaces the base path entirely.
            base_parts = []
        else:
            base_parts = base_path.split('/')
        if base_parts and base_parts[-1] == '':
            # Drop the empty component left by a trailing '/'.
            base_parts = base_parts[:-1]
        for p in relpath.split('/'):
            if p == '..':
                if not base_parts:
                    # In most filesystems, a request for the parent
                    # of root, just returns root.
                    continue
                base_parts.pop()
            elif p == '.':
                continue  # No-op
            elif p != '':
                base_parts.append(p)
        path = '/'.join(base_parts)
        if not path.startswith('/'):
            path = '/' + path
        return path

    def clone(self, offset=None):
        """Return a new URL for a path relative to this URL.

        :param offset: A relative path, already urlencoded
        :return: `URL` instance
        """
        if offset is not None:
            relative = unescape(offset)
            if sys.version_info[0] == 2:
                # On Python 2 the unescaped offset is encoded as UTF-8
                # before it is combined with the base path.
                relative = relative.encode('utf-8')
            path = self._combine_paths(self.path, relative)
            path = quote(path, safe="/~")
        else:
            path = self.quoted_path
        return self.__class__(self.scheme, self.quoted_user,
                              self.quoted_password, self.quoted_host,
                              self.port, path)
|
6055.2.17
by Jelmer Vernooij
Add URL.clone(). |
1043 |
|
3873.3.1
by Martin Pool
Move Transport._split_url to urlutils, and ad a simple test |
1044 |
|
1045 |
def parse_url(url):
    """Split a url into server address, credentials and path.

    Reserved characters in the user, password, host and path parts of the
    url are expected to be quoted.

    :param url: a quoted url
    :return: a (scheme, user, password, host, port, path) tuple built from
        the unquoted attributes of the parsed URL.
    """
    parts = URL.from_string(url)
    credentials = (parts.user, parts.password)
    location = (parts.host, parts.port, parts.path)
    return (parts.scheme,) + credentials + location