bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
1 |
# Bazaar-NG -- distributed version control
|
2 |
#
|
|
3 |
# Copyright (C) 2006 by Canonical Ltd
|
|
4 |
#
|
|
5 |
# This program is free software; you can redistribute it and/or modify
|
|
6 |
# it under the terms of the GNU General Public License as published by
|
|
7 |
# the Free Software Foundation; either version 2 of the License, or
|
|
8 |
# (at your option) any later version.
|
|
9 |
#
|
|
10 |
# This program is distributed in the hope that it will be useful,
|
|
11 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 |
# GNU General Public License for more details.
|
|
14 |
#
|
|
15 |
# You should have received a copy of the GNU General Public License
|
|
16 |
# along with this program; if not, write to the Free Software
|
|
17 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
18 |
||
19 |
"""A collection of functions for handling URL operations."""
|
|
20 |
||
21 |
import urllib |
|
22 |
import sys |
|
23 |
||
24 |
import bzrlib.errors as errors |
|
25 |
import bzrlib.osutils |
|
26 |
||
27 |
||
28 |
def escape(relpath):
    """Percent-quote relpath so that it can be used as part of a URL.

    Unicode input is encoded as utf-8 before quoting; the result is always
    a plain str.
    """
    raw_path = relpath
    if isinstance(raw_path, unicode):
        raw_path = raw_path.encode('utf-8')
    quoted = urllib.quote(raw_path)
    # Once quoted, only ASCII characters should remain; str() simply
    # enforces that the result is a plain byte string.
    return str(quoted)
|
35 |
||
36 |
||
37 |
def unescape(url): |
|
38 |
"""Unescape relpath from url format. |
|
39 |
||
40 |
This returns a Unicode path from a URL
|
|
41 |
"""
|
|
42 |
# jam 20060427 URLs are supposed to be ASCII only strings
|
|
43 |
# If they are passed in as unicode, urllib.unquote
|
|
44 |
# will return a UNICODE string, which actually contains
|
|
45 |
# utf-8 bytes. So we have to ensure that they are
|
|
46 |
# plain ASCII strings, or the final .decode will
|
|
47 |
# try to encode the UNICODE => ASCII, and then decode
|
|
48 |
# it into utf-8.
|
|
49 |
try: |
|
50 |
url = str(url) |
|
51 |
except UnicodeError, e: |
|
52 |
raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,)) |
|
53 |
unquoted = urllib.unquote(url) |
|
54 |
try: |
|
55 |
unicode_path = unquoted.decode('utf-8') |
|
56 |
except UnicodeError, e: |
|
57 |
raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,)) |
|
58 |
return unicode_path |
|
59 |
||
60 |
||
61 |
def file_relpath(base, path):
    """Return the escaped portion of path that lies below base.

    Both arguments are expected to be fully specified file:// URLs; each
    is converted to a local path, the relative path is computed with
    bzrlib.osutils.relpath, and the result is escaped again.
    """
    min_length = MIN_ABS_URLPATHLENGTH
    assert len(base) >= min_length, ('Length of base must be equal or'
        ' exceed the platform minimum url length (which is %d)' %
        min_length)

    local_base = local_path_from_url(base)
    local_target = local_path_from_url(path)
    relative = bzrlib.osutils.relpath(local_base, local_target)
    return escape(relative)
|
73 |
||
74 |
||
75 |
def strip_trailing_slash(url):
    """Remove one trailing slash from url, unless url is a root path.

    A url whose length equals MIN_ABS_URLPATHLENGTH is treated as the root
    and returned untouched; that constant differs between platforms.
    The url passed in must be a file:/// url.
    """
    assert url.startswith('file:///'), \
        'strip_trailing_slash expects file:// urls (%s)' % url
    # Guard clauses: a root url, or one without a trailing slash, is
    # returned as-is.
    if len(url) == MIN_ABS_URLPATHLENGTH or url[-1] != '/':
        return url
    return url[:-1]
|
87 |
||
88 |
||
89 |
# Characters that, when found percent-escaped in a URL, should stay escaped
# when the URL is prepared for display.
_no_decode_chars = ';/?:@&=+$,#'
_no_decode_ords = [ord(c) for c in _no_decode_chars]
# Two-digit hex codes for those characters, in both lower and upper case,
# since either spelling may follow a '%' in a URL.
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
                  + ['%02X' % o for o in _no_decode_ords])
# Start from a copy of urllib's hex-code -> character table ...
_hex_display_map = urllib._hextochr.copy()
# ... and override the entries above so that they map back to themselves,
# i.e. to '%' followed by the same hex code.
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
|
|
97 |
||
98 |
||
99 |
def unescape_for_display(url):
    """Decode as much of url as possible, to produce a nice looking path.

    file:// urls are converted to local paths.  For any other url, every
    '/'-separated section after the first is unescaped through
    _hex_display_map and then decoded as utf-8; a section that cannot be
    decoded is left in its original form.
    """
    if url.startswith('file://'):
        return local_path_from_url(url)

    # Work section by section so that one undecodable section does not
    # prevent the others from being decoded.
    sections = url.split('/')
    for section_idx in xrange(1, len(sections)):
        pieces = sections[section_idx].split('%')
        # pieces[0] is the text before the first '%'; every later piece
        # starts with what should be a two-digit hex code.
        for piece_idx in xrange(1, len(pieces)):
            piece = pieces[piece_idx]
            hex_code = piece[:2]
            remainder = piece[2:]
            try:
                pieces[piece_idx] = _hex_display_map[hex_code] + remainder
            except KeyError:
                # Not a known escape: put back the percent symbol
                pieces[piece_idx] = '%' + piece
            except UnicodeDecodeError:
                pieces[piece_idx] = unichr(int(hex_code, 16)) + remainder
        rejoined = ''.join(pieces)
        try:
            sections[section_idx] = rejoined.decode('utf-8')
        except UnicodeDecodeError:
            # This section is not valid utf-8 once unescaped, so the
            # original (still escaped) text is kept.
            pass
    return '/'.join(sections)
|
129 |
||
130 |
def _posix_local_path_to_url(path):
    """Build a file:// URL (e.g. file:///path/to/foo) from a local path.

    The path is made absolute, normalized and then escaped, so unicode
    characters end up percent-quoted.
    """
    # posixpath is imported explicitly (rather than os.path) so that this
    # function can be tested on non-posix platforms too.
    import posixpath
    absolute = bzrlib.osutils._posix_abspath(path)
    normalized = posixpath.normpath(absolute)
    return 'file://' + escape(normalized)
|
139 |
||
140 |
||
141 |
def _posix_local_path_from_url(url):
    """Turn a url such as file:///path/to/foo into /path/to/foo.

    Raises errors.InvalidURL when url does not start with file:///.
    """
    if url.startswith('file:///'):
        # Only 'file://' is removed: the third slash is the root of the
        # local path.
        scheme_length = len('file://')
        return unescape(url[scheme_length:])
    raise errors.InvalidURL(url, 'local urls must start with file:///')
|
147 |
||
148 |
||
149 |
def _win32_local_path_to_url(path):
    """Build a URL like file:///C|/path/to/foo from a local win32 path.

    The path is made absolute, normalized, switched to forward slashes and
    escaped, so unicode characters end up percent-quoted.
    """
    # The win32-specific helpers from bzrlib.osutils are called directly
    # (instead of os.path) so this can be exercised on non-win32 platforms.
    absolute = bzrlib.osutils._win32_abspath(path)
    normalized = bzrlib.osutils._nt_normpath(absolute)
    forward_slashed = normalized.replace('\\', '/')
    # forward_slashed[0] is the drive letter and forward_slashed[1] its
    # separator, which is replaced by '|' in the URL.
    drive_letter = forward_slashed[0].upper()
    return 'file:///' + drive_letter + '|' + escape(forward_slashed[2:])
|
160 |
||
161 |
||
162 |
def _win32_local_path_from_url(url):
    """Convert a url like file:///C|/path/to/foo into C:/path/to/foo

    :param url: A file:/// url whose path starts with a drive letter,
        followed by '|' or ':' and then a '/'.
    :return: The unescaped local path, with the drive letter upper-cased
        and ':' as the drive separator.
    :raises errors.InvalidURL: If url does not start with file:///, or if
        what follows does not begin with a valid drive specification
        (including when it is too short to hold one).
    """
    if not url.startswith('file:///'):
        raise errors.InvalidURL(url, 'local urls must start with file:///')
    # We strip off all 3 slashes
    win32_url = url[len('file:///'):]
    # The length is checked first: indexing a too-short remainder (for
    # example from 'file:///' or 'file:///C|') would otherwise escape as an
    # IndexError instead of the InvalidURL that callers expect.
    if (len(win32_url) < 3
        or win32_url[0] not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        or win32_url[1] not in '|:'
        or win32_url[2] != '/'):
        raise errors.InvalidURL(url, 'Win32 file urls start with file:///X|/, where X is a valid drive letter')
    # The drive letter is normalized to upper case; the rest of the path,
    # starting at the '/', is unescaped.
    return win32_url[0].upper() + u':' + unescape(win32_url[2:])
|
175 |
||
176 |
||
177 |
# Select the platform-specific implementations.  MIN_ABS_URLPATHLENGTH is
# the length of the shortest absolute file url on the platform, i.e. the
# url of a root path.
if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url
    MIN_ABS_URLPATHLENGTH = len('file:///C|/')
else:
    local_path_to_url = _posix_local_path_to_url
    local_path_from_url = _posix_local_path_from_url
    MIN_ABS_URLPATHLENGTH = len('file:///')