bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 1 | # Bazaar-NG -- distributed version control
 | 
| 2 | #
 | |
| 3 | # Copyright (C) 2006 by Canonical Ltd
 | |
| 4 | #
 | |
| 5 | # This program is free software; you can redistribute it and/or modify
 | |
| 6 | # it under the terms of the GNU General Public License as published by
 | |
| 7 | # the Free Software Foundation; either version 2 of the License, or
 | |
| 8 | # (at your option) any later version.
 | |
| 9 | #
 | |
| 10 | # This program is distributed in the hope that it will be useful,
 | |
| 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| 13 | # GNU General Public License for more details.
 | |
| 14 | #
 | |
| 15 | # You should have received a copy of the GNU General Public License
 | |
| 16 | # along with this program; if not, write to the Free Software
 | |
| 17 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| 18 | ||
| 19 | """A collection of functions for handling URL operations."""
 | |
| 20 | ||
| 1685.1.49
by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname | 21 | import os | 
| 22 | from posixpath import split as _posix_split | |
| 1685.1.50
by John Arbash Meinel Added an re for handling scheme paths. | 23 | import re | 
| 24 | import sys | |
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 25 | import urllib | 
| 26 | ||
| 27 | import bzrlib.errors as errors | |
| 28 | import bzrlib.osutils | |
| 29 | ||
| 30 | ||
| 1685.1.49
def basename(url, exclude_trailing_slash=True):
    """Return the final component of a URL.

    :param url: The URL in question
    :param exclude_trailing_slash: If the url looks like "path/to/foo/"
        ignore the final slash and return 'foo' rather than ''
    :return: Only the last component of the URL, i.e. the second item
        of split(). This can be '' if you don't exclude_trailing_slash,
        or if you are at the root of the URL.
    """
    _parent, last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return last_component
| 42 | ||
| 43 | ||
def dirname(url, exclude_trailing_slash=True):
    """Return the parent directory of the given URL.

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Remove a final slash
        (treat http://host/foo/ as http://host/foo, but
        http://host/ stays http://host/)
    :return: Everything in the URL except the last path chunk, i.e. the
        first item of split().
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    parent, _last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return parent
| 56 | ||
| 57 | ||
| 1685.1.45
def escape(relpath):
    """Percent-quote relpath so that it is valid inside a URL.

    Unicode input is first encoded as UTF-8; the bytes are then quoted
    with urllib.quote.

    :param relpath: A path, either a plain str or a unicode string
    :return: The quoted path as a plain str
    """
    if isinstance(relpath, unicode):
        relpath = relpath.encode('utf-8')
    quoted = urllib.quote(relpath)
    # Once encoded and quoted the result should be pure ASCII;
    # str() merely enforces that.
    return str(quoted)
| 65 | ||
| 66 | ||
| 1685.1.46
def file_relpath(base, path):
    """Return the escaped portion of path that is relative to base.

    Both arguments are assumed to already be fully specified file:// URLs.
    They are converted to local paths, the relative part is computed by
    bzrlib.osutils.relpath, and the result is escaped again.

    :param base: The file:// URL to compute the result relative to
    :param path: The file:// URL to make relative
    :return: The escaped relative path
    """
    assert len(base) >= MIN_ABS_FILEURL_LENGTH, (
        'Length of base must be equal or'
        ' exceed the platform minimum url length (which is %d)'
        % MIN_ABS_FILEURL_LENGTH)

    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    return escape(bzrlib.osutils.relpath(local_base, local_path))
| 79 | ||
| 80 | ||
| 1685.1.49
def _find_scheme_and_separator(url):
    """Locate the scheme separator (://) and the first path separator.

    This is just a helper function for other path utilities.
    It could probably be replaced by urlparse.

    :param url: The URL (or relative path) to inspect
    :return: A tuple (scheme_loc, first_path_slash).  Both are None when
        url has no scheme.  Otherwise scheme_loc is the length of the
        scheme (the index of '://') and first_path_slash is the index in
        url of the first '/' after the '://', or None if there is none.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_len = len(match.group('scheme'))
    # Position of the first slash after the ://, relative to the
    # text following the ://
    slash_in_rest = match.group('path').find('/')
    if slash_in_rest < 0:
        return scheme_len, None
    # Skip over the scheme and the 3 characters of '://' to turn the
    # relative position into an index into url.
    return scheme_len, slash_in_rest + scheme_len + 3
| 1685.1.49
by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname | 100 | |
| 101 | ||
| 1685.1.55
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it replaces everything that
    came before it.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The URL (or relative path) to start from
    :param args: Further sections, each either a relative path or an
        absolute URL
    :return: The joined URL
    :raises errors.InvalidURLJoin: if a '..' would remove the only
        remaining path element
    """
    scheme = None
    match = _url_scheme_re.match(base)
    if match is None:
        path = base.split('/')
    else:
        scheme = match.group('scheme')
        path = match.group('path').split('/')

    for arg in args:
        match = _url_scheme_re.match(arg)
        if match is not None:
            # Absolute URL: start over from it
            scheme = match.group('scheme')
            path = match.group('path').split('/')
            continue

        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            if chunk != '..':
                path.append(chunk)
                continue
            # Don't pop off the host portion
            if len(path) < 2:
                raise errors.InvalidURLJoin('Cannot go above root',
                        base, args)
            path.pop()

    joined = '/'.join(path)
    if scheme is None:
        return joined
    return scheme + '://' + joined
| 145 | ||
| 146 | ||
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 147 | # jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
 | 
def _posix_local_path_from_url(url):
    """Convert a url like file:///path/to/foo into /path/to/foo

    :param url: A URL starting with file:///
    :return: The unescaped local path
    :raises errors.InvalidURL: if url does not start with file:///
    """
    if url.startswith('file:///'):
        # Only 2 of the 3 slashes are stripped; the third one is the
        # leading slash of the path.
        return unescape(url[len('file://'):])
    raise errors.InvalidURL(url, 'local urls must start with file:///')
| 154 | ||
| 155 | ||
def _posix_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///path/to/foo

    The path is made absolute, normalized, and escaped (which also
    takes care of unicode characters).

    :param path: A local path
    :return: A file:// URL for that path
    """
    # importing directly from posixpath allows us to test this
    # on non-posix platforms
    from posixpath import normpath
    absolute = bzrlib.osutils._posix_abspath(path)
    return 'file://' + escape(normpath(absolute))
| 165 | ||
| 166 | ||
def _win32_local_path_from_url(url):
    """Convert a url like file:///C|/path/to/foo into C:/path/to/foo

    :param url: A URL of the form file:///X|/... or file:///X:/...
        where X is a drive letter
    :return: The unescaped local path, with an upper-cased drive letter
        followed by ':'
    :raises errors.InvalidURL: if url does not start with file:///, or
        is not followed by a drive letter, a '|' or ':', and a '/'
    """
    if not url.startswith('file:///'):
        raise errors.InvalidURL(url, 'local urls must start with file:///')
    # We strip off all 3 slashes
    win32_url = url[len('file:///'):]
    # Check the length first: indexing a too-short remainder (e.g. for
    # 'file:///' or 'file:///C') would raise IndexError rather than the
    # InvalidURL that callers expect for a malformed URL.
    if (len(win32_url) < 3
        or win32_url[0] not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        or win32_url[1] not in '|:'
        or win32_url[2] != '/'):
        raise errors.InvalidURL(url, 'Win32 file urls start with file:///X|/, where X is a valid drive letter')
    # The drive letter is always reported in upper case.
    return win32_url[0].upper() + u':' + unescape(win32_url[2:])
| 180 | ||
| 181 | ||
def _win32_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///C:/path/to/foo

    The path is made absolute and normalized, backslashes become forward
    slashes, the drive letter is upper-cased, and the remainder is
    escaped (which also takes care of unicode characters).

    :param path: A local path
    :return: A file:/// URL for that path
    """
    # Using the explicit win32/nt helpers from bzrlib.osutils allows us
    # to test this on non-win32 platforms
    absolute = bzrlib.osutils._win32_abspath(path)
    win32_path = bzrlib.osutils._nt_normpath(absolute).replace('\\', '/')
    drive_letter = win32_path[0].upper()
    return 'file:///' + drive_letter + ':' + escape(win32_path[2:])
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 192 | |
| 193 | ||
# Select the platform specific implementations, and the length of the
# shortest absolute file URL on this platform.
if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = len('file:///C|/')
else:
    local_path_to_url = _posix_local_path_to_url
    local_path_from_url = _posix_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = len('file:///')
| 203 | ||
| 204 | ||
| 1685.1.50
# Matches 'scheme://rest' and exposes the two halves as the named groups
# 'scheme' and 'path'.  The scheme must be at least 2 characters long and
# may not contain ':' or '/' (presumably so that a single drive letter
# is never mistaken for a scheme).
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
| 206 | ||
| 207 | ||
def normalize_url(url):
    """Make sure that a path string is in fully normalized URL form.

    This handles URLs which have unicode characters, spaces,
    special characters, etc.

    It has two basic modes of operation, depending on whether the
    supplied string starts with a url specifier (scheme://) or not.
    If it does not have a specifier it is considered a local path,
    and will be converted into a file:/// url. Non-ascii characters
    will be encoded using utf-8.
    If it does have a url specifier, it will be treated as a "hybrid"
    URL. Basically, a URL that should have URL special characters already
    escaped (like +?&# etc), but may have unicode characters, etc
    which would not be valid in a real URL.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL which only includes 7-bit ASCII characters.
    :raises errors.InvalidURL: if url has a scheme, is not a unicode
        string, and contains a character outside _url_safe_characters
    """
    m = _url_scheme_re.match(url)
    if not m:
        return local_path_to_url(url)
    if not isinstance(url, unicode):
        # A plain string with a scheme must already be a valid URL;
        # it is only checked, never modified.
        for c in url:
            if c not in _url_safe_characters:
                raise errors.InvalidURL(url, 'URLs can only contain specific'
                                        ' safe characters (not %r)' % c)
        return url
    # We have a unicode (hybrid) url
    scheme = m.group('scheme')
    path = list(m.group('path'))

    for i, char in enumerate(path):
        if char not in _url_safe_characters:
            # Replace the character by the %XX escapes of its utf-8 bytes
            utf8_bytes = char.encode('utf-8')
            path[i] = ''.join(['%%%02X' % ord(byte) for byte in utf8_bytes])
    return scheme + '://' + ''.join(path)
| 1685.1.50
by John Arbash Meinel Added an re for handling scheme paths. | 245 | |
| 246 | ||
| 1685.1.70
def relative_url(base, other):
    """Return a path to other from base.

    If other is unrelated to base, return other. Else return a relative path.
    This assumes no symlinks as part of the url.

    :param base: The URL to start from
    :param other: The URL to reach
    :return: other unchanged when either URL lacks a scheme or a path
        separating slash, or when the parts before that slash differ.
        Otherwise a '/' joined relative path made of '..' entries and
        the remaining sections of other, or '.' if that path is empty.
    """
    _scheme_len, base_first_slash = _find_scheme_and_separator(base)
    if base_first_slash is None:
        return other

    _scheme_len, other_first_slash = _find_scheme_and_separator(other)
    if other_first_slash is None:
        return other

    # Comparing everything up to the first path slash takes care of
    # differing schemes or hosts
    if base[:base_first_slash] != other[:other_first_slash]:
        return other

    base_path = base[base_first_slash+1:]
    other_path = other[other_first_slash+1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]

    base_sections = base_path.split('/')
    other_sections = other_path.split('/')
    # Splitting an empty path gives [''], which really means no sections
    if base_sections == ['']:
        base_sections = []
    if other_sections == ['']:
        other_sections = []

    # Count the leading sections which both paths have in common
    match_len = 0
    for base_part, other_part in zip(base_sections, other_sections):
        if base_part != other_part:
            break
        match_len += 1

    # Go up once for every unmatched section of base, then down into
    # the unmatched sections of other.
    output_sections = ['..'] * (len(base_sections) - match_len)
    output_sections.extend(other_sections[match_len:])

    return "/".join(output_sections) or "."
| 292 | ||
| 293 | ||
| 1685.1.49
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string
        if we're at the root.
    :raises errors.InvalidURL: on win32, if a file:/// URL is not
        followed by a drive letter, a ':' or '|', and a slash
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None:
        # We have either a relative path, or no separating slash
        if scheme_loc is None:
            # Relative path
            if exclude_trailing_slash and url.endswith('/'):
                url = url[:-1]
            return _posix_split(url)
        else:
            # Scheme with no path
            return url, ''

    # We have a fully defined path
    url_base = url[:first_path_slash] # http://host, file://
    path = url[first_path_slash:] # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Strip off the drive letter
        # path is currently /C:/foo
        # The length must be checked explicitly: an empty slice is "in"
        # every string, so slicing a too-short path (e.g. '/' or '/C')
        # would silently pass the membership tests.
        if (len(path) < 4
            or path[2] not in ':|'
            or path[3] not in '\\/'):
            raise errors.InvalidURL(url,
                'win32 file:/// paths need a drive letter')
        url_base += path[0:3] # file:// + /C:
        path = path[3:] # /foo

    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    head, tail = _posix_split(path)
    return url_base + head, tail
| 334 | ||
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 335 | |
| 1685.1.47
def strip_trailing_slash(url):
    """Strip trailing slash, except for root paths.

    The definition of 'root path' is platform-dependent.
    This assumes that all URLs are valid netloc urls, such that they
    form:
    scheme://host/path
    It searches for ://, and then refuses to remove the next '/'.
    It can also handle relative paths
    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo
        # This is unique on win32 platforms, and is the only URL
        # format which does it differently.
        file:///C|/       => file:///C|/

    :param url: A relative or absolute URL
    :return: url without its final '/', unless that slash is the root
    """
    if not url.endswith('/'):
        # Nothing to do
        return url

    stripped = url[:-1]

    if sys.platform == 'win32' and url.startswith('file:///'):
        # This gets handled specially, because the 'top-level'
        # of a win32 path is actually the drive letter
        if len(url) > MIN_ABS_FILEURL_LENGTH:
            return stripped
        return url

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    if scheme_loc is None:
        # A relative path, as it has no scheme, so the last
        # character can simply be chopped off
        return stripped

    if first_path_slash is None or first_path_slash == len(url)-1:
        # Don't chop off anything if the only slash is the path
        # separating slash
        return url

    return stripped
| 380 | ||
| 1685.1.47
by John Arbash Meinel s comes before u | 381 | |
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 382 | def unescape(url): | 
| 383 | """Unescape relpath from url format. | |
| 384 | ||
| 385 |     This returns a Unicode path from a URL
 | |
| 386 |     """
 | |
| 387 |     # jam 20060427 URLs are supposed to be ASCII only strings
 | |
| 388 |     #       If they are passed in as unicode, urllib.unquote
 | |
| 389 |     #       will return a UNICODE string, which actually contains
 | |
| 390 |     #       utf-8 bytes. So we have to ensure that they are
 | |
| 391 |     #       plain ASCII strings, or the final .decode will
 | |
| 392 |     #       try to encode the UNICODE => ASCII, and then decode
 | |
| 393 |     #       it into utf-8.
 | |
| 394 | try: | |
| 395 | url = str(url) | |
| 396 | except UnicodeError, e: | |
| 397 | raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,)) | |
| 1685.1.80
by Wouter van Heyst more code cleanup | 398 | |
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 399 | unquoted = urllib.unquote(url) | 
| 400 | try: | |
| 401 | unicode_path = unquoted.decode('utf-8') | |
| 402 | except UnicodeError, e: | |
| 403 | raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,)) | |
| 404 | return unicode_path | |
| 405 | ||
| 406 | ||
| 407 | # These are characters that if escaped, should stay that way
 | |
_no_decode_chars = ';/?:@&=+$,#'
_no_decode_ords = [ord(char) for char in _no_decode_chars]
# Both the lower case and the upper case hex form of every such character
_no_decode_hex = ['%02x' % code for code in _no_decode_ords]
_no_decode_hex.extend(['%02X' % code for code in _no_decode_ords])

# Maps a 2 digit hex string (lower or upper case) to the text it
# should be displayed as.
_hex_display_map = {}
for code in range(256):
    _hex_display_map['%02x' % code] = chr(code)
    _hex_display_map['%02X' % code] = chr(code)
# These entries stay escaped: they map back to '%' plus their hex digits
for escaped in _no_decode_hex:
    _hex_display_map[escaped] = '%' + escaped

# These characters should not be escaped
_url_safe_characters = set(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
    '_.-/'
    ';?:@&=+$,%#')
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 422 | |
| 423 | ||
| 1685.1.54
def unescape_for_display(url, encoding):
    """Decode what you can for a URL, so that we get a nice looking path.

    This will turn file:// urls into local paths, and try to decode
    any portions of a http:// style url that it can.

    Any sections of the URL which can't be represented in the encoding or
    need to stay as escapes are left alone.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
         specified encoding.
    """
    assert encoding is not None, 'you cannot specify None for the display encoding.'
    if url.startswith('file://'):
        try:
            path = local_path_from_url(url)
            # Only a check that the path is representable; the encoded
            # result itself is discarded.
            path.encode(encoding)
            return path
        except UnicodeError:
            return url

    # Split into sections to try to decode utf-8
    sections = url.split('/')
    for section_idx in xrange(1, len(sections)):
        # Everything after the first piece was preceded by a '%'
        pieces = sections[section_idx].split('%')
        for piece_idx in xrange(1, len(pieces)):
            piece = pieces[piece_idx]
            hex_digits = piece[:2]
            remainder = piece[2:]
            try:
                pieces[piece_idx] = _hex_display_map[hex_digits] + remainder
            except KeyError:
                # Not a known escape: put back the percent symbol
                pieces[piece_idx] = '%' + piece
            except UnicodeDecodeError:
                pieces[piece_idx] = unichr(int(hex_digits, 16)) + remainder
        unescaped = ''.join(pieces)

        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # If this path segment cannot be properly utf-8 decoded
            # after doing unescaping we will just leave it alone
            continue

        try:
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # If this chunk cannot be encoded in the local
            # encoding, then we should leave it alone
            continue

        # Otherwise take the url decoded one
        sections[section_idx] = decoded
    return u'/'.join(sections)