bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 1861.2.6
by Alexander Belchenko branding: change Bazaar-NG to Bazaar | 1 | # Bazaar -- distributed version control
 | 
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 2 | #
 | 
| 2052.3.2
by John Arbash Meinel Change Copyright .. by Canonical to Copyright ... Canonical | 3 | # Copyright (C) 2006 Canonical Ltd
 | 
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 4 | #
 | 
| 5 | # This program is free software; you can redistribute it and/or modify
 | |
| 6 | # it under the terms of the GNU General Public License as published by
 | |
| 7 | # the Free Software Foundation; either version 2 of the License, or
 | |
| 8 | # (at your option) any later version.
 | |
| 9 | #
 | |
| 10 | # This program is distributed in the hope that it will be useful,
 | |
| 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| 13 | # GNU General Public License for more details.
 | |
| 14 | #
 | |
| 15 | # You should have received a copy of the GNU General Public License
 | |
| 16 | # along with this program; if not, write to the Free Software
 | |
| 17 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| 18 | ||
| 19 | """A collection of functions for handling URL operations."""
 | |
| 20 | ||
| 1685.1.49
by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname | 21 | import os | 
| 1685.1.50
by John Arbash Meinel Added an re for handling scheme paths. | 22 | import re | 
| 23 | import sys | |
| 1996.3.12
by John Arbash Meinel Change how 'revision' is imported to avoid problems later | 24 | |
| 25 | from bzrlib.lazy_import import lazy_import | |
| 26 | lazy_import(globals(), """ | |
| 27 | from posixpath import split as _posix_split, normpath as _posix_normpath
 | |
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 28 | import urllib
 | 
| 3242.3.26
by Aaron Bentley Implement rebase_url | 29 | import urlparse
 | 
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 30 | |
| 1996.3.12
by John Arbash Meinel Change how 'revision' is imported to avoid problems later | 31 | from bzrlib import (
 | 
| 32 |     errors,
 | |
| 33 |     osutils,
 | |
| 34 |     )
 | |
| 35 | """) | |
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 36 | |
| 37 | ||
| 1685.1.49
def basename(url, exclude_trailing_slash=True):
    """Return the final component of a URL.

    :param url: The URL to inspect
    :param exclude_trailing_slash: When true, a URL that looks like
        "path/to/foo/" yields 'foo' rather than ''
    :return: Only the last component of the URL. This can be '' when
        exclude_trailing_slash is false, or when the URL is at its root.
    """
    _parent, last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return last_component
| 49 | ||
| 50 | ||
def dirname(url, exclude_trailing_slash=True):
    """Return the parent directory of the given URL.

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Drop a final slash before splitting
        (http://host/foo/ is treated as http://host/foo, but
        http://host/ stays http://host/)
    :return: The URL with its last path chunk removed
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    parent, _last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return parent
| 63 | ||
| 64 | ||
| 1685.1.45
def escape(relpath):
    """Percent-quote relpath so that it is a valid url.

    Unicode input is encoded as UTF-8 before it is quoted.

    :param relpath: The path to quote (str or unicode)
    :return: The quoted path as a plain str
    """
    raw_path = relpath
    if isinstance(raw_path, unicode):
        raw_path = raw_path.encode('utf-8')
    quoted = urllib.quote(raw_path)
    # Once quoted and encoded the path should be pure ASCII; str() simply
    # enforces that.
    return str(quoted)
| 72 | ||
| 73 | ||
| 1685.1.46
def file_relpath(base, path):
    """Compute just the relative sub-portion of a url.

    This assumes that both paths are already fully specified file:// URLs.

    :param base: The file:// URL to compute the result relative to
    :param path: The file:// URL to express relative to base
    :return: The escaped relative path, as computed by osutils.relpath on
        the two local paths
    :raises ValueError: if base is shorter than MIN_ABS_FILEURL_LENGTH
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        raise ValueError('Length of base must be equal or'
            ' exceed the platform minimum url length (which is %d)' %
            MIN_ABS_FILEURL_LENGTH)
    # Work on local filesystem paths, then turn the result back into
    # url-escaped form.
    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    relative = osutils.relpath(local_base, local_path)
    return escape(relative)
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 86 | |
| 87 | ||
| 1685.1.49
def _find_scheme_and_separator(url):
    """Find the scheme separator (://) and the first path separator.

    This is just a helper function for other path utilities.
    It could probably be replaced by urlparse.

    :param url: The URL to inspect
    :return: A tuple (scheme_length, first_path_slash). Both are None when
        url does not match _url_scheme_re. first_path_slash is None when
        there is no '/' after the '://'; otherwise it is the index in url
        of the first such '/'.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_length = len(match.group('scheme'))
    # Position of the first slash after the ://, relative to the start of
    # the 'path' group.
    slash_in_path = match.group('path').find('/')
    if slash_in_path == -1:
        return scheme_length, None
    # The path group begins right after 'scheme://', i.e. 3 characters past
    # the end of the scheme.
    return scheme_length, slash_in_path + scheme_length + 3
| 1685.1.49
by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname | 107 | |
| 108 | ||
| 1685.1.55
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it will be treated correctly.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'
    """
    if not args:
        return base

    scheme = None
    base_match = _url_scheme_re.match(base)
    if base_match is None:
        segments = base.split('/')
    else:
        scheme = base_match.group('scheme')
        segments = base_match.group('path').split('/')
        if segments[-1:] == ['']:
            # Strip off a trailing slash
            # This helps both when we are at the root, and when
            # 'base' has an extra slash at the end
            segments = segments[:-1]

    # With a URL scheme the path must be absolute, so it is represented as
    # an abs path (leading '') while joining and the root is removed again
    # at the end. Otherwise use an empty host and leave the path alone -
    # this might be a relative url fragment.
    remove_root = scheme is not None and len(segments) >= 1
    if remove_root:
        host = segments[:1]
        segments = [''] + segments[1:]
    else:
        host = []

    for arg in args:
        arg_match = _url_scheme_re.match(arg)
        if arg_match is None:
            # Relative section: let joinpath do the '.'/'..' handling.
            segments = joinpath('/'.join(segments), arg).split('/')
            continue
        # Absolute URL
        scheme = arg_match.group('scheme')
        # this skips .. normalisation, making http://host/../../..
        # be rather strange.
        segments = arg_match.group('path').split('/')
        # set the host and path according to new absolute URL, discarding
        # any previous values.
        # XXX: duplicates mess from earlier in this function.  This URL
        # manipulation code needs some cleaning up.
        if scheme is not None and len(segments) >= 1:
            host = segments[:1]
            # url scheme implies absolute path.
            segments = [''] + segments[1:]
        else:
            # no url scheme we take the path as is.
            host = []

    if remove_root and segments[0:1] == ['']:
        del segments[0]
    if host:
        # Remove the leading slash from the path, so long as it isn't also the
        # trailing slash, which we want to keep if present.
        if len(segments) > 1 and segments[0] == '':
            del segments[0]
        segments = host + segments

    joined = '/'.join(segments)
    if scheme is None:
        return joined
    return scheme + '://' + joined
| 184 | ||
| 185 | ||
| 2018.5.46
def joinpath(base, *args):
    """Join URL path segments to a URL path segment.

    This is somewhat like osutils.joinpath, but intended for URLs.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The path to start from
    :param args: Path sections to apply in order. A section starting with
        '/' replaces everything accumulated so far; '.' chunks are skipped
        and '..' chunks remove the previous chunk.
    :return: The joined path ('/' when only the root is left)
    :raises errors.InvalidURLJoin: if a '..' chunk would go above the root
        of an absolute path, or above the start of a relative one
    """
    path = base.split('/')
    if len(path) > 1 and path[-1] == '':
        # If the path ends in a trailing /, remove it.
        path.pop()
    for arg in args:
        if arg.startswith('/'):
            path = []
        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            elif chunk == '..':
                # [''] is the root of an absolute path; [] means a relative
                # path has already been consumed completely. Neither has
                # anything left to remove, and popping the empty list would
                # otherwise surface as a bare IndexError.
                if not path or path == ['']:
                    raise errors.InvalidURLJoin('Cannot go above root',
                            base, args)
                path.pop()
            else:
                path.append(chunk)
    if path == ['']:
        return '/'
    return '/'.join(path)
| 217 | ||
| 218 | ||
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 219 | # jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
 | 
def _posix_local_path_from_url(url):
    """Convert a url like file:///path/to/foo into /path/to/foo

    :raises errors.InvalidURL: if url does not start with file:///
    """
    prefix = 'file://'
    if not url.startswith(prefix + '/'):
        raise errors.InvalidURL(url, 'local urls must start with file:///')
    # Only two of the three slashes are removed, so the result keeps its
    # leading '/'.
    return unescape(url[len(prefix):])
| 226 | ||
| 227 | ||
def _posix_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///path/to/foo

    This also handles transforming escaping unicode characters, etc.
    """
    # posixpath is used directly (rather than os.path) so that this can be
    # tested on non-posix platforms
    absolute = osutils._posix_abspath(path)
    normalized = _posix_normpath(absolute)
    return 'file://' + escape(normalized)
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 237 | |
| 238 | ||
| 239 | def _win32_local_path_from_url(url): | |
| 1711.4.4
by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior | 240 | """Convert a url like file:///C:/path/to/foo into C:/path/to/foo""" | 
| 2162.2.7
by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path | 241 | if not url.startswith('file://'): | 
| 242 | raise errors.InvalidURL(url, 'local urls must start with file:///, ' | |
| 243 | 'UNC path urls must start with file://') | |
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 244 |     # We strip off all 3 slashes
 | 
| 2162.2.7
by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path | 245 | win32_url = url[len('file:'):] | 
| 2162.2.2
by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path) | 246 |     # check for UNC path: //HOST/path
 | 
| 2162.2.7
by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path | 247 | if not win32_url.startswith('///'): | 
| 2162.2.2
by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path) | 248 | if (win32_url[2] == '/' | 
| 249 | or win32_url[3] in '|:'): | |
| 250 | raise errors.InvalidURL(url, 'Win32 UNC path urls' | |
| 2162.2.7
by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path | 251 | ' have form file://HOST/path') | 
| 2162.2.2
by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path) | 252 | return unescape(win32_url) | 
| 3503.1.2
by adwi2 Permits Windows to serve all paths on all drives. | 253 | |
| 254 |     # allow empty paths so we can serve all roots
 | |
| 255 | if win32_url == '///': | |
| 256 | return '/' | |
| 257 | ||
| 2162.2.2
by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path) | 258 |     # usual local path with drive letter
 | 
| 2162.2.7
by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path | 259 | if (win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz' | 
| 1711.4.4
by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior | 260 | 'ABCDEFGHIJKLMNOPQRSTUVWXYZ') | 
| 2162.2.7
by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path | 261 | or win32_url[4] not in '|:' | 
| 262 | or win32_url[5] != '/'): | |
| 1711.4.4
by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior | 263 | raise errors.InvalidURL(url, 'Win32 file urls start with' | 
| 1711.4.8
by John Arbash Meinel switch to prefering lowercase drive letters, since that matches os.getcwd() drive letters | 264 | ' file:///x:/, where x is a valid drive letter') | 
| 2162.2.7
by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path | 265 | return win32_url[3].upper() + u':' + unescape(win32_url[5:]) | 
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 266 | |
| 267 | ||
| 268 | def _win32_local_path_to_url(path): | |
| 1711.4.4
by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior | 269 | """Convert a local path like ./foo into a URL like file:///C:/path/to/foo | 
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 270 | |
| 271 |     This also handles transforming escaping unicode characters, etc.
 | |
| 272 |     """
 | |
| 273 |     # importing directly from ntpath allows us to test this 
 | |
| 1711.4.4
by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior | 274 |     # on non-win32 platform
 | 
| 275 |     # FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
 | |
| 276 |     #       which actually strips trailing space characters.
 | |
| 277 |     #       The worst part is that under linux ntpath.abspath has different
 | |
| 278 |     #       semantics, since 'nt' is not an available module.
 | |
| 3503.1.1
by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from|to)_url | 279 | if path == '/': | 
| 3503.1.2
by adwi2 Permits Windows to serve all paths on all drives. | 280 | return 'file:///' | 
| 3503.1.1
by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from|to)_url | 281 | |
| 2279.4.2
by Alexander Belchenko Don't do normpath after abspath, because this function is called inside abspath | 282 | win32_path = osutils._win32_abspath(path) | 
| 2162.2.2
by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path) | 283 |     # check for UNC path \\HOST\path
 | 
| 284 | if win32_path.startswith('//'): | |
| 2162.2.7
by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path | 285 | return 'file:' + escape(win32_path) | 
| 3234.3.1
by Alexander Belchenko ensure that local_path_to_url() always returns plain string, not unicode. | 286 | return ('file:///' + str(win32_path[0].upper()) + ':' + | 
| 287 | escape(win32_path[2:])) | |
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 288 | |
| 289 | ||
# Select the local path <-> URL converters and the minimum length of an
# absolute file URL for the running platform. The win32 minimum is always
# defined so that it can be used on every platform.
WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')

if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
else:
    local_path_to_url = _posix_local_path_to_url
    local_path_from_url = _posix_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = len('file:///')
| 1685.1.48
by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests | 300 | |
| 301 | ||
| 1685.1.50
by John Arbash Meinel Added an re for handling scheme paths. | 302 | _url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$') | 
| 2208.4.1
by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'. | 303 | _url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})') | 
| 304 | ||
| 305 | ||
def _unescape_safe_chars(matchobj):
    """re.sub callback to convert hex-escapes to plain characters (if safe).

    e.g. '%7E' will be converted to '~'.

    :param matchobj: A match whose whole text is a '%XX' escape
    :return: The decoded character when it is listed in
        _url_dont_escape_characters, otherwise the escape upper-cased
    """
    escape_text = matchobj.group(0)
    decoded = chr(int(escape_text[1:], 16))
    if decoded in _url_dont_escape_characters:
        return decoded
    return escape_text.upper()
| 1685.1.50
by John Arbash Meinel Added an re for handling scheme paths. | 317 | |
| 318 | ||
def normalize_url(url):
    """Make sure that a path string is in fully normalized URL form.

    This handles URLs which have unicode characters, spaces,
    special characters, etc.

    It has two basic modes of operation, depending on whether the
    supplied string starts with a url specifier (scheme://) or not.
    If it does not have a specifier it is considered a local path,
    and will be converted into a file:/// url. Non-ascii characters
    will be encoded using utf-8.
    If it does have a url specifier, it will be treated as a "hybrid"
    URL. Basically, a URL that should have URL special characters already
    escaped (like +?&# etc), but may have unicode characters, etc
    which would not be valid in a real URL.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL which only includes 7-bit ASCII characters.
    :raises errors.InvalidURL: if a non-unicode URL with a scheme contains a
        character that is not in _url_safe_characters
    """
    m = _url_scheme_re.match(url)
    if not m:
        return local_path_to_url(url)
    scheme = m.group('scheme')
    path = m.group('path')
    if not isinstance(url, unicode):
        # A plain string must already be fully escaped: reject anything
        # that is not a known-safe character.
        for c in url:
            if c not in _url_safe_characters:
                raise errors.InvalidURL(url, 'URLs can only contain specific'
                                    ' safe characters (not %r)' % c)
    else:
        # We have a unicode (hybrid) url: percent-escape the utf-8 bytes of
        # every character that is not known to be safe.
        path_chars = list(path)
        for i, char in enumerate(path_chars):
            if char not in _url_safe_characters:
                path_chars[i] = ''.join(
                    ['%%%02X' % ord(byte) for byte in char.encode('utf-8')])
        path = ''.join(path_chars)
    # Normalise existing escapes: unescape the characters that never need
    # escaping and upper-case the hex digits of the rest.
    path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
    return str(scheme + '://' + path)
| 1685.1.50
by John Arbash Meinel Added an re for handling scheme paths. | 362 | |
| 363 | ||
| 1685.1.70
def relative_url(base, other):
    """Return a path to other from base.

    If other is unrelated to base, return other. Else return a relative path.
    This assumes no symlinks as part of the url.
    """
    _unused, base_first_slash = _find_scheme_and_separator(base)
    if base_first_slash is None:
        return other

    _unused, other_first_slash = _find_scheme_and_separator(other)
    if other_first_slash is None:
        return other

    # Everything up to the first path slash (scheme and host) must agree;
    # this takes care of differing schemes or hosts
    base_prefix = base[:base_first_slash]
    if base_prefix != other[:other_first_slash]:
        return other
    if sys.platform == 'win32' and base_prefix == 'file://':
        # Local paths on different drives cannot be expressed relatively.
        base_drive = base[base_first_slash+1:base_first_slash+3]
        other_drive = other[other_first_slash+1:other_first_slash+3]
        if base_drive != other_drive:
            return other

    base_path = base[base_first_slash+1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]

    base_sections = base_path.split('/')
    other_sections = other[other_first_slash+1:].split('/')
    if base_sections == ['']:
        base_sections = []
    if other_sections == ['']:
        other_sections = []

    # Count the leading sections the two paths have in common.
    match_len = 0
    for base_part, other_part in zip(base_sections, other_sections):
        if base_part != other_part:
            break
        match_len += 1

    # Climb out of what is left of base, then descend into what is left of
    # other.
    output_sections = ['..'] * (len(base_sections) - match_len)
    output_sections.extend(other_sections[match_len:])

    return "/".join(output_sections) or "."
| 414 | ||
| 415 | ||
| 1711.2.43
by John Arbash Meinel Split out win32 specific code so that it can be tested on all platforms. | 416 | def _win32_extract_drive_letter(url_base, path): | 
| 417 | """On win32 the drive letter needs to be added to the url base.""" | |
| 418 |     # Strip off the drive letter
 | |
| 419 |     # path is currently /C:/foo
 | |
| 420 | if len(path) < 3 or path[2] not in ':|' or path[3] != '/': | |
| 421 | raise errors.InvalidURL(url_base + path, | |
| 422 | 'win32 file:/// paths need a drive letter') | |
| 423 | url_base += path[0:3] # file:// + /C: | |
| 424 | path = path[3:] # /foo | |
| 425 | return url_base, path | |
| 426 | ||
| 427 | ||
| 1685.1.49
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string if
        we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None:
        # We have either a relative path, or no separating slash
        if scheme_loc is not None:
            # Scheme with no path
            return url, ''
        # Relative path
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # We have a fully defined path
    url_base = url[:first_path_slash] # http://host, file://
    path = url[first_path_slash:] # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter out of the path:
        # url_base is currently file:// and path is /C:/foo;
        # afterwards they should be file:///C: and /foo
        url_base, path = _win32_extract_drive_letter(url_base, path)

    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    parent_path, child = _posix_split(path)
    return url_base + parent_path, child
| 466 | ||
| 1685.1.46
by John Arbash Meinel Sorting functions by name. | 467 | |
def _win32_strip_local_trailing_slash(url):
    """Drop the last character of url unless url is too short to shorten.

    A url whose length does not exceed WIN32_MIN_ABS_FILEURL_LENGTH is
    returned unchanged (presumably because it is just a drive root).
    The caller is expected to pass a url that ends with '/'.
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
| 474 | ||
| 475 | ||
def strip_trailing_slash(url):
    """Remove a trailing slash from url, unless url is a root path.

    What counts as a 'root path' depends on the platform.
    URLs are assumed to be valid netloc urls of the form
        scheme://host/path
    The slash that separates the host from the path is never removed.
    Relative paths are handled as well.

    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo

    On win32, file:// urls are delegated to
    _win32_strip_local_trailing_slash, which leaves short urls
    (presumably bare drive roots) untouched.
    """
    if not url.endswith('/'):
        # No trailing slash, so there is nothing to strip.
        return url
    if sys.platform == 'win32' and url.startswith('file://'):
        return _win32_strip_local_trailing_slash(url)

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    if scheme_loc is not None:
        # Absolute URL: keep the slash when it is the one that separates
        # the host from the path (or when no such slash was found).
        if first_path_slash is None or first_path_slash == len(url)-1:
            return url

    # Either a relative path, or an absolute URL with a non-root path.
    return url[:-1]
| 515 | ||
| 1685.1.47
by John Arbash Meinel s comes before u | 516 | |
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 517 | def unescape(url): | 
| 518 | """Unescape relpath from url format. | |
| 519 | ||
| 520 |     This returns a Unicode path from a URL
 | |
| 521 |     """
 | |
| 522 |     # jam 20060427 URLs are supposed to be ASCII only strings
 | |
| 523 |     #       If they are passed in as unicode, urllib.unquote
 | |
| 524 |     #       will return a UNICODE string, which actually contains
 | |
| 525 |     #       utf-8 bytes. So we have to ensure that they are
 | |
| 526 |     #       plain ASCII strings, or the final .decode will
 | |
| 527 |     #       try to encode the UNICODE => ASCII, and then decode
 | |
| 528 |     #       it into utf-8.
 | |
| 529 | try: | |
| 530 | url = str(url) | |
| 531 | except UnicodeError, e: | |
| 532 | raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,)) | |
| 1685.1.80
by Wouter van Heyst more code cleanup | 533 | |
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 534 | unquoted = urllib.unquote(url) | 
| 535 | try: | |
| 536 | unicode_path = unquoted.decode('utf-8') | |
| 537 | except UnicodeError, e: | |
| 538 | raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,)) | |
| 539 | return unicode_path | |
| 540 | ||
| 541 | ||
# Characters whose percent-escapes must stay escaped when a URL is
# prepared for display.
_no_decode_chars = ';/?:@&=+$,#'
_no_decode_ords = [ord(c) for c in _no_decode_chars]
# Two-digit hex spellings of those characters: every lowercase form
# first, followed by every uppercase form.
_no_decode_hex = ['%02x' % o for o in _no_decode_ords]
_no_decode_hex += ['%02X' % o for o in _no_decode_ords]

# Maps each two-digit hex escape, in either letter case, to the character
# it stands for.
_hex_display_map = {}
_hex_display_map.update([('%02x' % o, chr(o)) for o in range(256)])
_hex_display_map.update([('%02X' % o, chr(o)) for o in range(256)])
# The escapes listed in _no_decode_hex are overridden so that they map
# back to their own escaped form.
_hex_display_map.update((escape, '%' + escape) for escape in _no_decode_hex)

# Characters that never need percent-encoding; an escape of one of these
# can always be safely undone.
_url_dont_escape_characters = set(
    "abcdefghijklmnopqrstuvwxyz"  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"  # Uppercase alpha
    "0123456789"                  # Numbers
    "-._~"                        # Unreserved characters
    )

# Characters that should not be escaped
_url_safe_characters = set(
    "abcdefghijklmnopqrstuvwxyz"  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"  # Uppercase alpha
    "0123456789"                  # Numbers
    "_.-!~*'()"                   # Unreserved characters
    "/;?:@&=+$,"                  # Reserved characters
    "%#"                          # Extra reserved characters
    )
| 1685.1.45
by John Arbash Meinel Moved url functions into bzrlib.urlutils | 570 | |
def unescape_for_display(url, encoding):
    """Undo as much URL escaping as is safe, to get a readable path.

    file:// urls are converted to local paths; for any other url each
    '/'-separated section after the first is unescaped on its own.

    Sections that cannot be represented in the requested encoding, or
    that must remain escaped, are left as they were.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
         specified encoding.
    :raises ValueError: if encoding is None.
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')

    if url.startswith('file://'):
        try:
            path = local_path_from_url(url)
            # Only checks that the path is encodable; the result is unused.
            path.encode(encoding)
        except UnicodeError:
            return url
        else:
            return path

    # Work on each '/'-separated section independently.  Section 0 is
    # never touched.
    sections = url.split('/')
    for section_index in xrange(1, len(sections)):
        pieces = sections[section_index].split('%')
        # Every piece after the first followed a '%' in the original.
        for piece_index in xrange(1, len(pieces)):
            piece = pieces[piece_index]
            hex_pair, remainder = piece[:2], piece[2:]
            try:
                pieces[piece_index] = _hex_display_map[hex_pair] + remainder
            except KeyError:
                # Not a known escape: put back the percent symbol
                pieces[piece_index] = '%' + piece
            except UnicodeDecodeError:
                pieces[piece_index] = unichr(int(hex_pair, 16)) + remainder
        unescaped = ''.join(pieces)
        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # This section is not valid utf-8 once unescaped, so keep
            # the original text for it.
            continue
        try:
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # The display encoding cannot represent this section, so
            # keep the original text for it.
            continue
        # Safe to show the decoded form.
        sections[section_index] = decoded
    return u'/'.join(sections)
| 2512.4.1
by Ian Clatworthy Fixes #115491 - 'branch lp:projname' now creates ./projname as exected | 627 | |
| 628 | ||
def derive_to_location(from_location):
    """Work out a default TO_LOCATION from a FROM_LOCATION.

    Usually the result is the last path segment, e.g.
    http://foo/bar => bar.  Some logical locations contain no '/' at all;
    for those the result is whatever follows the first ':', e.g.
    lp:foo-bar => foo-bar.  The same rule covers a Windows drive given
    without a path, e.g. c:foo-bar => foo-bar.
    When there is no '/', no path separator and no ':' (or the ':' is
    the very first character), from_location is returned unchanged.
    """
    has_separator = "/" in from_location or os.sep in from_location
    if has_separator:
        # Trailing slashes and backslashes are ignored when picking the
        # final segment.
        return os.path.basename(from_location.rstrip("/\\"))

    colon_index = from_location.find(":")
    if colon_index > 0:
        return from_location[colon_index+1:]
    return from_location
| 3242.3.26
by Aaron Bentley Implement rebase_url | 648 | |
| 3242.3.35
by Aaron Bentley Cleanups and documentation | 649 | |
def _is_absolute(url):
    """Return True if joining url onto '/foo' gives back url itself."""
    rejoined = osutils.pathjoin('/foo', url)
    return rejoined == url
| 652 | ||
| 3242.3.35
by Aaron Bentley Cleanups and documentation | 653 | |
def rebase_url(url, old_base, new_base):
    """Re-express a path relative to old_base as a path relative to new_base.

    The result will be a relative path.
    A url that has a scheme, or that is an absolute path, is returned
    unaltered.

    :raises errors.InvalidRebaseURLs: if the first two components of the
        parsed bases (scheme and network location) are not the same.
    """
    scheme, separator = _find_scheme_and_separator(url)
    if scheme is not None or _is_absolute(url):
        return url

    old_parsed = urlparse.urlparse(old_base)
    new_parsed = urlparse.urlparse(new_base)
    old_origin, old_path = old_parsed[:2], old_parsed[2]
    new_origin, new_path = new_parsed[:2], new_parsed[2]
    if old_origin != new_origin:
        raise errors.InvalidRebaseURLs(old_base, new_base)

    # Resolve url against the old base path first, then describe that
    # target relative to the new base path.
    target_path = join(old_path, url)
    return determine_relative_path(new_path, target_path)
| 3242.3.26
by Aaron Bentley Implement rebase_url | 671 | |
| 672 | ||
def determine_relative_path(from_path, to_path):
    """Return the relative path leading from from_path to to_path.

    The result has one '..' for each segment of from_path beyond the
    leading segments both paths share, followed by the remaining
    segments of to_path.  '.' is returned when nothing is left.
    """
    source_parts = osutils.splitpath(from_path)
    target_parts = osutils.splitpath(to_path)

    # Count how many leading segments the two paths have in common.
    shared = 0
    for source_part, target_part in zip(source_parts, target_parts):
        if source_part != target_part:
            break
        shared += 1

    climb = ['..'] * (len(source_parts) - shared)
    segments = climb + target_parts[shared:]
    if not segments:
        return '.'
    return osutils.pathjoin(*segments)