bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
1861.2.6
by Alexander Belchenko
 branding: change Bazaar-NG to Bazaar  | 
1  | 
# Bazaar -- distributed version control
 | 
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
2  | 
#
 | 
| 
2052.3.2
by John Arbash Meinel
 Change Copyright .. by Canonical to Copyright ... Canonical  | 
3  | 
# Copyright (C) 2006 Canonical Ltd
 | 
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
4  | 
#
 | 
5  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
6  | 
# it under the terms of the GNU General Public License as published by
 | 
|
7  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
8  | 
# (at your option) any later version.
 | 
|
9  | 
#
 | 
|
10  | 
# This program is distributed in the hope that it will be useful,
 | 
|
11  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
12  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
13  | 
# GNU General Public License for more details.
 | 
|
14  | 
#
 | 
|
15  | 
# You should have received a copy of the GNU General Public License
 | 
|
16  | 
# along with this program; if not, write to the Free Software
 | 
|
17  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
18  | 
||
19  | 
"""A collection of functions for handling URL operations."""
 | 
|
20  | 
||
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
21  | 
import os  | 
| 
1685.1.50
by John Arbash Meinel
 Added an re for handling scheme paths.  | 
22  | 
import re  | 
23  | 
import sys  | 
|
| 
1996.3.12
by John Arbash Meinel
 Change how 'revision' is imported to avoid problems later  | 
24  | 
|
25  | 
from bzrlib.lazy_import import lazy_import  | 
|
26  | 
lazy_import(globals(), """  | 
|
27  | 
from posixpath import split as _posix_split, normpath as _posix_normpath
 | 
|
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
28  | 
import urllib
 | 
29  | 
||
| 
1996.3.12
by John Arbash Meinel
 Change how 'revision' is imported to avoid problems later  | 
30  | 
from bzrlib import (
 | 
31  | 
    errors,
 | 
|
32  | 
    osutils,
 | 
|
33  | 
    )
 | 
|
34  | 
""")  | 
|
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
35  | 
|
36  | 
||
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
37  | 
def basename(url, exclude_trailing_slash=True):
    """Give back only the trailing path component of a URL.

    :param url: The URL to inspect
    :param exclude_trailing_slash: When the url looks like "path/to/foo/",
        disregard the closing slash so that 'foo' is returned instead of ''
    :return: The last component of the URL. The result may be '' when
        exclude_trailing_slash is false, or when the URL is already at
        its root.
    """
    pieces = split(url, exclude_trailing_slash=exclude_trailing_slash)
    # split() yields (parent, child); the child is the basename.
    return pieces[1]
|
48  | 
||
49  | 
||
50  | 
def dirname(url, exclude_trailing_slash=True):
    """Give back the parent directory portion of a URL.

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Drop a closing slash before splitting
        (http://host/foo/ is handled like http://host/foo, whereas
        http://host/ is left as http://host/)
    :return: The URL with its last path chunk removed
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    pieces = split(url, exclude_trailing_slash=exclude_trailing_slash)
    # split() yields (parent, child); the parent is the dirname.
    return pieces[0]
|
62  | 
||
63  | 
||
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
64  | 
def escape(relpath):  | 
65  | 
"""Escape relpath to be a valid url."""  | 
|
66  | 
if isinstance(relpath, unicode):  | 
|
67  | 
relpath = relpath.encode('utf-8')  | 
|
68  | 
    # After quoting and encoding, the path should be perfectly
 | 
|
69  | 
    # safe as a plain ASCII string, str() just enforces this
 | 
|
70  | 
return str(urllib.quote(relpath))  | 
|
71  | 
||
72  | 
||
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
73  | 
def file_relpath(base, path):
    """Work out the part of one file URL that lies below another.

    Both arguments are assumed to be fully specified file:// URLs.

    :param base: file:// URL of the containing location
    :param path: file:// URL of the location to express relative to base
    :return: The escaped relative path as computed by osutils.relpath
    :raises ValueError: if base is shorter than MIN_ABS_FILEURL_LENGTH
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        message = ('Length of base must be equal or'
                   ' exceed the platform minimum url length (which is %d)' %
                   MIN_ABS_FILEURL_LENGTH)
        raise ValueError(message)
    # Convert base first, then path, so any conversion error surfaces in
    # the same order as before.
    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    relative = osutils.relpath(local_base, local_path)
    return escape(relative)
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
85  | 
|
86  | 
||
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
87  | 
def _find_scheme_and_separator(url):
    """Locate the scheme separator (://) and the first path separator.

    Helper for the other path utilities in this module; urlparse could
    probably do the same job.

    :param url: The URL (or plain path) to examine
    :return: (None, None) when url has no scheme; (len(scheme), None) when
        there is no slash after the '://'; otherwise (len(scheme), index
        in url of the first slash following the '://')
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_length = len(match.group('scheme'))

    # Look for the slash that starts the path, i.e. the first slash
    # after the ://
    slash_offset = match.group('path').find('/')
    if slash_offset < 0:
        return scheme_length, None
    # Offsets within 'path' start after the scheme and the 3-character
    # '://' separator.
    return scheme_length, slash_offset + scheme_length + 3
|
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
106  | 
|
107  | 
||
| 
1685.1.55
by John Arbash Meinel
 Adding bzrlib.urlutils.join() to handle joining URLs  | 
108  | 
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it will be treated correctly.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The starting URL or path. Returned unchanged when no
        further arguments are given.
    :param args: Sections to append in order. A section that itself
        matches 'scheme://...' replaces the scheme, host and path
        gathered so far; any other section is combined with the current
        path via joinpath().
    :return: The joined URL, with a 'scheme://' prefix only when a scheme
        was found in base or in one of the sections.
    """
    if not args:
        return base
    match = _url_scheme_re.match(base)
    scheme = None
    if match:
        scheme = match.group('scheme')
        path = match.group('path').split('/')
        if path[-1:] == ['']:
            # Strip off a trailing slash
            # This helps both when we are at the root, and when
            # 'base' has an extra slash at the end
            path = path[:-1]
    else:
        path = base.split('/')

    if scheme is not None and len(path) >= 1:
        # The first component after '://' is kept apart as the host so
        # that joinpath() below only ever sees the path portion.
        host = path[:1]
        # the path should be represented as an abs path.
        # we know this must be absolute because of the presence of a URL scheme.
        remove_root = True
        path = [''] + path[1:]
    else:
        # create an empty host, but don't alter the path - this might be a
        # relative url fragment.
        host = []
        remove_root = False

    for arg in args:
        match = _url_scheme_re.match(arg)
        if match:
            # Absolute URL
            scheme = match.group('scheme')
            # this skips .. normalisation, making http://host/../../..
            # be rather strange.
            path = match.group('path').split('/')
            # set the host and path according to new absolute URL, discarding
            # any previous values.
            # XXX: duplicates mess from earlier in this function.  This URL
            # manipulation code needs some cleaning up.
            if scheme is not None and len(path) >= 1:
                host = path[:1]
                path = path[1:]
                # url scheme implies absolute path.
                path = [''] + path
            else:
                # no url scheme we take the path as is.
                host = []
        else:
            # joinpath() works on strings, so round-trip the component
            # list through a '/'-joined string.
            path = '/'.join(path)
            path = joinpath(path, arg)
            path = path.split('/')
    # Drop the artificial leading '' that was added to make the path
    # absolute when base carried a scheme.
    if remove_root and path[0:1] == ['']:
        del path[0]
    if host:
        # Remove the leading slash from the path, so long as it isn't also the
        # trailing slash, which we want to keep if present.
        if path and path[0] == '' and len(path) > 1:
            del path[0]
        path = host + path

    if scheme is None:
        return '/'.join(path)
    return scheme + '://' + '/'.join(path)
|
183  | 
||
184  | 
||
| 
2018.5.46
by Andrew Bennetts
 Fix ChrootTransportDecorator's clone to pass less surprising offsets to the decorated transport's clone.  | 
185  | 
def joinpath(base, *args):
    """Join URL path segments onto a URL path.

    Similar in spirit to osutils.joinpath, but meant for URL paths:
    '.' chunks are dropped, '..' removes the previous chunk, and an
    argument starting with '/' restarts the path from the root.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The starting path
    :param args: Further path sections, applied in order
    :return: The combined path; '/' when only the root remains
    :raises errors.InvalidURLJoin: if a '..' is applied while the path is
        exactly the root
    """
    segments = base.split('/')
    if len(segments) > 1 and segments[-1] == '':
        # A trailing '/' on base leaves an empty last chunk; discard it.
        segments.pop()
    for arg in args:
        if arg.startswith('/'):
            # Absolute section: forget everything collected so far.
            segments = []
        for piece in arg.split('/'):
            if piece == '.':
                continue
            if piece != '..':
                segments.append(piece)
                continue
            if segments == ['']:
                raise errors.InvalidURLJoin('Cannot go above root',
                        base, args)
            segments.pop()
    if segments == ['']:
        return '/'
    return '/'.join(segments)
|
216  | 
||
217  | 
||
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
218  | 
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
 | 
219  | 
def _posix_local_path_from_url(url):
    """Turn a url such as file:///path/to/foo into /path/to/foo

    :param url: A URL that must begin with 'file:///'
    :return: The unescaped local path, still carrying its leading slash
    :raises errors.InvalidURL: if url does not begin with 'file:///'
    """
    if url.startswith('file:///'):
        # Only 'file://' is removed, so the third slash remains as the
        # root of the local path.
        return unescape(url[len('file://'):])
    raise errors.InvalidURL(url, 'local urls must start with file:///')
|
225  | 
||
226  | 
||
227  | 
def _posix_local_path_to_url(path):
    """Turn a local path such as ./foo into a URL such as file:///path/to/foo

    Escaping (including of unicode characters) is applied as part of the
    conversion.

    :param path: A local path, relative or absolute
    :return: The corresponding file:// URL
    """
    # The posixpath-based helpers are used explicitly so that this
    # function can be tested on non-posix platforms
    absolute = osutils._posix_abspath(path)
    normalized = _posix_normpath(absolute)
    return 'file://' + escape(normalized)
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
236  | 
|
237  | 
||
238  | 
def _win32_local_path_from_url(url):
    """Convert a url like file:///C:/path/to/foo into C:/path/to/foo

    UNC urls of the form file://HOST/path are converted to //HOST/path.

    :param url: A file:// URL
    :return: The unescaped local path; for drive-letter URLs the drive
        letter is upper-cased
    :raises errors.InvalidURL: if url does not start with 'file://', is
        too short to hold a host or drive specification, or is otherwise
        not in one of the two accepted forms
    """
    if not url.startswith('file://'):
        raise errors.InvalidURL(url, 'local urls must start with file:///, '
                                     'UNC path urls must start with file://')
    # Strip only the 'file:' prefix; the slashes are kept so that the UNC
    # form (//HOST/path) and the drive form (///C:/path) can be told apart.
    win32_url = url[len('file:'):]
    # check for UNC path: //HOST/path
    if not win32_url.startswith('///'):
        # The length test keeps the index lookups below from raising
        # IndexError on truncated input such as 'file://' or 'file://a'.
        if (len(win32_url) < 4
            or win32_url[2] == '/'
            or win32_url[3] in '|:'):
            raise errors.InvalidURL(url, 'Win32 UNC path urls'
                ' have form file://HOST/path')
        return unescape(win32_url)
    # usual local path with drive letter
    # At least '///x:/' (6 characters) is required before the drive
    # letter, separator and slash can be inspected safely.
    if (len(win32_url) < 6
        or win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
                                'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        or win32_url[4] not in '|:'
        or win32_url[5] != '/'):
        raise errors.InvalidURL(url, 'Win32 file urls start with'
                ' file:///x:/, where x is a valid drive letter')
    return win32_url[3].upper() + u':' + unescape(win32_url[5:])
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
260  | 
|
261  | 
||
262  | 
def _win32_local_path_to_url(path):
    """Turn a local path such as ./foo into a URL such as file:///C:/path/to/foo

    Escaping (including of unicode characters) is applied as part of the
    conversion. A path whose absolute form starts with '//' is treated as
    a UNC path and becomes file://HOST/path.

    :param path: A local path, relative or absolute
    :return: The corresponding file: URL
    """
    # The ntpath-based helper is used explicitly so that this function
    # can be tested on non-win32 platform
    # FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
    #       which actually strips trailing space characters.
    #       The worst part is that under linux ntpath.abspath has different
    #       semantics, since 'nt' is not an available module.
    absolute = osutils._win32_abspath(path)
    # check for UNC path \\HOST\path
    if absolute.startswith('//'):
        return 'file:' + escape(absolute)
    # str() keeps the result a plain string even for unicode input.
    drive_letter = str(absolute[0].upper())
    remainder = escape(absolute[2:])
    return 'file:///' + drive_letter + ':' + remainder
|
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
279  | 
|
280  | 
||
281  | 
# Public conversion entry points. They default to the POSIX
# implementations and are rebound to the win32 ones further down when
# running on win32.
local_path_to_url = _posix_local_path_to_url
local_path_from_url = _posix_local_path_from_url
# Length of the shortest absolute file URL prefix: 'file:///' by default,
# 'file:///C:/' (with a drive letter) on win32.
MIN_ABS_FILEURL_LENGTH = len('file:///')
WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')

if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url

    MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
| 
1685.1.48
by John Arbash Meinel
 Updated strip_trailing_slash to support lots more url stuff, added tests  | 
291  | 
|
292  | 
||
| 
1685.1.50
by John Arbash Meinel
 Added an re for handling scheme paths.  | 
293  | 
# Splits 'scheme://rest' into the named groups 'scheme' and 'path'. The
# scheme must be at least two characters long and contain neither ':'
# nor '/'; 'path' is everything after the '://'.
# NOTE(review): the two-character minimum presumably keeps a win32 drive
# letter from being mistaken for a scheme -- confirm.
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
# Matches a single %XX hex escape (upper or lower case digits).
_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
295  | 
||
296  | 
||
297  | 
def _unescape_safe_chars(matchobj):
    """re.sub callback turning hex-escapes into plain characters when safe.

    e.g. '%7E' will be converted to '~'.

    :param matchobj: A match whose whole text is a '%XX' escape
    :return: The decoded character when it is listed in
        _url_dont_escape_characters, otherwise the escape itself
        upper-cased
    """
    escape_text = matchobj.group(0)
    # Skip the leading '%' and decode the two hex digits.
    decoded = chr(int(escape_text[1:], 16))
    if decoded in _url_dont_escape_characters:
        return decoded
    return escape_text.upper()
|
| 
1685.1.50
by John Arbash Meinel
 Added an re for handling scheme paths.  | 
308  | 
|
309  | 
||
310  | 
def normalize_url(url):
    """Make sure that a path string is in fully normalized URL form.

    This handles URLs which have unicode characters, spaces,
    special characters, etc.

    It has two basic modes of operation, depending on whether the
    supplied string starts with a url specifier (scheme://) or not.
    If it does not have a specifier it is considered a local path,
    and will be converted into a file:/// url. Non-ascii characters
    will be encoded using utf-8.
    If it does have a url specifier, it will be treated as a "hybrid"
    URL. Basically, a URL that should have URL special characters already
    escaped (like +?&# etc), but may have unicode characters, etc
    which would not be valid in a real URL.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL which only includes 7-bit ASCII characters.
    :raises errors.InvalidURL: if a non-unicode url with a scheme contains
        a character outside _url_safe_characters
    """
    m = _url_scheme_re.match(url)
    if not m:
        return local_path_to_url(url)
    scheme = m.group('scheme')
    path = m.group('path')
    if not isinstance(url, unicode):
        # A byte-string URL must already be fully escaped, so every
        # character (scheme included) has to be in the safe set.
        for c in url:
            if c not in _url_safe_characters:
                raise errors.InvalidURL(url, 'URLs can only contain specific'
                                            ' safe characters (not %r)' % c)
        # Normalise the escapes: decode those for safe characters and
        # upper-case the hex digits of the rest.
        path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
        return str(scheme + '://' + path)

    # We have a unicode (hybrid) url
    path_chars = list(path)

    for i, char in enumerate(path_chars):
        if char not in _url_safe_characters:
            # Replace the character by the %XX escapes of its UTF-8 bytes.
            path_chars[i] = ''.join(
                ['%%%02X' % ord(c) for c in char.encode('utf-8')])
    path = ''.join(path_chars)
    path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
    return str(scheme + '://' + path)
|
| 
1685.1.50
by John Arbash Meinel
 Added an re for handling scheme paths.  | 
353  | 
|
354  | 
||
| 
1685.1.70
by Wouter van Heyst
 working on get_parent, set_parent and relative urls, broken  | 
355  | 
def relative_url(base, other):
    """Express other as a path relative to base.

    When other has nothing in common with base (no scheme/first slash
    found in either, a different scheme-and-host prefix, or on win32 a
    different drive for file:// URLs), other is returned as is. Otherwise
    a relative path is returned. No symlinks are assumed to be part of
    the url.

    :param base: The URL to start from
    :param other: The URL to reach
    :return: other itself, or a '/'-joined relative path ('.' when the
        two locations are the same)
    """
    _unused, base_slash = _find_scheme_and_separator(base)
    if base_slash is None:
        return other

    _unused, other_slash = _find_scheme_and_separator(other)
    if other_slash is None:
        return other

    # Everything before the first path slash covers the scheme and the
    # host, so comparing it takes care of differing schemes or hosts
    base_prefix = base[:base_slash]
    other_prefix = other[:other_slash]
    if base_prefix != other_prefix:
        return other
    if sys.platform == 'win32' and base_prefix == 'file://':
        # Locations on different drives cannot be related to each other.
        base_drive = base[base_slash+1:base_slash+3]
        other_drive = other[other_slash+1:other_slash+3]
        if base_drive != other_drive:
            return other

    base_path = base[base_slash+1:]
    other_path = other[other_slash+1:]

    if base_path.endswith('/'):
        base_path = base_path[:-1]

    base_parts = base_path.split('/')
    other_parts = other_path.split('/')

    # An empty path splits into [''], which really means "no sections".
    if base_parts == ['']:
        base_parts = []
    if other_parts == ['']:
        other_parts = []

    # Count the leading sections that both paths share.
    shared = 0
    for base_part, other_part in zip(base_parts, other_parts):
        if base_part != other_part:
            break
        shared += 1

    # Climb out of what is left of base, then descend into other.
    result = ['..'] * (len(base_parts) - shared)
    result.extend(other_parts[shared:])

    return "/".join(result) or "."
|
405  | 
||
406  | 
||
| 
1711.2.43
by John Arbash Meinel
 Split out win32 specific code so that it can be tested on all platforms.  | 
407  | 
def _win32_extract_drive_letter(url_base, path):  | 
408  | 
"""On win32 the drive letter needs to be added to the url base."""  | 
|
409  | 
    # Strip off the drive letter
 | 
|
410  | 
    # path is currently /C:/foo
 | 
|
411  | 
if len(path) < 3 or path[2] not in ':|' or path[3] != '/':  | 
|
412  | 
raise errors.InvalidURL(url_base + path,  | 
|
413  | 
'win32 file:/// paths need a drive letter')  | 
|
414  | 
url_base += path[0:3] # file:// + /C:  | 
|
415  | 
path = path[3:] # /foo  | 
|
416  | 
return url_base, path  | 
|
417  | 
||
418  | 
||
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
419  | 
def split(url, exclude_trailing_slash=True):
    """Break a URL into its parent location and its final path element.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: When true, a final '/' that belongs to
        the path is dropped before splitting (a '/' that is the whole
        path, directly after the scheme/host part, is kept)

    :return: (parent_url, child_dir).  child_dir may be the empty string
        if we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None:
        if scheme_loc is not None:
            # There is a scheme but no path after it: nothing to split off
            return url, ''
        # No scheme and no separating slash: treat it as a relative path
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # Fully qualified URL: cut it at the slash that starts the path
    prefix = url[:first_path_slash] # http://host, file://
    path = url[first_path_slash:] # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # The drive letter belongs with the prefix, not with the path:
        # file:// + /C:/foo  =>  file:///C: + /foo
        prefix, path = _win32_extract_drive_letter(prefix, path)

    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    parent, child = _posix_split(path)
    return prefix + parent, child
|
457  | 
||
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
458  | 
|
| 
1711.2.44
by John Arbash Meinel
 Factor out another win32 special case and add platform independent tests for it.  | 
459  | 
def _win32_strip_local_trailing_slash(url):
    """Drop the last character of url unless the url is too short.

    The final character is removed without being inspected, so the caller
    is expected to pass a url that ends in '/'.  Urls that are not longer
    than WIN32_MIN_ABS_FILEURL_LENGTH are returned unchanged (presumably
    so that the slash after a bare drive letter survives -- confirm
    against the definition of that constant).
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
|
465  | 
||
466  | 
||
| 
1685.1.47
by John Arbash Meinel
 s comes before u  | 
467  | 
def strip_trailing_slash(url):
    """Remove one trailing '/' from url, unless that slash is the root.

    What counts as the root depends on the platform: on win32, file://
    urls are handed to _win32_strip_local_trailing_slash.  Everywhere
    else the url is assumed to look like scheme://host/path, and the
    slash that starts the path is never removed.  Urls without a scheme
    are treated as relative paths and simply lose their final slash.

    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo

    :param url: A relative or absolute URL
    :return: url without its trailing slash, or url itself when there is
        no trailing slash or the slash must be kept
    """
    if not url.endswith('/'):
        # No trailing slash, so there is nothing to remove
        return url
    if sys.platform == 'win32' and url.startswith('file://'):
        return _win32_strip_local_trailing_slash(url)

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    # Only a url with a scheme can have a protected slash: either there
    # is no path slash at all, or the trailing slash *is* the path slash.
    if scheme_loc is not None and (
            first_path_slash is None or first_path_slash == len(url) - 1):
        return url

    # Relative path, or an absolute url with more path than just '/'
    return url[:-1]
506  | 
||
| 
1685.1.47
by John Arbash Meinel
 s comes before u  | 
507  | 
|
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
508  | 
def unescape(url):  | 
509  | 
"""Unescape relpath from url format.  | 
|
510  | 
||
511  | 
    This returns a Unicode path from a URL
 | 
|
512  | 
    """
 | 
|
513  | 
    # jam 20060427 URLs are supposed to be ASCII only strings
 | 
|
514  | 
    #       If they are passed in as unicode, urllib.unquote
 | 
|
515  | 
    #       will return a UNICODE string, which actually contains
 | 
|
516  | 
    #       utf-8 bytes. So we have to ensure that they are
 | 
|
517  | 
    #       plain ASCII strings, or the final .decode will
 | 
|
518  | 
    #       try to encode the UNICODE => ASCII, and then decode
 | 
|
519  | 
    #       it into utf-8.
 | 
|
520  | 
try:  | 
|
521  | 
url = str(url)  | 
|
522  | 
except UnicodeError, e:  | 
|
523  | 
raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))  | 
|
| 
1685.1.80
by Wouter van Heyst
 more code cleanup  | 
524  | 
|
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
525  | 
unquoted = urllib.unquote(url)  | 
526  | 
try:  | 
|
527  | 
unicode_path = unquoted.decode('utf-8')  | 
|
528  | 
except UnicodeError, e:  | 
|
529  | 
raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))  | 
|
530  | 
return unicode_path  | 
|
531  | 
||
532  | 
||
533  | 
# These are characters that if escaped, should stay that way
 | 
|
534  | 
_no_decode_chars = ';/?:@&=+$,#'  | 
|
535  | 
_no_decode_ords = [ord(c) for c in _no_decode_chars]  | 
|
536  | 
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]  | 
|
537  | 
+ ['%02X' % o for o in _no_decode_ords])  | 
|
| 
1685.1.50
by John Arbash Meinel
 Added an re for handling scheme paths.  | 
538  | 
_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]  | 
539  | 
+ [('%02X' % o, chr(o)) for o in range(256)]))  | 
|
| 
1685.1.51
by John Arbash Meinel
 Working on getting normalize_url working.  | 
540  | 
#These entries get mapped to themselves
 | 
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
541  | 
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)  | 
| 
1685.1.51
by John Arbash Meinel
 Working on getting normalize_url working.  | 
542  | 
|
| 
2208.4.1
by Andrew Bennetts
 normalize_url should normalise escaping of unreserved characters, like '~'.  | 
543  | 
# These characters shouldn't be percent-encoded, and it's always safe to
 | 
544  | 
# unencode them if they are.
 | 
|
545  | 
_url_dont_escape_characters = set(  | 
|
546  | 
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha  | 
|
547  | 
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha  | 
|
548  | 
"0123456789" # Numbers  | 
|
549  | 
"-._~" # Unreserved characters  | 
|
550  | 
)
 | 
|
551  | 
||
| 
1685.1.51
by John Arbash Meinel
 Working on getting normalize_url working.  | 
552  | 
# These characters should not be escaped
 | 
| 
2167.2.2
by Aaron Bentley
 Update safe character list  | 
553  | 
_url_safe_characters = set(  | 
554  | 
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha  | 
|
555  | 
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha  | 
|
556  | 
"0123456789" # Numbers  | 
|
557  | 
"_.-!~*'()" # Unreserved characters  | 
|
558  | 
"/;?:@&=+$," # Reserved characters  | 
|
559  | 
"%#" # Extra reserved characters  | 
|
560  | 
)
 | 
|
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
561  | 
|
| 
1685.1.54
by John Arbash Meinel
 url_for_display now makes sure output can be properly encoded.  | 
562  | 
def unescape_for_display(url, encoding):
    """Decode what you can for a URL, so that we get a nice looking path.

    This will turn file:// urls into local paths, and try to decode
    any portions of a http:// style url that it can.

    Any sections of the URL which can't be represented in the encoding or
    need to stay as escapes are left alone.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding; must not be None

    :return: A unicode string which can be safely encoded into the
         specified encoding.
    :raises ValueError: if encoding is None
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')
    if url.startswith('file://'):
        try:
            path = local_path_from_url(url)
            # The encode result is thrown away; it is only a check that
            # the local path can be represented in the target encoding.
            path.encode(encoding)
            return path
        except UnicodeError:
            # Not representable: show the url in its escaped form
            return url

    # Split into sections to try to decode utf-8
    res = url.split('/')
    # Section 0 (everything before the first '/') is never touched
    for i in xrange(1, len(res)):
        escaped_chunks = res[i].split('%')
        # Chunk 0 precedes the first '%', so it holds no escape; every
        # later chunk starts with the two characters that followed a '%'.
        for j in xrange(1, len(escaped_chunks)):
            item = escaped_chunks[j]
            try:
                # Codes listed in _no_decode_hex map to their own escaped
                # form in _hex_display_map, so they stay escaped here.
                escaped_chunks[j] = _hex_display_map[item[:2]] + item[2:]
            except KeyError:
                # Put back the percent symbol
                escaped_chunks[j] = '%' + item
            except UnicodeDecodeError:
                # NOTE(review): presumably reached when item is a unicode
                # object and the mapped character is a non-ASCII byte, so
                # the str + unicode concatenation fails -- confirm.
                escaped_chunks[j] = unichr(int(item[:2], 16)) + item[2:]
        unescaped = ''.join(escaped_chunks)
        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # If this path segment cannot be properly utf-8 decoded
            # after doing unescaping we will just leave it alone
            pass
        else:
            try:
                # Result discarded: only checks the section is encodable
                decoded.encode(encoding)
            except UnicodeEncodeError:
                # If this chunk cannot be encoded in the local
                # encoding, then we should leave it alone
                pass
            else:
                # Otherwise take the url decoded one
                res[i] = decoded
    return u'/'.join(res)
|
| 
2512.4.1
by Ian Clatworthy
 Fixes #115491 - 'branch lp:projname' now creates ./projname as exected  | 
618  | 
|
619  | 
||
620  | 
def derive_to_location(from_location):
    """Work out a default TO_LOCATION name from a FROM_LOCATION.

    When the location contains a '/' or the platform path separator, the
    result is its last path element, ignoring trailing slashes and
    backslashes: http://foo/bar => bar.

    Otherwise, when there is a ':' after the first character, the result
    is everything after the first ':'.  This covers logical locations
    such as lp:foo-bar => foo-bar and a Windows drive without a path
    such as c:foo-bar => foo-bar.

    In every other case from_location is returned unchanged.
    """
    if "/" in from_location or os.sep in from_location:
        return os.path.basename(from_location.rstrip("/\\"))

    colon = from_location.find(":")
    if colon <= 0:
        # No ':' at all, or a leading ':' with nothing in front of it
        return from_location
    return from_location[colon + 1:]