bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
1861.2.6
by Alexander Belchenko
 branding: change Bazaar-NG to Bazaar  | 
1  | 
# Bazaar -- distributed version control
 | 
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
2  | 
#
 | 
3  | 
# Copyright (C) 2006 by Canonical Ltd
 | 
|
4  | 
#
 | 
|
5  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
6  | 
# it under the terms of the GNU General Public License as published by
 | 
|
7  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
8  | 
# (at your option) any later version.
 | 
|
9  | 
#
 | 
|
10  | 
# This program is distributed in the hope that it will be useful,
 | 
|
11  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
12  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
13  | 
# GNU General Public License for more details.
 | 
|
14  | 
#
 | 
|
15  | 
# You should have received a copy of the GNU General Public License
 | 
|
16  | 
# along with this program; if not, write to the Free Software
 | 
|
17  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
18  | 
||
19  | 
"""A collection of functions for handling URL operations."""
 | 
|
20  | 
||
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
21  | 
import os  | 
| 
1711.4.5
by John Arbash Meinel
 the _posix_* routines should use posixpath not os.path, so tests pass on win32  | 
22  | 
from posixpath import split as _posix_split, normpath as _posix_normpath  | 
| 
1685.1.50
by John Arbash Meinel
 Added an re for handling scheme paths.  | 
23  | 
import re  | 
24  | 
import sys  | 
|
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
25  | 
import urllib  | 
26  | 
||
27  | 
import bzrlib.errors as errors  | 
|
28  | 
import bzrlib.osutils  | 
|
29  | 
||
30  | 
||
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
31  | 
def basename(url, exclude_trailing_slash=True):
    """Return the final path component of a URL.

    :param url: The URL to inspect
    :param exclude_trailing_slash: When True, a URL that looks like
        "path/to/foo/" yields 'foo' rather than ''
    :return: The second element of split(url). This can be '' when the
        trailing slash is not excluded, or when the URL is at its root.
    """
    _parent, last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return last_component
|
42  | 
||
43  | 
||
44  | 
def dirname(url, exclude_trailing_slash=True):
    """Return everything in a URL except its last path chunk.

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Drop a final slash before splitting
        (http://host/foo/ is treated as http://host/foo, while
        http://host/ stays http://host/)
    :return: The first element of split(url), i.e. the parent of the
        last path chunk
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    parent, _last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return parent
|
56  | 
||
57  | 
||
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
58  | 
def escape(relpath):  | 
59  | 
"""Escape relpath to be a valid url."""  | 
|
60  | 
if isinstance(relpath, unicode):  | 
|
61  | 
relpath = relpath.encode('utf-8')  | 
|
62  | 
    # After quoting and encoding, the path should be perfectly
 | 
|
63  | 
    # safe as a plain ASCII string, str() just enforces this
 | 
|
64  | 
return str(urllib.quote(relpath))  | 
|
65  | 
||
66  | 
||
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
67  | 
def file_relpath(base, path):
    """Compute the portion of a file:// URL that lies below a base URL.

    Both arguments are assumed to be fully specified file:// URLs.
    They are converted with local_path_from_url, compared with
    bzrlib.osutils.relpath, and the result is escaped again.

    :param base: The file:// URL to be relative to
    :param path: The file:// URL to express relative to base
    :return: The escaped relative path
    """
    assert len(base) >= MIN_ABS_FILEURL_LENGTH, (
        'Length of base must be equal or'
        ' exceed the platform minimum url length (which is %d)'
        % MIN_ABS_FILEURL_LENGTH)

    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    return escape(bzrlib.osutils.relpath(local_base, local_path))
|
79  | 
||
80  | 
||
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
81  | 
def _find_scheme_and_separator(url):
    """Locate the scheme separator (://) and the first path separator.

    This is only a helper for the other path utilities.
    It could probably be replaced by urlparse.

    :param url: The URL to examine
    :return: (None, None) when url does not match _url_scheme_re;
        otherwise (length of the scheme, index in url of the first '/'
        after the '://'), where the index is None if there is no such
        slash.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_length = len(match.group('scheme'))
    # The path group begins right after '://', so searching the whole
    # url from there yields an index relative to the start of url.
    path_slash = url.find('/', match.start('path'))
    if path_slash < 0:
        return scheme_length, None
    return scheme_length, path_slash
|
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
100  | 
|
101  | 
||
| 
1685.1.55
by John Arbash Meinel
 Adding bzrlib.urlutils.join() to handle joining URLs  | 
102  | 
def join(base, *args):
    """Build a URL by joining sections onto a base.

    '.' chunks are skipped and '..' chunks remove the previous section
    (no symlinks are assumed in either path).

    If any of *args is itself an absolute URL (it matches
    _url_scheme_re), it replaces everything accumulated so far.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The starting URL or path
    :param args: Further sections to append, in order
    :return: The joined URL
    :raises errors.InvalidURLJoin: if a '..' chunk would remove the only
        remaining section
    """
    base_match = _url_scheme_re.match(base)
    if base_match is None:
        scheme = None
        sections = base.split('/')
    else:
        scheme = base_match.group('scheme')
        sections = base_match.group('path').split('/')
        if sections[-1] == '':
            # Drop the empty section left by a trailing slash.
            # This helps both at the root and when 'base' simply
            # ends with an extra slash.
            sections.pop()

    for arg in args:
        arg_match = _url_scheme_re.match(arg)
        if arg_match is not None:
            # Absolute URL: start over from it
            scheme = arg_match.group('scheme')
            sections = arg_match.group('path').split('/')
            continue

        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            if chunk != '..':
                sections.append(chunk)
            elif len(sections) >= 2:
                # Never pop off the host portion
                sections.pop()
            else:
                raise errors.InvalidURLJoin('Cannot go above root',
                        base, args)

    joined = '/'.join(sections)
    if scheme is None:
        return joined
    return scheme + '://' + joined
|
150  | 
||
151  | 
||
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
152  | 
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
 | 
153  | 
def _posix_local_path_from_url(url):
    """Turn a url such as file:///path/to/foo into /path/to/foo.

    :param url: A URL starting with file:///
    :return: The result of unescape() on the URL minus its 'file://'
    :raises errors.InvalidURL: if url does not start with file:///
    """
    if url.startswith('file:///'):
        # Only two of the three slashes are removed, so the result
        # keeps its leading '/'
        return unescape(url[len('file://'):])
    raise errors.InvalidURL(url, 'local urls must start with file:///')
|
159  | 
||
160  | 
||
161  | 
def _posix_local_path_to_url(path):
    """Turn a local path such as ./foo into a URL like file:///path/to/foo.

    The path is made absolute, normalized and then escaped, which also
    takes care of unicode characters.

    :param path: A local path
    :return: A file:// URL
    """
    # normpath comes straight from posixpath, which lets this be
    # tested on non-posix platforms
    absolute = bzrlib.osutils._posix_abspath(path)
    normalized = _posix_normpath(absolute)
    return 'file://' + escape(normalized)
|
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
170  | 
|
171  | 
||
172  | 
def _win32_local_path_from_url(url):
    """Convert a url like file:///C:/path/to/foo into C:/path/to/foo

    :param url: A URL of the form file:///x:/..., where the drive letter
        may be followed by either ':' or '|'
    :return: The drive letter in upper case, a ':' and the unescaped
        remainder of the URL
    :raises errors.InvalidURL: if url does not start with file:///, or
        if what follows is not a drive letter, ':' or '|', and a '/'
    """
    if not url.startswith('file:///'):
        raise errors.InvalidURL(url, 'local urls must start with file:///')
    # We strip off all 3 slashes
    win32_url = url[len('file:///'):]
    # The length is checked first so that a short URL such as 'file:///'
    # or 'file:///C:' is reported as an InvalidURL instead of failing
    # with an IndexError in the character tests below.
    if (len(win32_url) < 3
        or win32_url[0] not in ('abcdefghijklmnopqrstuvwxyz'
                                'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        or win32_url[1] not in '|:'
        or win32_url[2] != '/'):
        raise errors.InvalidURL(url, 'Win32 file urls start with'
                ' file:///x:/, where x is a valid drive letter')
    return win32_url[0].upper() + u':' + unescape(win32_url[2:])
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
185  | 
|
186  | 
||
187  | 
def _win32_local_path_to_url(path):
    """Turn a local path such as ./foo into a URL like file:///C:/path/to/foo.

    The path is made absolute and normalized, backslashes become forward
    slashes, the drive letter is upper-cased and the rest is escaped
    (which also takes care of unicode characters).

    :param path: A local path
    :return: A file:/// URL
    """
    # The helpers live in bzrlib.osutils so this can be exercised on
    # non-win32 platforms.
    # FIXME: It turns out that on nt, ntpath.abspath uses
    #       nt._getfullpathname which actually strips trailing space
    #       characters. The worst part is that under linux
    #       ntpath.abspath has different semantics, since 'nt' is not
    #       an available module.
    absolute = bzrlib.osutils._win32_abspath(path)
    normalized = bzrlib.osutils._nt_normpath(absolute)
    forward = normalized.replace('\\', '/')
    drive_letter = forward[0].upper()
    return 'file:///' + drive_letter + ':' + escape(forward[2:])
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
201  | 
|
202  | 
||
203  | 
# Length of the shortest absolute file URL in win32 form. It is defined
# on every platform so the win32 helpers can be used anywhere.
WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')

# Pick the platform flavour of the local path <-> URL converters,
# together with the matching minimum absolute file URL length.
if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
else:
    local_path_to_url = _posix_local_path_to_url
    local_path_from_url = _posix_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = len('file:///')


# Matches 'scheme://path', where the scheme is at least two characters
# long and contains neither ':' nor '/'.
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
216  | 
||
217  | 
||
218  | 
def normalize_url(url):
    """Return url in fully normalized URL form.

    There are two modes of operation, chosen by whether the supplied
    string starts with a url specifier (scheme://):

    Without a specifier the string is treated as a local path and is
    handed to local_path_to_url.

    With a specifier it is treated as a "hybrid" URL: one whose URL
    special characters (like +?&# etc) should already be escaped, but
    which may still hold unicode characters that are not valid in a
    real URL. For a unicode string every character of the path that is
    not in _url_safe_characters is replaced by the %XX escapes of its
    utf-8 bytes. A non-unicode string is only validated.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL
    :raises errors.InvalidURL: if a non-unicode URL contains a character
        that is not in _url_safe_characters
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return local_path_to_url(url)

    if isinstance(url, unicode):
        # We have a unicode (hybrid) url
        escaped = []
        for char in match.group('path'):
            if char in _url_safe_characters:
                escaped.append(char)
            else:
                utf8_bytes = char.encode('utf-8')
                escaped.append(
                    ''.join(['%%%02X' % ord(byte) for byte in utf8_bytes]))
        return match.group('scheme') + '://' + ''.join(escaped)

    for char in url:
        if char not in _url_safe_characters:
            raise errors.InvalidURL(url, 'URLs can only contain specific'
                                        ' safe characters (not %r)' % char)
    return url
|
| 
1685.1.50
by John Arbash Meinel
 Added an re for handling scheme paths.  | 
255  | 
|
256  | 
||
| 
1685.1.70
by Wouter van Heyst
 working on get_parent, set_parent and relative urls, broken  | 
257  | 
def relative_url(base, other):
    """Return a path that leads from base to other.

    other is returned unchanged when either URL has no path separator
    after its scheme, or when the parts before that separator (scheme
    and host) differ. Otherwise a relative path is returned, or '.'
    when that path would be empty.
    This assumes no symlinks as part of the url.

    :param base: The URL to start from
    :param other: The URL to reach
    :return: other, or a relative path from base to other
    """
    _unused, base_slash = _find_scheme_and_separator(base)
    if base_slash is None:
        return other

    _unused, other_slash = _find_scheme_and_separator(other)
    if other_slash is None:
        return other

    # Comparing everything before the first path slash takes care of
    # differing schemes or hosts
    if base[:base_slash] != other[:other_slash]:
        return other

    base_path = base[base_slash + 1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]
    other_path = other[other_slash + 1:]

    # ''.split('/') gives [''], which must count as "no sections"
    if base_path:
        base_parts = base_path.split('/')
    else:
        base_parts = []
    if other_path:
        other_parts = other_path.split('/')
    else:
        other_parts = []

    # Count how many leading sections the two paths share
    shared = 0
    for base_part, other_part in zip(base_parts, other_parts):
        if base_part != other_part:
            break
        shared += 1

    # Climb out of what is left of base, then descend into other
    result = ['..'] * (len(base_parts) - shared)
    result.extend(other_parts[shared:])
    return "/".join(result) or "."
|
302  | 
||
303  | 
||
| 
1711.2.43
by John Arbash Meinel
 Split out win32 specific code so that it can be tested on all platforms.  | 
304  | 
def _win32_extract_drive_letter(url_base, path):
    """On win32 the drive letter needs to be added to the url base.

    :param url_base: The part of the URL before the path, e.g. file://
    :param path: The path part, e.g. /C:/foo
    :return: (url_base, path) with the leading '/C:' moved from path
        onto url_base, e.g. ('file:///C:', '/foo')
    :raises errors.InvalidURL: if path does not start with a slash-like
        first character, a drive letter position, ':' or '|', and '/'
    """
    # Strip off the drive letter
    # path is currently /C:/foo
    # path[3] is inspected below, so at least 4 characters are needed;
    # a shorter path such as '/C:' must be rejected as an InvalidURL
    # rather than fail with an IndexError.
    if len(path) < 4 or path[2] not in ':|' or path[3] != '/':
        raise errors.InvalidURL(url_base + path,
            'win32 file:/// paths need a drive letter')
    url_base += path[0:3] # file:// + /C:
    path = path[3:] # /foo
    return url_base, path
|
314  | 
||
315  | 
||
| 
1685.1.49
by John Arbash Meinel
 Added bzrlib.urlutils.split and basename + dirname  | 
316  | 
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string
        if we're at the root.
    """
    scheme_end, path_slash = _find_scheme_and_separator(url)

    if path_slash is None:
        if scheme_end is not None:
            # A scheme, but no slash separating off a path
            return url, ''
        # No scheme at all: a relative path
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # A fully defined path: cut it at the first path slash
    prefix = url[:path_slash]   # http://host, file://
    path = url[path_slash:]     # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter onto the prefix:
        # 'file://' + '/C:/foo' becomes 'file:///C:' + '/foo'
        prefix, path = _win32_extract_drive_letter(prefix, path)

    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    parent, child = _posix_split(path)
    return prefix + parent, child
|
354  | 
||
| 
1685.1.46
by John Arbash Meinel
 Sorting functions by name.  | 
355  | 
|
| 
1711.2.44
by John Arbash Meinel
 Factor out another win32 special case and add platform independent tests for it.  | 
356  | 
def _win32_strip_local_trailing_slash(url):
    """Drop the last character of url unless it is only a drive root.

    A url no longer than WIN32_MIN_ABS_FILEURL_LENGTH is returned
    unchanged. The last character is not inspected here.

    :param url: A win32 style file:/// URL
    :return: url, possibly without its last character
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
|
362  | 
||
363  | 
||
| 
1685.1.47
by John Arbash Meinel
 s comes before u  | 
364  | 
def strip_trailing_slash(url):
    """Strip trailing slash, except for root paths.

    The definition of 'root path' is platform-dependent.
    This assumes that all URLs are valid netloc urls, such that they
    form:
    scheme://host/path
    It searches for ://, and then refuses to remove the next '/'.
    It can also handle relative paths
    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo
        # On win32 platforms file:/// URLs are handled by
        # _win32_strip_local_trailing_slash, which keeps the slash
        # after the drive letter.
        file:///C:/       => file:///C:/

    :param url: A relative or absolute URL
    :return: url without its trailing slash, or url itself
    """
    if not url.endswith('/'):
        # Nothing to do
        return url
    if sys.platform == 'win32' and url.startswith('file:///'):
        return _win32_strip_local_trailing_slash(url)

    scheme_end, path_slash = _find_scheme_and_separator(url)
    has_scheme = scheme_end is not None
    if has_scheme and (path_slash is None or path_slash == len(url) - 1):
        # The only slash available is the one separating off the path,
        # so there is nothing that may be chopped off
        return url

    # Either a relative path (no scheme) or a slash further down the
    # path: chop off the last character
    return url[:-1]
403  | 
||
| 
1685.1.47
by John Arbash Meinel
 s comes before u  | 
404  | 
|
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
405  | 
def unescape(url):  | 
406  | 
"""Unescape relpath from url format.  | 
|
407  | 
||
408  | 
    This returns a Unicode path from a URL
 | 
|
409  | 
    """
 | 
|
410  | 
    # jam 20060427 URLs are supposed to be ASCII only strings
 | 
|
411  | 
    #       If they are passed in as unicode, urllib.unquote
 | 
|
412  | 
    #       will return a UNICODE string, which actually contains
 | 
|
413  | 
    #       utf-8 bytes. So we have to ensure that they are
 | 
|
414  | 
    #       plain ASCII strings, or the final .decode will
 | 
|
415  | 
    #       try to encode the UNICODE => ASCII, and then decode
 | 
|
416  | 
    #       it into utf-8.
 | 
|
417  | 
try:  | 
|
418  | 
url = str(url)  | 
|
419  | 
except UnicodeError, e:  | 
|
420  | 
raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))  | 
|
| 
1685.1.80
by Wouter van Heyst
 more code cleanup  | 
421  | 
|
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
422  | 
unquoted = urllib.unquote(url)  | 
423  | 
try:  | 
|
424  | 
unicode_path = unquoted.decode('utf-8')  | 
|
425  | 
except UnicodeError, e:  | 
|
426  | 
raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))  | 
|
427  | 
return unicode_path  | 
|
428  | 
||
429  | 
||
430  | 
# These are characters that if escaped, should stay that way
 | 
|
431  | 
_no_decode_chars = ';/?:@&=+$,#'  | 
|
432  | 
_no_decode_ords = [ord(c) for c in _no_decode_chars]  | 
|
433  | 
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]  | 
|
434  | 
+ ['%02X' % o for o in _no_decode_ords])  | 
|
| 
1685.1.50
by John Arbash Meinel
 Added an re for handling scheme paths.  | 
435  | 
_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]  | 
436  | 
+ [('%02X' % o, chr(o)) for o in range(256)]))  | 
|
| 
1685.1.51
by John Arbash Meinel
 Working on getting normalize_url working.  | 
437  | 
#These entries get mapped to themselves
 | 
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
438  | 
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)  | 
| 
1685.1.51
by John Arbash Meinel
 Working on getting normalize_url working.  | 
439  | 
|
440  | 
# These characters should not be escaped
 | 
|
441  | 
_url_safe_characters = set('abcdefghijklmnopqrstuvwxyz'  | 
|
442  | 
                        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 | 
|
443  | 
'0123456789' '_.-/'  | 
|
444  | 
';?:@&=+$,%#')  | 
|
| 
1685.1.45
by John Arbash Meinel
 Moved url functions into bzrlib.urlutils  | 
445  | 
|
446  | 
||
| 
1685.1.54
by John Arbash Meinel
 url_for_display now makes sure output can be properly encoded.  | 
447  | 
def unescape_for_display(url, encoding):
    """Decode what you can for a URL, so that we get a nice looking path.

    A file:// url is turned into a local path with local_path_from_url;
    if that path cannot be encoded in the given encoding the url is
    returned as it is. For any other url each '/'-separated section
    after the first is unescaped on its own.

    Any sections of the URL which can't be represented in the encoding or
    need to stay as escapes are left alone.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
         specified encoding.
    """
    assert encoding is not None, 'you cannot specify None for the display encoding.'
    if url.startswith('file://'):
        try:
            local_path = local_path_from_url(url)
            # Only checks that the path is encodable; the encoded
            # result itself is not used
            local_path.encode(encoding)
        except UnicodeError:
            return url
        return local_path

    # Work section by section, trying to decode each one as utf-8
    sections = url.split('/')
    for section_index in xrange(1, len(sections)):
        pieces = sections[section_index].split('%')
        # pieces[0] precedes the first '%'; every later piece starts
        # with what should be a two-digit hex escape
        for piece_index in xrange(1, len(pieces)):
            piece = pieces[piece_index]
            hex_pair = piece[:2]
            remainder = piece[2:]
            try:
                pieces[piece_index] = _hex_display_map[hex_pair] + remainder
            except KeyError:
                # Not a known escape: put back the percent symbol
                pieces[piece_index] = '%' + piece
            except UnicodeDecodeError:
                pieces[piece_index] = unichr(int(hex_pair, 16)) + remainder

        unescaped = ''.join(pieces)
        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # If this path segment cannot be properly utf-8 decoded
            # after doing unescaping we will just leave it alone
            continue
        try:
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # If this chunk cannot be encoded in the local
            # encoding, then we should leave it alone
            continue
        # Otherwise take the url decoded one
        sections[section_index] = decoded
    return u'/'.join(sections)