/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2006-2010 Canonical Ltd
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
16
17
"""A collection of function for handling URL operations."""
18
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
19
import os
1685.1.50 by John Arbash Meinel
Added an re for handling scheme paths.
20
import re
21
import sys
1996.3.12 by John Arbash Meinel
Change how 'revision' is imported to avoid problems later
22
23
from bzrlib.lazy_import import lazy_import
24
lazy_import(globals(), """
6015.39.2 by Florian Vichot
Fixed an infinite loop when creating a repo at the root of the filesystem,
25
from posixpath import split as _posix_split
3242.3.26 by Aaron Bentley
Implement rebase_url
26
import urlparse
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
27
1996.3.12 by John Arbash Meinel
Change how 'revision' is imported to avoid problems later
28
from bzrlib import (
29
    errors,
30
    osutils,
31
    )
32
""")
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
33
34
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
35
def basename(url, exclude_trailing_slash=True):
    """Return the final path component of a URL.

    :param url: The URL to inspect
    :param exclude_trailing_slash: When true, a url that looks like
        "path/to/foo/" has its final slash ignored, so 'foo' is returned
        rather than ''
    :return: Only the last component of the URL. This is '' when the URL
        is at its root, or when it ends in a slash and
        exclude_trailing_slash is false.
    """
    _parent, child = split(url, exclude_trailing_slash=exclude_trailing_slash)
    return child
46
47
48
def dirname(url, exclude_trailing_slash=True):
    """Return everything in a URL except its last path chunk.

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Drop a final slash before splitting
        (http://host/foo/ is treated as http://host/foo, whereas
        http://host/ stays http://host/)
    :return: The parent directory of the given URL
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    parent, _child = split(url, exclude_trailing_slash=exclude_trailing_slash)
    return parent
60
61
6379.4.2 by Jelmer Vernooij
Add urlutils.quote / urlutils.unquote.
62
# Private copies of quote and unquote, copied from Python's
63
# urllib module because urllib unconditionally imports socket, which imports
64
# ssl.
65
66
# Characters that quote() never percent-encodes, whatever ``safe`` set the
# caller supplies.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Maps each of the 256 single-byte characters either to itself (when it is
# in always_safe) or to its upper-case %XX escape.
_safe_map = {}
for i in range(256):
    c = chr(i)
    if i < 128 and c in always_safe:
        _safe_map[c] = c
    else:
        _safe_map[c] = '%{0:02X}'.format(i)
# Cache of (quoter, safe characters) pairs keyed by (safe, always_safe);
# quote() fills it in on demand.
_safe_quoters = {}
73
74
75
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Different parts of a URL (the path info, the query, and so on) each
    have their own set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Every one of these is reserved in some component of a URL, but not
    necessarily in all of them.

    The default ``safe`` value targets the path section of a URL, so '/'
    is left unencoded: in typical usage the slashes already present in
    the path are acting as reserved characters.

    :param s: The string to quote. An empty value is returned unchanged.
    :param safe: Extra characters, beyond always_safe, to leave unquoted.
    :return: s with every other character replaced by its %XX escape.
    :raises TypeError: if s is None.
    """
    # fastpath: nothing to do for empty input
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    cachekey = (safe, always_safe)
    cached = _safe_quoters.get(cachekey)
    if cached is None:
        # First use of this ``safe`` set: build its lookup table once.
        table = _safe_map.copy()
        table.update([(c, c) for c in safe])
        cached = (table.__getitem__, always_safe + safe)
        _safe_quoters[cachekey] = cached
    quoter, safe_chars = cached
    # If stripping safe characters off the right leaves nothing, no
    # character needs quoting.
    if not s.rstrip(safe_chars):
        return s
    return ''.join([quoter(ch) for ch in s])
113
114
115
# Hex digits accepted in a %XX escape, in either letter case.
_hexdig = '0123456789ABCDEFabcdef'
# Lookup table from every two-digit hex string to the character it encodes.
_hextochr = dict(
    (pair, chr(int(pair, 16)))
    for pair in (hi + lo for hi in _hexdig for lo in _hexdig))
118
119
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Each '%' followed by two hex digits is replaced by the character it
    encodes; a '%' that is not followed by two hex digits is kept as is.
    """
    pieces = s.split('%')
    # fastpath: no '%' at all
    if len(pieces) == 1:
        return s
    result = pieces[0]
    for piece in pieces[1:]:
        code, rest = piece[:2], piece[2:]
        try:
            result += _hextochr[code] + rest
        except KeyError:
            # Not a valid escape: put the '%' back untouched.
            result += '%' + piece
        except UnicodeDecodeError:
            # NOTE(review): presumably hit when the text is unicode and the
            # decoded byte is non-ASCII -- confirm.
            result += unichr(int(code, 16)) + rest
    return result
134
135
5268.7.11 by Jelmer Vernooij
revert some unnecessary changes
136
def escape(relpath):
    """Escape relpath to be a valid url.

    Unicode input is encoded as utf-8 first; '/' and '~' are left unquoted.
    """
    if isinstance(relpath, unicode):
        encoded = relpath.encode('utf-8')
    else:
        encoded = relpath
    quoted = quote(encoded, safe='/~')
    # Once encoded and quoted the path should be plain ASCII; str() just
    # enforces this.
    return str(quoted)
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
143
144
1685.1.46 by John Arbash Meinel
Sorting functions by name.
145
def file_relpath(base, path):
    """Compute just the relative sub-portion of a url

    This assumes that both paths are already fully specified file:// URLs.

    :raises ValueError: if base is shorter than MIN_ABS_FILEURL_LENGTH.
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        message = ('Length of base (%r) must equal or'
                   ' exceed the platform minimum url length (which is %d)' %
                   (base, MIN_ABS_FILEURL_LENGTH))
        raise ValueError(message)
    # Convert both URLs to normalised local paths, take the relative path
    # on the filesystem side, then escape it back into URL form.
    base_dir = osutils.normpath(local_path_from_url(base))
    target = osutils.normpath(local_path_from_url(path))
    return escape(osutils.relpath(base_dir, target))
1685.1.46 by John Arbash Meinel
Sorting functions by name.
157
158
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
159
def _find_scheme_and_separator(url):
    """Find the end of the scheme and the first path separator in url.

    This is just a helper function for other path utilities.
    It could probably be replaced by urlparse

    :return: A (scheme_end, first_path_slash) pair of offsets into url.
        Both are None when url does not match _url_scheme_re; only
        first_path_slash is None when nothing after the scheme separator
        contains a '/'.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_end = len(match.group('scheme'))
    # Look for the path separating slash (first slash after the ://)
    slash_in_path = match.group('path').find('/')
    if slash_in_path == -1:
        return scheme_end, None
    # Turn the offset within the path group into an offset within url.
    return scheme_end, match.start('path') + slash_in_path
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
178
179
5254.2.1 by Gordon Tyler
Fixed how get_transport's convert_path_to_url tests whether a path is actually a URL.
180
def is_url(url):
    """Tests whether a URL is in actual fact a URL.

    :return: True when url matches _url_scheme_re, False otherwise.
    """
    if _url_scheme_re.match(url) is None:
        return False
    return True
183
184
1685.1.55 by John Arbash Meinel
Adding bzrlib.urlutils.join() to handle joining URLs
185
def join(base, *args):
    """Create a URL by joining sections.

    '..' is normalized on the assumption that paths are absolute
    (and that neither path contains symlinks).

    If any of *args is an absolute URL, it will be treated correctly.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'
    """
    if not args:
        return base
    scheme_end, path_start = _find_scheme_and_separator(base)
    if path_start is None:
        # Without a scheme the whole of base is path; with a scheme but no
        # separating slash the path is empty.
        path_start = 0 if scheme_end is None else len(base)
    path = base[path_start:]
    for arg in args:
        arg_scheme_end, arg_path_start = _find_scheme_and_separator(arg)
        if arg_scheme_end is None:
            # Plain path section: fold it into the path built so far.
            path = joinpath(path, arg)
            continue
        # arg carries its own scheme, so it replaces everything before it.
        if arg_path_start is None:
            arg_path_start = len(arg)
        base = arg
        path_start = arg_path_start
        path = arg[arg_path_start:]
    return base[:path_start] + path
1685.1.55 by John Arbash Meinel
Adding bzrlib.urlutils.join() to handle joining URLs
219
220
2018.5.46 by Andrew Bennetts
Fix ChrootTransportDecorator's clone to pass less surprising offsets to the decorated transport's clone.
221
def joinpath(base, *args):
    """Join URL path segments to a URL path segment.

    This is somewhat like osutils.joinpath, but intended for URLs.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The starting path; a single trailing '/' is ignored.
    :param args: Further path sections. A section starting with '/'
        discards everything joined so far; '.' chunks are skipped and '..'
        chunks remove the previous chunk.
    :return: The joined path, or '/' when only the root is left.
    :raises errors.InvalidURLJoin: if a '..' chunk would go above the root
        of an absolute path, or above the start of a relative one.
    """
    segments = base.split('/')
    if len(segments) > 1 and segments[-1] == '':
        # If the path ends in a trailing /, remove it.
        segments.pop()
    for arg in args:
        if arg.startswith('/'):
            segments = []
        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            elif chunk == '..':
                # [''] is the root of an absolute path; an empty list means
                # a relative path has already been consumed entirely.
                # Popping from the latter used to leak a bare IndexError.
                if not segments or segments == ['']:
                    raise errors.InvalidURLJoin('Cannot go above root',
                            base, args)
                segments.pop()
            else:
                segments.append(chunk)
    if segments == ['']:
        return '/'
    return '/'.join(segments)
252
253
1685.1.46 by John Arbash Meinel
Sorting functions by name.
254
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
255
def _posix_local_path_from_url(url):
    """Convert a url like file:///path/to/foo into /path/to/foo

    Segment parameters are dropped before the conversion.

    :raises errors.InvalidURL: if the url starts with neither file:/// nor
        file://localhost/.
    """
    bare_url = split_segment_parameters_raw(url)[0]
    localhost_prefix = 'file://localhost/'
    if bare_url.startswith(localhost_prefix):
        # Keep the slash that terminates the prefix as the path's root.
        return unescape(bare_url[len(localhost_prefix) - 1:])
    if bare_url.startswith('file:///'):
        # We only strip off 2 slashes
        return unescape(bare_url[len('file://'):])
    raise errors.InvalidURL(
        bare_url, 'local urls must start with file:/// or file://localhost/')
1685.1.46 by John Arbash Meinel
Sorting functions by name.
268
269
270
def _posix_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///path/to/foo

    The absolute form of the path is escaped, which also takes care of
    unicode characters, etc.
    """
    # Using the posix-specific abspath directly allows us to test this
    # on non-posix platforms
    absolute = osutils._posix_abspath(path)
    return 'file://' + escape(absolute)
1685.1.46 by John Arbash Meinel
Sorting functions by name.
278
279
280
def _win32_local_path_from_url(url):
    """Convert a url like file:///C:/path/to/foo into C:/path/to/foo

    UNC urls of the form file://HOST/path are converted to //HOST/path, and
    the bare root url file:/// is converted to '/'. Segment parameters are
    dropped before the conversion.

    :param url: A file:// url.
    :return: The local path the url refers to.
    :raises errors.InvalidURL: if the url does not start with file://, is
        a UNC url without a host, looks like a drive letter after only two
        slashes, or is a file:/// url not of the form file:///x:/.
    """
    if not url.startswith('file://'):
        raise errors.InvalidURL(url, 'local urls must start with file:///, '
                                     'UNC path urls must start with file://')
    url = split_segment_parameters_raw(url)[0]
    # We strip off all 3 slashes
    win32_url = url[len('file:'):]
    # check for UNC path: //HOST/path
    if not win32_url.startswith('///'):
        # Length check and slicing, not bare indexing: 'file://' and
        # 'file://h' used to raise IndexError here. An empty host is
        # rejected; a one-character host is accepted like any longer one.
        if (len(win32_url) < 3
            or win32_url[2] == '/'
            or win32_url[3:4] in ('|', ':')):
            raise errors.InvalidURL(url, 'Win32 UNC path urls'
                ' have form file://HOST/path')
        return unescape(win32_url)

    # allow empty paths so we can serve all roots
    if win32_url == '///':
        return '/'

    # usual local path with drive letter
    if (len(win32_url) < 6
        or win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
                                'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        or win32_url[4] not in  '|:'
        or win32_url[5] != '/'):
        raise errors.InvalidURL(url, 'Win32 file urls start with'
                ' file:///x:/, where x is a valid drive letter')
    # Normalise to an upper-case drive letter followed by ':'.
    return win32_url[3].upper() + u':' + unescape(win32_url[5:])
1685.1.46 by John Arbash Meinel
Sorting functions by name.
309
310
311
def _win32_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///C:/path/to/foo

    The path '/' maps to file:///, and a UNC path maps to file://HOST/path.
    Escaping also takes care of unicode characters, etc.
    """
    # Using the win32-specific abspath directly allows us to test this
    # on non-win32 platform
    # FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
    #       which actually strips trailing space characters.
    #       The worst part is that on linux ntpath.abspath has different
    #       semantics, since 'nt' is not an available module.
    if path == '/':
        return 'file:///'

    absolute = osutils._win32_abspath(path)
    # check for UNC path \\HOST\path
    if absolute.startswith('//'):
        return 'file:' + escape(absolute)
    # str() keeps the drive letter a plain string rather than unicode.
    drive = str(absolute[0].upper())
    remainder = escape(absolute[2:])
    return 'file:///' + drive + ':' + remainder
1685.1.46 by John Arbash Meinel
Sorting functions by name.
331
332
333
# Shortest absolute file URL on win32; defined on every platform.
WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')

# Bind the platform-appropriate converters and minimum URL length.
if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
else:
    local_path_to_url = _posix_local_path_to_url
    local_path_from_url = _posix_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = len('file:///')
1685.1.48 by John Arbash Meinel
Updated strip_trailing_slash to support lots more url stuff, added tests
343
344
5254.1.1 by Gordon Tyler
Added support to urlutils for URLs such as Launchpad's lp:foobar.
345
# Matches anything with a URL scheme: two or more characters other than ':'
# and '/', then ':', an optional '//', and the remainder captured as 'path'.
# NOTE(review): the two-character minimum presumably keeps Windows drive
# letters such as 'C:' from being taken for a scheme -- confirm.
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')
2208.4.1 by Andrew Bennetts
normalize_url should normalise escaping of unreserved characters, like '~'.
346
# Matches a single %XX hex escape, with the hex digits in either letter case.
_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
347
348
349
def _unescape_safe_chars(matchobj):
    """re.sub callback to convert hex-escapes to plain characters (if safe).

    e.g. '%7E' will be converted to '~'.

    An escape whose character is not in _url_dont_escape_characters stays
    escaped, with its hex digits upper-cased.
    """
    escape_seq = matchobj.group(0)
    decoded = chr(int(escape_seq[1:], 16))
    if decoded not in _url_dont_escape_characters:
        return escape_seq.upper()
    return decoded
1685.1.50 by John Arbash Meinel
Added an re for handling scheme paths.
360
361
362
def normalize_url(url):
    """Make sure that a path string is in fully normalized URL form.

    This handles URLs which have unicode characters, spaces,
    special characters, etc.

    It has two basic modes of operation, depending on whether the
    supplied string starts with a url specifier (scheme://) or not.
    If it does not have a specifier it is considered a local path,
    and will be converted into a file:/// url. Non-ascii characters
    will be encoded using utf-8.
    If it does have a url specifier, it will be treated as a "hybrid"
    URL. Basically, a URL that should have URL special characters already
    escaped (like +?&# etc), but may have unicode characters, etc
    which would not be valid in a real URL.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL which only includes 7-bit ASCII characters.
    :raises errors.InvalidURL: if a non-unicode url contains a character
        that is not in _url_safe_characters.
    """
    scheme_end, path_start = _find_scheme_and_separator(url)
    if scheme_end is None:
        return local_path_to_url(url)
    if path_start is None:
        # Scheme but no path slash (e.g. 'lp:foobar', 'http://host').
        # Slicing with None made both prefix and path the whole url, so
        # the result came back doubled; use an empty path, as join() does.
        path_start = len(url)
    prefix = url[:path_start]
    path = url[path_start:]
    if not isinstance(url, unicode):
        for c in url:
            if c not in _url_safe_characters:
                raise errors.InvalidURL(url, 'URLs can only contain specific'
                                            ' safe characters (not %r)' % c)
        path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
        return str(prefix + path)

    # We have a unicode (hybrid) url
    path_chars = list(path)

    for i, char in enumerate(path_chars):
        if char not in _url_safe_characters:
            # Replace the character by the %XX escapes of its utf-8 bytes.
            path_chars[i] = ''.join(
                ['%%%02X' % ord(c) for c in char.encode('utf-8')])
    path = ''.join(path_chars)
    # Normalise existing escapes: unescape safe characters and upper-case
    # the hex digits of the rest.
    path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
    return str(prefix + path)
1685.1.50 by John Arbash Meinel
Added an re for handling scheme paths.
405
406
1685.1.70 by Wouter van Heyst
working on get_parent, set_parent and relative urls, broken
407
def relative_url(base, other):
    """Return a path to other from base.

    When other is unrelated to base (no path separator in either URL, a
    different scheme or host, or on win32 a different drive of a file://
    URL) other is returned as is. Otherwise a relative path is returned,
    or "." when the two refer to the same location.
    This assumes no symlinks as part of the url.
    """
    _, base_first_slash = _find_scheme_and_separator(base)
    if base_first_slash is None:
        return other

    _, other_first_slash = _find_scheme_and_separator(other)
    if other_first_slash is None:
        return other

    # this takes care of differing schemes or hosts
    base_scheme = base[:base_first_slash]
    if base_scheme != other[:other_first_slash]:
        return other
    if sys.platform == 'win32' and base_scheme == 'file://':
        # A location on another logical drive cannot be reached relatively.
        base_drive = base[base_first_slash+1:base_first_slash+3]
        other_drive = other[other_first_slash+1:other_first_slash+3]
        if base_drive != other_drive:
            return other

    base_path = base[base_first_slash+1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]
    other_path = other[other_first_slash+1:]

    base_sections = base_path.split('/')
    if base_sections == ['']:
        base_sections = []
    other_sections = other_path.split('/')
    if other_sections == ['']:
        other_sections = []

    # Count the leading sections the two paths have in common.
    match_len = 0
    for base_part, other_part in zip(base_sections, other_sections):
        if base_part != other_part:
            break
        match_len += 1

    # Climb out of what is left of base, then descend into the rest of other.
    climb = ['..'] * (len(base_sections) - match_len)
    return "/".join(climb + other_sections[match_len:]) or "."
457
458
1711.2.43 by John Arbash Meinel
Split out win32 specific code so that it can be tested on all platforms.
459
def _win32_extract_drive_letter(url_base, path):
    """On win32 the drive letter needs to be added to the url base.

    :param url_base: The part of the url before the path, e.g. file://
    :param path: The path part, expected to look like /C:/foo
    :return: (url_base, path) with the leading '/C:' moved from the path
        onto the url base, e.g. ('file:///C:', '/foo')
    :raises errors.InvalidURL: if the path does not start with a slash,
        one character, ':' or '|', and another slash.
    """
    has_drive = len(path) >= 4 and path[2] in ':|' and path[3] == '/'
    if not has_drive:
        raise errors.InvalidURL(url_base + path,
            'win32 file:/// paths need a drive letter')
    # Split '/C:/foo' into '/C:' and '/foo'
    drive, remainder = path[:3], path[3:]
    return url_base + drive, remainder
469
470
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
471
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string
        if we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None:
        # We have either a relative path, or no separating slash
        if scheme_loc is not None:
            # Scheme with no path
            return url, ''
        # Relative path
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # We have a fully defined path
    url_base = url[:first_path_slash]  # http://host, file://
    path = url[first_path_slash:]  # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter from the path to the base:
        # 'file://' and '/C:/foo' become 'file:///C:' and '/foo'
        url_base, path = _win32_extract_drive_letter(url_base, path)

    # A lone '/' is the root and keeps its slash.
    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    head, tail = _posix_split(path)
    return url_base + head, tail
509
1685.1.46 by John Arbash Meinel
Sorting functions by name.
510
5163.2.5 by Jelmer Vernooij
rename {split,join}_subsegments -> {split,join}_segment_parameters_raw and add more tests.
511
def split_segment_parameters_raw(url):
    """Separate the raw segment parameters from the last segment of a URL.

    :param url: A relative or absolute URL
    :return: (url, subsegments). subsegments is the list of comma-separated
        strings that follow the last path segment. When there are none, the
        URL is returned exactly as it was passed in and the list is empty;
        otherwise the returned URL has had its trailing slash stripped.
    """
    # GZ 2011-11-18: Dodgy removing the terminal slash like this, function
    #                operates on urls not url+segments, and Transport classes
    #                should not be blindly adding slashes in the first place.
    stripped = strip_trailing_slash(url)
    # Segments begin at first comma after last forward slash, if one exists
    last_slash = stripped.rfind("/")
    first_comma = stripped.find(",", last_slash + 1)
    if first_comma < 0:
        return (url, [])
    base = stripped[:first_comma]
    raw_parameters = stripped[first_comma + 1:]
    return (base, raw_parameters.split(","))
5163.2.1 by Jelmer Vernooij
Add urlutils.split_subsegments.
526
527
5163.2.3 by Jelmer Vernooij
Add join_segment_parameters / split_segment_parameters.
528
def split_segment_parameters(url):
    """Split off the key=value parameters of the last segment of a URL.

    :param url: A relative or absolute URL
    :return: (url, segment_parameters) where segment_parameters is a dict
        mapping each parameter key to its value
    :raises ValueError: if a subsegment contains no "="
    """
    base_url, subsegments = split_segment_parameters_raw(url)
    # Split on the first "=" only, so values may themselves contain "=".
    # When a key is repeated the last occurrence wins.
    parameters = dict(
        subsegment.split("=", 1) for subsegment in subsegments)
    return (base_url, parameters)
540
541
5163.2.5 by Jelmer Vernooij
rename {split,join}_subsegments -> {split,join}_segment_parameters_raw and add more tests.
542
def join_segment_parameters_raw(base, *subsegments):
    """Build a URL by appending raw subsegments to an existing one.

    The subsegments are attached, comma-separated, to the end of the
    base URL. Each of them should be a bytestring.

    :note: You probably want to use join_segment_parameters instead.

    :param base: The URL to extend
    :param subsegments: Raw subsegments to append, in order
    :return: base itself when no subsegments are given, otherwise the
        comma-joined URL
    :raises TypeError: if a subsegment is not exactly a str
    :raises errors.InvalidURLJoin: if a subsegment contains a comma
    """
    if len(subsegments) == 0:
        return base
    for candidate in subsegments:
        if type(candidate) is not str:
            raise TypeError("Subsegment %r is not a bytestring" % candidate)
        if candidate.find(",") != -1:
            # A comma would be indistinguishable from the separator.
            raise errors.InvalidURLJoin(", exists in subsegments",
                                        base, subsegments)
    pieces = (base,) + subsegments
    return ",".join(pieces)
559
560
5163.2.3 by Jelmer Vernooij
Add join_segment_parameters / split_segment_parameters.
561
def join_segment_parameters(url, parameters):
    """Create a new URL by adding segment parameters to an existing one.

    The parameters of the last segment in the URL will be updated; if a
    parameter with the same key already exists it will be overwritten.

    :param url: A URL, as string
    :param parameters: Dictionary of parameters, keys and values as bytestrings
    :return: The URL with the merged parameters appended as key=value
        subsegments, sorted by key
    :raises TypeError: if a key or value is not exactly a str
    :raises errors.InvalidURLJoin: if a key contains "="
    """
    (base, existing_parameters) = split_segment_parameters(url)
    new_parameters = dict(existing_parameters)
    for key, value in parameters.items():
        if type(key) is not str:
            # Wrap in a tuple so that a tuple key is reported via %r rather
            # than being unpacked as the format arguments.
            raise TypeError("parameter key %r is not a bytestring" % (key,))
        if type(value) is not str:
            # The format string names the value first and the key second.
            raise TypeError("parameter value %r for %s is not a bytestring" %
                (value, key))
        if "=" in key:
            # "=" separates key from value, so it cannot appear in a key.
            raise errors.InvalidURLJoin("= exists in parameter key", url,
                parameters)
        new_parameters[key] = value
    return join_segment_parameters_raw(base,
        *["%s=%s" % item for item in sorted(new_parameters.items())])
5163.2.3 by Jelmer Vernooij
Add join_segment_parameters / split_segment_parameters.
585
586
1711.2.44 by John Arbash Meinel
Factor out another win32 special case and add platform independent tests for it.
587
def _win32_strip_local_trailing_slash(url):
    """Strip slashes after the drive letter.

    The last character of url is dropped, unless url is no longer than
    WIN32_MIN_ABS_FILEURL_LENGTH, in which case it is returned unchanged
    (presumably because it only names a drive root -- confirm against the
    constant's definition).
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
593
594
1685.1.47 by John Arbash Meinel
s comes before u
595
def strip_trailing_slash(url):
    """Remove a single trailing slash from url, unless url is a root path.

    What counts as a 'root path' is platform-dependent.
    URLs are assumed to be valid netloc urls of the form:
    scheme://host/path
    The slash that starts the path (the first one after ://) is never
    removed. Relative paths are handled as well.
    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo

    On win32, file:// URLs are handed to _win32_strip_local_trailing_slash
    instead, which is the only case treated differently.
    """
    if not url.endswith('/'):
        # Nothing to do
        return url
    if url.startswith('file://') and sys.platform == 'win32':
        return _win32_strip_local_trailing_slash(url)

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    without_slash = url[:-1]
    if scheme_loc is None:
        # No scheme means a relative path: just chop off the last character
        return without_slash

    # If the only slash is the one separating host from path, keep it.
    is_root = (first_path_slash is None
               or first_path_slash == len(url) - 1)
    if is_root:
        return url
    return without_slash
634
1685.1.47 by John Arbash Meinel
s comes before u
635
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
636
def unescape(url):
    """Unescape relpath from url format.

    This returns a Unicode path from a URL

    :param url: The URL to unescape; it must be convertible to a plain
        ASCII str
    :return: The percent-decoded path, decoded from utf-8
    :raises errors.InvalidURL: if url is not plain ASCII, or if the unquoted
        bytes are not valid utf-8
    """
    # jam 20060427 URLs are supposed to be ASCII only strings
    #       If they are passed in as unicode, unquote
    #       will return a UNICODE string, which actually contains
    #       utf-8 bytes. So we have to ensure that they are
    #       plain ASCII strings, or the final .decode will
    #       try to encode the UNICODE => ASCII, and then decode
    #       it into utf-8.
    try:
        url = str(url)
    except UnicodeError as e:
        raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))

    unquoted = unquote(url)
    try:
        unicode_path = unquoted.decode('utf-8')
    except UnicodeError as e:
        raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
    return unicode_path
659
660
661
# Characters whose escaped form must be preserved when unescaping for display
_no_decode_chars = ';/?:@&=+$,#'
_no_decode_ords = [ord(char) for char in _no_decode_chars]
# Two-digit hex spellings of those characters: all lowercase forms first,
# then all uppercase forms.
_no_decode_hex = [fmt % code
                  for fmt in ('%02x', '%02X')
                  for code in _no_decode_ords]
# Maps every two-digit hex string (either case) to the character it encodes
_hex_display_map = dict((fmt % code, chr(code))
                        for fmt in ('%02x', '%02X')
                        for code in range(256))
# ... except for the protected characters, which map back to their own
# percent-escape so that they stay escaped.
_hex_display_map.update(
    (digits, '%' + digits) for digits in _no_decode_hex)

# These characters shouldn't be percent-encoded, and it's always safe to
# unencode them if they are.
_url_dont_escape_characters = set(
    "abcdefghijklmnopqrstuvwxyz"  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"  # Uppercase alpha
    "0123456789"                  # Numbers
    "-._~"                        # Unreserved characters
)

# These characters should not be escaped
_url_safe_characters = set(
    "abcdefghijklmnopqrstuvwxyz"  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"  # Uppercase alpha
    "0123456789"                  # Numbers
    "_.-!~*'()"                   # Unreserved characters
    "/;?:@&=+$,"                  # Reserved characters
    "%#"                          # Extra reserved characters
)
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
689
1685.1.54 by John Arbash Meinel
url_for_display now makes sure output can be properly encoded.
690
def unescape_for_display(url, encoding):
    """Decode as much of a URL as possible so it reads like a nice path.

    file:// urls are turned into local paths; for other urls each path
    section is percent-decoded where that is possible.

    Any section which cannot be represented in the encoding, or which has
    to remain escaped, is left as it was.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
         specified encoding.
    :raises ValueError: if encoding is None
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')
    if url.startswith('file://'):
        try:
            local_path = local_path_from_url(url)
            # Only checking that the path is representable; result unused.
            local_path.encode(encoding)
        except UnicodeError:
            return url
        return local_path

    # Split into sections to try to decode utf-8
    sections = url.split('/')
    # The first section is never touched.
    for index in xrange(1, len(sections)):
        pieces = sections[index].split('%')
        # pieces[0] precedes any '%'; every later piece followed one.
        for pos in xrange(1, len(pieces)):
            piece = pieces[pos]
            hex_pair, remainder = piece[:2], piece[2:]
            try:
                replacement = _hex_display_map[hex_pair] + remainder
            except KeyError:
                # Not a valid escape: put back the percent symbol
                replacement = '%' + piece
            except UnicodeDecodeError:
                replacement = unichr(int(hex_pair, 16)) + remainder
            pieces[pos] = replacement
        unescaped = ''.join(pieces)
        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # If this path segment cannot be properly utf-8 decoded
            # after doing unescaping we will just leave it alone
            continue
        try:
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # If this chunk cannot be encoded in the local
            # encoding, then we should leave it alone
            continue
        # Otherwise take the url decoded one
        sections[index] = decoded
    return u'/'.join(sections)
2512.4.1 by Ian Clatworthy
Fixes #115491 - 'branch lp:projname' now creates ./projname as exected
746
747
748
def derive_to_location(from_location):
    """Work out a TO_LOCATION from a FROM_LOCATION.

    Normally a FROM_LOCATION of http://foo/bar gives bar.
    Some logical destinations contain no / at all, though. For those the
    result is everything after the scheme indicator, e.g.
    lp:foo-bar => foo-bar. The same rule covers a Windows drive given
    without a path, e.g. c:foo-bar => foo-bar.
    If there is no /, no path separator and no :, from_location itself is
    returned.
    """
    has_separator = "/" in from_location or os.sep in from_location
    if has_separator:
        trimmed = from_location.rstrip("/\\")
        return os.path.basename(trimmed)
    colon = from_location.find(":")
    # A colon in the very first position is not treated as a scheme marker.
    if colon > 0:
        return from_location[colon + 1:]
    return from_location
3242.3.26 by Aaron Bentley
Implement rebase_url
767
3242.3.35 by Aaron Bentley
Cleanups and documentation
768
3242.3.26 by Aaron Bentley
Implement rebase_url
769
def _is_absolute(url):
    """Return True if joining url onto the base '/foo' yields url itself."""
    joined = osutils.pathjoin('/foo', url)
    return joined == url
771
3242.3.35 by Aaron Bentley
Cleanups and documentation
772
3242.3.26 by Aaron Bentley
Implement rebase_url
773
def rebase_url(url, old_base, new_base):
    """Re-express a path relative to old_base as relative to new_base.

    The result will be a relative path.
    Absolute paths and full URLs are returned unaltered.

    :raises errors.InvalidRebaseURLs: if the two bases differ in scheme or
        network location
    """
    scheme, _separator = _find_scheme_and_separator(url)
    if scheme is not None or _is_absolute(url):
        return url
    old_parsed = urlparse.urlparse(old_base)
    new_parsed = urlparse.urlparse(new_base)
    # The first two parsed fields are the scheme and the network location.
    old_origin = old_parsed[:2]
    new_origin = new_parsed[:2]
    if old_origin != new_origin:
        raise errors.InvalidRebaseURLs(old_base, new_base)
    target_path = join(old_parsed[2], url)
    return determine_relative_path(new_parsed[2], target_path)
3242.3.26 by Aaron Bentley
Implement rebase_url
790
791
792
def determine_relative_path(from_path, to_path):
    """Work out the relative path leading from from_path to to_path.

    :return: '.' when no segments remain after removing the common prefix,
        otherwise one '..' per leftover from_path segment followed by the
        leftover to_path segments, joined with osutils.pathjoin
    """
    from_segments = osutils.splitpath(from_path)
    to_segments = osutils.splitpath(to_path)
    # Count how many leading segments the two paths share.
    shared = 0
    for from_element, to_element in zip(from_segments, to_segments):
        if from_element != to_element:
            break
        shared += 1
    climb = ['..'] * len(from_segments[shared:])
    descend = to_segments[shared:]
    segments = climb + descend
    if not segments:
        return '.'
    return osutils.pathjoin(*segments)
3873.3.1 by Martin Pool
Move Transport._split_url to urlutils, and ad a simple test
809
810
6055.2.7 by Jelmer Vernooij
Change parse_url to URL.from_string.
811
class URL(object):
    """Parsed URL.

    Each instance stores the scheme and port, plus both the quoted form (as
    written in the URL) and the unquoted form of the user, password, host
    and path.
    """

    def __init__(self, scheme, quoted_user, quoted_password, quoted_host,
            port, quoted_path):
        """Build a URL from its already-split, still-quoted components.

        :param scheme: URL scheme
        :param quoted_user: quoted user name, or None
        :param quoted_password: quoted password, or None
        :param quoted_host: quoted host
        :param port: port number, or None
        :param quoted_path: quoted path
        """
        self.scheme = scheme
        self.quoted_host = quoted_host
        self.host = unquote(self.quoted_host)
        self.quoted_user = quoted_user
        if self.quoted_user is not None:
            self.user = unquote(self.quoted_user)
        else:
            self.user = None
        self.quoted_password = quoted_password
        if self.quoted_password is not None:
            self.password = unquote(self.quoted_password)
        else:
            self.password = None
        self.port = port
        # Normalise the path first, so that the stored quoted_path and the
        # unquoted path are derived from the same text.
        self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)
        self.path = unquote(self.quoted_path)

    def __eq__(self, other):
        """Compare by scheme, host, port, user, password and path."""
        return (isinstance(other, self.__class__) and
                self.scheme == other.scheme and
                self.host == other.host and
                # URLs that differ only in port are different URLs.
                self.port == other.port and
                self.user == other.user and
                self.password == other.password and
                self.path == other.path)

    def __ne__(self, other):
        """Inverse of __eq__.

        Python 2 does not derive != from ==, so this is defined explicitly.
        """
        return not self.__eq__(other)

    def __repr__(self):
        return "<%s(%r, %r, %r, %r, %r, %r)>" % (
            self.__class__.__name__,
            self.scheme, self.quoted_user, self.quoted_password,
            self.quoted_host, self.port, self.quoted_path)

    @classmethod
    def from_string(cls, url):
        """Create a URL object from a string.

        :param url: URL as bytestring
        :return: a new instance of cls
        :raises errors.InvalidURL: if url is a unicode object, or if the
            port is not an integer
        """
        if isinstance(url, unicode):
            raise errors.InvalidURL('should be ascii:\n%r' % url)
        url = url.encode('utf-8')
        (scheme, netloc, path, params,
         query, fragment) = urlparse.urlparse(url, allow_fragments=False)
        user = password = host = port = None
        if '@' in netloc:
            # Split on the last '@' so the credentials may contain '@'.
            user, host = netloc.rsplit('@', 1)
            if ':' in user:
                user, password = user.split(':', 1)
        else:
            host = netloc

        # A colon inside a fully bracketed host belongs to an IPv6 address,
        # not to a port.
        if ':' in host and not (host[0] == '[' and host[-1] == ']'):
            # there *is* port
            host, port = host.rsplit(':',1)
            try:
                port = int(port)
            except ValueError:
                raise errors.InvalidURL('invalid port number %s in url:\n%s' %
                                        (port, url))
        if host != "" and host[0] == '[' and host[-1] == ']': #IPv6
            host = host[1:-1]

        return cls(scheme, user, password, host, port, path)

    def __str__(self):
        """Return the URL as a string, without the password."""
        netloc = self.quoted_host
        if ":" in netloc:
            # Re-add the brackets that from_string strips from IPv6 hosts.
            netloc = "[%s]" % netloc
        if self.quoted_user is not None:
            # Note that we don't put the password back even if we
            # have one so that it doesn't get accidentally
            # exposed.
            netloc = '%s@%s' % (self.quoted_user, netloc)
        if self.port is not None:
            netloc = '%s:%d' % (netloc, self.port)
        return urlparse.urlunparse(
            (self.scheme, netloc, self.quoted_path, None, None, None))

    @staticmethod
    def _combine_paths(base_path, relpath):
        """Transform a Transport-relative path to a remote absolute path.

        This does not handle substitution of ~ but does handle '..' and '.'
        components.

        Examples::

            t._combine_paths('/home/sarah', 'project/foo')
                => '/home/sarah/project/foo'
            t._combine_paths('/home/sarah', '../../etc')
                => '/etc'
            t._combine_paths('/home/sarah', '/etc')
                => '/etc'

        :param base_path: base path
        :param relpath: relative url string for relative part of remote path.
        :return: urlencoded string for final path.
        :raises errors.InvalidURL: if relpath is not a str
        """
        if not isinstance(relpath, str):
            raise errors.InvalidURL(relpath)
        relpath = _url_hex_escapes_re.sub(_unescape_safe_chars, relpath)
        if relpath.startswith('/'):
            # An absolute relpath replaces the base entirely.
            base_parts = []
        else:
            base_parts = base_path.split('/')
        # Drop the empty component left by a trailing slash on the base.
        if len(base_parts) > 0 and base_parts[-1] == '':
            base_parts = base_parts[:-1]
        for p in relpath.split('/'):
            if p == '..':
                if len(base_parts) == 0:
                    # In most filesystems, a request for the parent
                    # of root, just returns root.
                    continue
                base_parts.pop()
            elif p == '.':
                continue # No-op
            elif p != '':
                base_parts.append(p)
        path = '/'.join(base_parts)
        if not path.startswith('/'):
            path = '/' + path
        return path

    def clone(self, offset=None):
        """Return a new URL for a path relative to this URL.

        :param offset: A relative path, already urlencoded
        :return: `URL` instance
        """
        if offset is not None:
            relative = unescape(offset).encode('utf-8')
            path = self._combine_paths(self.path, relative)
            path = quote(path, safe="/~")
        else:
            path = self.quoted_path
        return self.__class__(self.scheme, self.quoted_user,
                self.quoted_password, self.quoted_host, self.port,
                path)
953
3873.3.1 by Martin Pool
Move Transport._split_url to urlutils, and ad a simple test
954
955
def parse_url(url):
    """Split a url into server address, credentials and path.

    user, password, host and path should be quoted in the url if they
    contain reserved chars.

    :param url: a quoted url
    :return: (scheme, user, password, host, port, path) tuple, all fields
        are unquoted.
    """
    parsed = URL.from_string(url)
    credentials = (parsed.user, parsed.password)
    address = (parsed.host, parsed.port)
    return (parsed.scheme,) + credentials + address + (parsed.path,)