/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2006-2010 Canonical Ltd
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
16
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
17
"""A collection of function for handling URL operations."""
18
6379.6.3 by Jelmer Vernooij
Use absolute_import.
19
from __future__ import absolute_import
20
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
21
import os
1685.1.50 by John Arbash Meinel
Added an re for handling scheme paths.
22
import re
23
import sys
1996.3.12 by John Arbash Meinel
Change how 'revision' is imported to avoid problems later
24
6621.2.26 by Martin
Misc set of changes to get started with selftest on Python 3
25
try:
26
    import urlparse
27
except ImportError:
28
    from urllib import parse as urlparse
29
6729.6.1 by Jelmer Vernooij
Move urlutils errors.
30
from . import (
31
    errors,
32
    osutils,
33
    )
34
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
35
from .lazy_import import lazy_import
1996.3.12 by John Arbash Meinel
Change how 'revision' is imported to avoid problems later
36
lazy_import(globals(), """
6015.39.2 by Florian Vichot
Fixed an infinite loop when creating a repo at the root of the filesystem,
37
from posixpath import split as _posix_split
1996.3.12 by John Arbash Meinel
Change how 'revision' is imported to avoid problems later
38
""")
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
39
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
40
from .sixish import (
7078.15.1 by Jelmer Vernooij
Fix some more tests.
41
    int2byte,
6677.1.1 by Martin
Go back to native str for urls and many other py3 changes
42
    PY3,
6621.2.26 by Martin
Misc set of changes to get started with selftest on Python 3
43
    text_type,
44
    )
45
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
46
6729.6.1 by Jelmer Vernooij
Move urlutils errors.
47
class InvalidURL(errors.PathError):
    """Error for a URL that cannot be used.

    The message is rendered from ``_fmt`` using the ``path`` and ``extra``
    fields.
    """

    _fmt = 'Invalid url supplied to transport: "%(path)s"%(extra)s'
50
51
52
class InvalidURLJoin(errors.PathError):
    """Error for a request to join URL segments that cannot be satisfied.

    :param reason: Short explanation of why the join is invalid
    :param base: The base the segments were being joined onto
    :param join_args: The segments that were being joined onto ``base``
    """

    _fmt = "Invalid URL join request: %(reason)s: %(base)r + %(join_args)r"

    def __init__(self, reason, base, join_args):
        # Keep the individual fields as attributes so _fmt can reference them.
        self.reason = reason
        self.base = base
        self.join_args = join_args
        errors.PathError.__init__(self, base, reason)
61
62
63
class InvalidRebaseURLs(errors.PathError):
    """Error for a pair of URLs that differ by more than their path.

    :param from_: The first URL of the pair
    :param to: The second URL of the pair
    """

    _fmt = "URLs differ by more than path: %(from_)r and %(to)r"

    def __init__(self, from_, to):
        # Keep both URLs as attributes so _fmt can reference them.
        self.from_ = from_
        self.to = to
        errors.PathError.__init__(self, from_, 'URLs differ by more than path.')
71
72
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
73
def basename(url, exclude_trailing_slash=True):
    """Return the last component of a URL.

    This is the second element of ``split(url, ...)``.

    :param url: The URL in question
    :param exclude_trailing_slash: If the url looks like "path/to/foo/"
        ignore the final slash and return 'foo' rather than ''
    :return: Just the final component of the URL. This can return ''
        if you don't exclude_trailing_slash, or if you are at the
        root of the URL.
    """
    return split(url, exclude_trailing_slash=exclude_trailing_slash)[1]
84
85
86
def dirname(url, exclude_trailing_slash=True):
    """Return the parent directory of the given path.

    This is the first element of ``split(url, ...)``.

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Remove a final slash
        (treat http://host/foo/ as http://host/foo, but
        http://host/ stays http://host/)
    :return: Everything in the URL except the last path chunk
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
98
99
7045.3.1 by Jelmer Vernooij
Fix another ~500 tests.
100
if PY3:
    # When PY3 is set, reuse the implementations exposed by urlparse.
    quote_from_bytes = urlparse.quote_from_bytes
    quote = urlparse.quote
    unquote_to_bytes = urlparse.unquote_to_bytes
else:
    # Private copies of quote and unquote, copied from Python's
    # urllib module because urllib unconditionally imports socket, which imports
    # ssl.

    always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                   'abcdefghijklmnopqrstuvwxyz'
                   '0123456789' '_.-')
    # Maps every single-character string chr(0)..chr(255) either to itself
    # (when it is in always_safe) or to its upper-case %XX escape.
    _safe_map = {}
    for i in range(256):
        c = chr(i)
        _safe_map[c] = (
            c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i))
    # Cache of (quoter, safe) pairs keyed by (safe, always_safe), so the
    # per-``safe`` lookup table is only built once.
    _safe_quoters = {}

    def quote_from_bytes(s, safe='/'):
        """quote('abc def') -> 'abc%20def'

        Each part of a URL, e.g. the path info, the query, etc., has a
        different set of reserved characters that must be quoted.

        RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
        the following reserved characters.

        reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                      "$" | ","

        Each of these characters is reserved in some component of a URL,
        but not necessarily in all of them.

        By default, the quote function is intended for quoting the path
        section of a URL.  Thus, it will not encode '/'.  This character
        is reserved, but in typical usage the quote function is being
        called on a path where the existing slash characters are used as
        reserved characters.

        :param s: The string to quote
        :param safe: Extra characters that must be left unquoted
        :return: ``s`` with every character outside ``always_safe`` and
            ``safe`` replaced by its %XX escape
        :raises TypeError: if ``s`` is None
        """
        # fastpath
        if not s:
            if s is None:
                raise TypeError('None object cannot be quoted')
            return s
        cachekey = (safe, always_safe)
        try:
            (quoter, safe) = _safe_quoters[cachekey]
        except KeyError:
            safe_map = _safe_map.copy()
            safe_map.update([(c, c) for c in safe])
            quoter = safe_map.__getitem__
            safe = always_safe + safe
            _safe_quoters[cachekey] = (quoter, safe)
        # If stripping safe characters leaves nothing, there is nothing to
        # quote and the input can be returned unchanged.
        if not s.rstrip(safe):
            return s
        return ''.join(map(quoter, s))

    quote = quote_from_bytes
    unquote_to_bytes = urlparse.unquote


unquote = urlparse.unquote
6379.4.2 by Jelmer Vernooij
Add urlutils.quote / urlutils.unquote.
161
162
5268.7.11 by Jelmer Vernooij
revert some unnecessary changes
163
def escape(relpath):
    """Escape relpath to be a valid url.

    :param relpath: The path to escape. On Python 2 a non-``str`` value is
        encoded as UTF-8 before quoting.
    :return: ``relpath`` quoted with ``quote``, leaving '/' and '~' unquoted
    """
    if not isinstance(relpath, str) and sys.version_info[0] == 2:
        relpath = relpath.encode('utf-8')
    return quote(relpath, safe='/~')
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
168
169
1685.1.46 by John Arbash Meinel
Sorting functions by name.
170
def file_relpath(base, path):
    """Compute just the relative sub-portion of a url

    This assumes that both paths are already fully specified file:// URLs.

    :param base: The file:// URL to compute the result relative to
    :param path: The file:// URL to express relative to ``base``
    :return: The escaped result of ``osutils.relpath`` applied to the two
        normalized local paths
    :raises ValueError: if ``base`` is shorter than MIN_ABS_FILEURL_LENGTH
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        raise ValueError('Length of base (%r) must equal or'
            ' exceed the platform minimum url length (which is %d)' %
            (base, MIN_ABS_FILEURL_LENGTH))
    # Convert both URLs to normalized local paths so the comparison is done
    # with filesystem path functions rather than URL ones.
    base = osutils.normpath(local_path_from_url(base))
    path = osutils.normpath(local_path_from_url(path))
    return escape(osutils.relpath(base, path))
1685.1.46 by John Arbash Meinel
Sorting functions by name.
182
183
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
184
def _find_scheme_and_separator(url):
    """Find the scheme separator (://) and the first path separator

    This is just a helper function for other path utilities.
    It could probably be replaced by urlparse

    :param url: The string to inspect
    :return: A 2-tuple:
        ``(None, None)`` if ``url`` does not match ``_url_scheme_re``;
        ``(len(scheme), None)`` if the part after the scheme has no '/';
        otherwise ``(len(scheme), index)`` where ``index`` is the position
        in ``url`` of the first '/' after the scheme separator.
    """
    m = _url_scheme_re.match(url)
    if not m:
        return None, None

    scheme = m.group('scheme')
    path = m.group('path')

    # Find the path separating slash
    # (first slash after the ://)
    first_path_slash = path.find('/')
    if first_path_slash == -1:
        return len(scheme), None
    # Translate the offset within the 'path' group into an offset in url.
    return len(scheme), first_path_slash+m.start('path')
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
203
204
5254.2.1 by Gordon Tyler
Fixed how get_transport's convert_path_to_url tests whether a path is actually a URL.
205
def is_url(url):
    """Tests whether a URL is in actual fact a URL.

    :param url: The string to test
    :return: True if ``url`` matches ``_url_scheme_re``, False otherwise
    """
    return _url_scheme_re.match(url) is not None
208
209
1685.1.55 by John Arbash Meinel
Adding bzrlib.urlutils.join() to handle joining URLs
210
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it will be treated correctly.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The URL (or plain path) to start from
    :param args: Segments to join onto ``base``, in order
    :return: The joined URL; ``base`` unchanged if no args are given
    """
    if not args:
        return base
    scheme_end, path_start = _find_scheme_and_separator(base)
    if scheme_end is None and path_start is None:
        # No scheme at all: the whole of base is path.
        path_start = 0
    elif path_start is None:
        # A scheme but no path slash: the path is empty.
        path_start = len(base)
    path = base[path_start:]
    for arg in args:
        arg_scheme_end, arg_path_start = _find_scheme_and_separator(arg)
        if arg_scheme_end is None and arg_path_start is None:
            arg_path_start = 0
        elif arg_path_start is None:
            arg_path_start = len(arg)
        if arg_scheme_end is not None:
            # arg has its own scheme, so it replaces everything joined so far.
            base = arg
            path = arg[arg_path_start:]
            scheme_end = arg_scheme_end
            path_start = arg_path_start
        else:
            path = joinpath(path, arg)
    return base[:path_start] + path
1685.1.55 by John Arbash Meinel
Adding bzrlib.urlutils.join() to handle joining URLs
244
245
2018.5.46 by Andrew Bennetts
Fix ChrootTransportDecorator's clone to pass less surprising offsets to the decorated transport's clone.
246
def joinpath(base, *args):
    """Join URL path segments to a URL path segment.

    This is somewhat like osutils.joinpath, but intended for URLs.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The path to start from
    :param args: Path segments to apply in order. A segment starting with
        '/' discards everything accumulated so far; '.' chunks are skipped
        and '..' chunks remove the last accumulated chunk.
    :return: The joined path; '/' if only the root remains
    :raises InvalidURLJoin: if a '..' chunk would go above the root, or
        above the start of a relative path
    """
    path = base.split('/')
    if len(path) > 1 and path[-1] == '':
        # If the path ends in a trailing /, remove it.
        path.pop()
    for arg in args:
        if arg.startswith('/'):
            path = []
        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            elif chunk == '..':
                # [''] is the root of an absolute path; [] is a relative
                # path that has already been fully consumed. Neither has
                # a parent, so report it rather than popping an empty list.
                if not path or path == ['']:
                    raise InvalidURLJoin('Cannot go above root',
                            base, args)
                path.pop()
            else:
                path.append(chunk)
    if path == ['']:
        return '/'
    else:
        return '/'.join(path)
2018.5.46 by Andrew Bennetts
Fix ChrootTransportDecorator's clone to pass less surprising offsets to the decorated transport's clone.
277
278
1685.1.46 by John Arbash Meinel
Sorting functions by name.
279
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
280
def _posix_local_path_from_url(url):
    """Convert a url like file:///path/to/foo into /path/to/foo

    :param url: A URL starting with ``file:///`` or ``file://localhost/``
    :return: The unescaped local path, starting with '/'
    :raises InvalidURL: if ``url`` has neither of the accepted prefixes
    """
    url = split_segment_parameters_raw(url)[0]
    file_localhost_prefix = 'file://localhost/'
    if url.startswith(file_localhost_prefix):
        # Keep the final '/' of the prefix as the leading '/' of the path.
        path = url[len(file_localhost_prefix) - 1:]
    elif not url.startswith('file:///'):
        raise InvalidURL(
            url, 'local urls must start with file:/// or file://localhost/')
    else:
        # Only 'file://' is dropped; the third slash starts the path.
        path = url[len('file://'):]
    return unescape(path)
1685.1.46 by John Arbash Meinel
Sorting functions by name.
293
294
295
def _posix_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///path/to/foo

    This also handles transforming escaping unicode characters, etc.

    :param path: A local path
    :return: 'file://' followed by the escaped result of
        ``osutils._posix_abspath(path)``
    """
    # importing directly from posixpath allows us to test this
    # on non-posix platforms
    return 'file://' + escape(osutils._posix_abspath(path))
1685.1.46 by John Arbash Meinel
Sorting functions by name.
303
304
305
def _win32_local_path_from_url(url):
    """Convert a url like file:///C:/path/to/foo into C:/path/to/foo

    Three forms are accepted:

    * ``file://HOST/path`` (UNC) is returned as ``//HOST/path``;
    * ``file:///`` is returned as ``/`` so that all roots can be served;
    * ``file:///x:/path`` (or ``x|``) is returned as ``X:/path`` with the
      drive letter upper-cased.

    :param url: A file:// URL
    :return: The unescaped local path
    :raises InvalidURL: if ``url`` does not start with ``file://``, is a UNC
        URL without a host or with a drive-letter-like host, or is a local
        URL without a valid ``x:/`` drive prefix
    """
    if not url.startswith('file://'):
        raise InvalidURL(url, 'local urls must start with file:///, '
                                     'UNC path urls must start with file://')
    url = split_segment_parameters_raw(url)[0]
    # Drop only the 'file:' scheme; the leading slashes are kept so that the
    # UNC ('//HOST') and local ('///x:') forms can be told apart below.
    win32_url = url[len('file:'):]
    # check for UNC path: //HOST/path
    if not win32_url.startswith('///'):
        # Guard the index checks with length checks: 'file://' (no host) is
        # rejected, and a one-character host no longer raises IndexError.
        if (len(win32_url) < 3
                or win32_url[2] == '/'
                or (len(win32_url) > 3 and win32_url[3] in '|:')):
            raise InvalidURL(url, 'Win32 UNC path urls'
                ' have form file://HOST/path')
        return unescape(win32_url)

    # allow empty paths so we can serve all roots
    if win32_url == '///':
        return '/'

    # usual local path with drive letter
    if (len(win32_url) < 6
        or win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
                                'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        or win32_url[4] not in  '|:'
        or win32_url[5] != '/'):
        raise InvalidURL(url, 'Win32 file urls start with'
                ' file:///x:/, where x is a valid drive letter')
    return win32_url[3].upper() + u':' + unescape(win32_url[5:])
1685.1.46 by John Arbash Meinel
Sorting functions by name.
334
335
336
def _win32_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///C:/path/to/foo

    This also handles transforming escaping unicode characters, etc.

    :param path: A local path; the special value '/' maps to 'file:///'
    :return: 'file:' plus the escaped path for paths whose absolute form
        starts with '//' (UNC), otherwise 'file:///X:' plus the escaped
        remainder, with the drive letter upper-cased
    """
    # importing directly from ntpath allows us to test this
    # on non-win32 platform
    # FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
    #       which actually strips trailing space characters.
    #       The worst part is that on linux ntpath.abspath has different
    #       semantics, since 'nt' is not an available module.
    if path == '/':
        return 'file:///'

    win32_path = osutils._win32_abspath(path)
    # check for UNC path \\HOST\path
    if win32_path.startswith('//'):
        return 'file:' + escape(win32_path)
    # str() keeps the drive letter a plain string rather than unicode.
    return ('file:///' + str(win32_path[0].upper()) + ':' +
        escape(win32_path[2:]))
1685.1.46 by John Arbash Meinel
Sorting functions by name.
356
357
358
# Default to the posix implementations; they are replaced below on win32.
local_path_to_url = _posix_local_path_to_url
local_path_from_url = _posix_local_path_from_url
MIN_ABS_FILEURL_LENGTH = len('file:///')
WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')

if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url

    MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH


# A scheme is two or more characters other than ':' and '/', followed by ':'
# and an optional '//'; everything after that is captured as 'path'.
_url_scheme_re = re.compile('^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')
# Matches a single %XX hex escape.
_url_hex_escapes_re = re.compile('(%[0-9a-fA-F]{2})')
2208.4.1 by Andrew Bennetts
normalize_url should normalise escaping of unreserved characters, like '~'.
372
373
374
def _unescape_safe_chars(matchobj):
    """re.sub callback to convert hex-escapes to plain characters (if safe).

    e.g. '%7E' will be converted to '~'.

    :param matchobj: A match whose whole text is a %XX escape
    :return: The decoded character if it is in
        ``_url_dont_escape_characters``, otherwise the original escape
        with its hex digits upper-cased
    """
    hex_digits = matchobj.group(0)[1:]
    char = chr(int(hex_digits, 16))
    if char in _url_dont_escape_characters:
        return char
    else:
        return matchobj.group(0).upper()
1685.1.50 by John Arbash Meinel
Added an re for handling scheme paths.
385
386
387
def normalize_url(url):
    """Make sure that a path string is in fully normalized URL form.

    This handles URLs which have unicode characters, spaces,
    special characters, etc.

    It has two basic modes of operation, depending on whether the
    supplied string starts with a url specifier (scheme://) or not.
    If it does not have a specifier it is considered a local path,
    and will be converted into a file:/// url. Non-ascii characters
    will be encoded using utf-8.
    If it does have a url specifier, it will be treated as a "hybrid"
    URL. Basically, a URL that should have URL special characters already
    escaped (like +?&# etc), but may have unicode characters, etc
    which would not be valid in a real URL.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL which only includes 7-bit ASCII characters.
    :raises InvalidURL: if ``url`` is not a text string and contains a
        character outside ``_url_safe_characters``
    """
    scheme_end, path_start = _find_scheme_and_separator(url)
    if scheme_end is None:
        return local_path_to_url(url)
    if path_start is None:
        # A scheme but no path slash (e.g. 'http://host' or 'lp:foo'): the
        # path is empty. Without this, url[:None] and url[None:] are both
        # the whole url and the result would be the url repeated twice.
        path_start = len(url)
    prefix = url[:path_start]
    path = url[path_start:]
    if not isinstance(url, text_type):
        # Not a text string: nothing is escaped on the caller's behalf, the
        # url is only validated.
        for c in url:
            if c not in _url_safe_characters:
                raise InvalidURL(url, 'URLs can only contain specific'
                                            ' safe characters (not %r)' % c)
        path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
        return str(prefix + path)

    # We have a unicode (hybrid) url
    path_chars = list(path)

    for i, char in enumerate(path_chars):
        if char not in _url_safe_characters:
            # Replace the character with the %XX escapes of its utf-8 bytes.
            path_chars[i] = ''.join(
                ['%%%02X' % c for c in bytearray(char.encode('utf-8'))])
    path = ''.join(path_chars)
    path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
    return str(prefix + path)
1685.1.50 by John Arbash Meinel
Added an re for handling scheme paths.
430
431
1685.1.70 by Wouter van Heyst
working on get_parent, set_parent and relative urls, broken
432
def relative_url(base, other):
    """Return a path to other from base.

    If other is unrelated to base, return other. Else return a relative path.
    This assumes no symlinks as part of the url.

    :param base: The URL the result should be relative to
    :param other: The URL to express relative to ``base``
    :return: ``other`` unchanged if either URL has no path separator, if
        the parts before the first path slash differ, or (on win32 file://
        URLs) if the drives differ; otherwise a relative path, or "." if
        the two paths are the same
    """
    _scheme_end, base_first_slash = _find_scheme_and_separator(base)
    if base_first_slash is None:
        return other

    _scheme_end, other_first_slash = _find_scheme_and_separator(other)
    if other_first_slash is None:
        return other

    # this takes care of differing schemes or hosts
    base_scheme = base[:base_first_slash]
    other_scheme = other[:other_first_slash]
    if base_scheme != other_scheme:
        return other
    elif sys.platform == 'win32' and base_scheme == 'file://':
        # A relative path cannot cross from one drive to another.
        base_drive = base[base_first_slash+1:base_first_slash+3]
        other_drive = other[other_first_slash+1:other_first_slash+3]
        if base_drive != other_drive:
            return other

    base_path = base[base_first_slash+1:]
    other_path = other[other_first_slash+1:]

    if base_path.endswith('/'):
        base_path = base_path[:-1]

    base_sections = base_path.split('/')
    other_sections = other_path.split('/')

    if base_sections == ['']:
        base_sections = []
    if other_sections == ['']:
        other_sections = []

    # Count the leading path sections the two URLs have in common.
    match_len = 0
    for b, o in zip(base_sections, other_sections):
        if b != o:
            break
        match_len += 1

    # Climb out of what is left of base, then descend into what is left
    # of other.
    output_sections = ['..'] * (len(base_sections) - match_len)
    output_sections.extend(other_sections[match_len:])

    return "/".join(output_sections) or "."
482
483
1711.2.43 by John Arbash Meinel
Split out win32 specific code so that it can be tested on all platforms.
484
def _win32_extract_drive_letter(url_base, path):
485
    """On win32 the drive letter needs to be added to the url base."""
486
    # Strip off the drive letter
487
    # path is currently /C:/foo
6123.3.2 by Martin
Treat file:///C: as invalid on windows instead of throwing an IndexError
488
    if len(path) < 4 or path[2] not in ':|' or path[3] != '/':
6729.6.1 by Jelmer Vernooij
Move urlutils errors.
489
        raise InvalidURL(url_base + path,
1711.2.43 by John Arbash Meinel
Split out win32 specific code so that it can be tested on all platforms.
490
            'win32 file:/// paths need a drive letter')
491
    url_base += path[0:3] # file:// + /C:
492
    path = path[3:] # /foo
493
    return url_base, path
494
495
1685.1.49 by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname
496
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string
        if we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None:
        if scheme_loc is not None:
            # A scheme but no path at all: nothing to split off
            return url, ''
        # No scheme either, so this is a plain relative path
        relative = url
        if exclude_trailing_slash and relative.endswith('/'):
            relative = relative[:-1]
        return _posix_split(relative)

    # Fully qualified: separate 'http://host' (or 'file://') from '/file/foo'
    url_base, path = url[:first_path_slash], url[first_path_slash:]

    if sys.platform == 'win32' and url.startswith('file:///'):
        # 'file://' + '/C:/foo' becomes 'file:///C:' + '/foo'
        url_base, path = _win32_extract_drive_letter(url_base, path)

    # A lone '/' is the root and must be kept
    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    parent, child = _posix_split(path)
    return url_base + parent, child
534
1685.1.46 by John Arbash Meinel
Sorting functions by name.
535
5163.2.5 by Jelmer Vernooij
rename {split,join}_subsegments -> {split,join}_segment_parameters_raw and add more tests.
536
def split_segment_parameters_raw(url):
    """Split the raw subsegments off the last segment of a URL.

    :param url: A relative or absolute URL
    :return: (url, subsegments).  When the last segment contains no comma
        the URL is returned exactly as given with an empty list; otherwise
        the returned URL has had its trailing slash stripped and the
        subsegments removed.
    """
    # GZ 2011-11-18: Dodgy removing the terminal slash like this, function
    #                operates on urls not url+segments, and Transport classes
    #                should not be blindly adding slashes in the first place.
    stripped = strip_trailing_slash(url)
    # Only commas after the last forward slash (if any) start subsegments
    last_segment_at = stripped.rfind("/") + 1
    comma_at = stripped.find(",", last_segment_at)
    if comma_at < 0:
        return (url, [])
    raw_subsegments = stripped[comma_at + 1:]
    return (stripped[:comma_at],
            [str(part) for part in raw_subsegments.split(",")])
5163.2.1 by Jelmer Vernooij
Add urlutils.split_subsegments.
551
552
5163.2.3 by Jelmer Vernooij
Add join_segment_parameters / split_segment_parameters.
553
def split_segment_parameters(url):
    """Split the segment parameters of the last segment of a URL.

    :param url: A relative or absolute URL
    :return: (url, segment_parameters) where segment_parameters is a dict
        mapping each parameter key to its value.  If a key occurs more
        than once the last occurrence wins.
    :raises InvalidURL: if a subsegment does not contain '='
    """
    (base_url, subsegments) = split_segment_parameters_raw(url)
    parameters = {}
    for subsegment in subsegments:
        # Split on the first '=' only; values may themselves contain '='.
        key, separator, value = subsegment.partition("=")
        if not separator:
            # Previously this surfaced as a bare tuple-unpacking ValueError
            raise InvalidURL(url, "missing = in subsegment")
        parameters[key] = value
    return (base_url, parameters)
569
570
5163.2.5 by Jelmer Vernooij
rename {split,join}_subsegments -> {split,join}_segment_parameters_raw and add more tests.
571
def join_segment_parameters_raw(base, *subsegments):
    """Create a new URL by appending raw subsegments to an existing one.

    The subsegments are joined onto base with ',' separators.  With no
    subsegments, base is returned unchanged.

    :param base: The URL to extend
    :param subsegments: Native strings, none of which may contain ','
    :raises TypeError: if a subsegment is not a str
    :raises InvalidURLJoin: if a subsegment contains ','
    :note: You probably want to use join_segment_parameters instead.
    """
    if len(subsegments) == 0:
        return base
    pieces = [base]
    for subsegment in subsegments:
        if not isinstance(subsegment, str):
            raise TypeError("Subsegment %r is not a bytestring" % subsegment)
        if "," in subsegment:
            # A comma would be indistinguishable from the separator
            raise InvalidURLJoin(", exists in subsegments",
                                 base, subsegments)
        pieces.append(subsegment)
    return ",".join(pieces)
588
589
5163.2.3 by Jelmer Vernooij
Add join_segment_parameters / split_segment_parameters.
590
def join_segment_parameters(url, parameters):
    """Create a new URL by adding segment parameters to an existing one.

    Parameters already present on the last segment of the URL are kept,
    except that any key also given in parameters is overwritten.  The
    resulting parameters are emitted sorted, as 'key=value' subsegments.

    :param url: A URL, as string
    :param parameters: Dictionary of parameters, keys and values as str
    :raises TypeError: if a key or value is not a str
    :raises InvalidURLJoin: if a key contains '='
    """
    (base, existing_parameters) = split_segment_parameters(url)
    merged = dict(existing_parameters)
    for key, value in parameters.items():
        if not isinstance(key, str):
            raise TypeError("parameter key %r is not a str" % key)
        if not isinstance(value, str):
            raise TypeError("parameter value %r for %r is not a str" %
                (value, key))
        if "=" in key:
            # '=' separates key from value, so it cannot appear in a key
            raise InvalidURLJoin("= exists in parameter key", url,
                parameters)
        merged[key] = value
    encoded = ["%s=%s" % pair for pair in sorted(merged.items())]
    return join_segment_parameters_raw(base, *encoded)
5163.2.3 by Jelmer Vernooij
Add join_segment_parameters / split_segment_parameters.
614
615
1711.2.44 by John Arbash Meinel
Factor out another win32 special case and add platform independent tests for it.
616
def _win32_strip_local_trailing_slash(url):
    """Drop the last character of url unless it is a bare drive root.

    URLs no longer than WIN32_MIN_ABS_FILEURL_LENGTH are returned
    unchanged; anything longer loses its final character (the caller is
    expected to have checked that it is a '/').
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
622
623
1685.1.47 by John Arbash Meinel
s comes before u
624
def strip_trailing_slash(url):
    """Strip trailing slash, except for root paths.

    The definition of 'root path' is platform-dependent.
    This assumes that all URLs are valid netloc urls of the form
    scheme://host/path; the slash that separates the host from the path
    is never removed.  Relative paths are handled too.  On win32,
    file:// URLs are delegated to _win32_strip_local_trailing_slash.

    Examples::

        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo
    """
    if not url.endswith('/'):
        # Nothing to strip
        return url
    if sys.platform == 'win32' and url.startswith('file://'):
        return _win32_strip_local_trailing_slash(url)

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    without_slash = url[:-1]
    if scheme_loc is None:
        # No scheme means a relative path: just drop the last character
        return without_slash

    # Keep the slash when it is the one separating host and path
    only_separator_slash = (first_path_slash is None
                            or first_path_slash == len(url) - 1)
    if only_separator_slash:
        return url
    return without_slash
663
1685.1.47 by John Arbash Meinel
s comes before u
664
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
665
def unescape(url):
    """Unescape relpath from url format.

    This returns a Unicode path from a URL.

    :param url: A URL; if it is a text_type instance it must contain
        only ASCII characters
    :raises InvalidURL: if a text_type url is not plain ASCII, or (on
        Python 2) if the unquoted bytes cannot be decoded as utf-8
    """
    # jam 20060427 URLs are supposed to be ASCII only strings
    #       If they are passed in as unicode, unquote
    #       will return a UNICODE string, which actually contains
    #       utf-8 bytes. So we have to ensure that they are
    #       plain ASCII strings, or the final .decode will
    #       try to encode the UNICODE => ASCII, and then decode
    #       it into utf-8.
    if isinstance(url, text_type):
        try:
            ascii_url = url.encode("ascii")
        except UnicodeError as e:
            raise InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
        if not PY3:
            # Python 2 carries on with the encoded form; Python 3 only
            # needed the encode as a validity check.
            url = ascii_url

    if PY3:
        return urlparse.unquote(url)

    unquoted = unquote(url)
    try:
        return unquoted.decode('utf-8')
    except UnicodeError as e:
        raise InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
697
698
699
# Characters that, when they arrive percent-encoded, must stay encoded
_no_decode_chars = ';/?:@&=+$,#'
_no_decode_ords = [ord(char) for char in _no_decode_chars]
_no_decode_hex = (['%02x' % code for code in _no_decode_ords]
                  + ['%02X' % code for code in _no_decode_ords])

# Two-digit hex string (lower- and upper-case spellings) -> the single
# byte it stands for
_hex_display_map = {
    template % code: int2byte(code)
    for template in ('%02x', '%02X')
    for code in range(256)
}
# ...except the protected characters above, which map back to their own
# percent-escape
_hex_display_map.update({
    digits: b'%' + digits.encode('ascii') for digits in _no_decode_hex
})

# These characters shouldn't be percent-encoded, and it's always safe to
# unencode them if they are.
_url_dont_escape_characters = set("".join([
    "abcdefghijklmnopqrstuvwxyz",  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",  # Uppercase alpha
    "0123456789",  # Numbers
    "-._~",  # Unreserved characters
]))

# These characters should not be escaped
_url_safe_characters = set("".join([
    "abcdefghijklmnopqrstuvwxyz",  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",  # Uppercase alpha
    "0123456789",  # Numbers
    "_.-!~*'()",  # Unreserved characters
    "/;?:@&=+$,",  # Reserved characters
    "%#",  # Extra reserved characters
]))
1685.1.45 by John Arbash Meinel
Moved url functions into bzrlib.urlutils
727
7078.15.1 by Jelmer Vernooij
Fix some more tests.
728
729
def _unescape_segment_for_display(segment, encoding):
    """Unescape a single URL segment for display.

    Helper for unescape_for_display

    :param segment: One '/'-separated piece of a 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: The decoded segment if it is valid utf-8 after unescaping
        and can be encoded into the specified encoding; otherwise the
        segment exactly as it was passed in.
    """
    pieces = segment.split('%')
    # Everything before the first '%' is literal text
    chunks = [pieces[0].encode('utf-8')]
    for item in pieces[1:]:
        hex_digits, remainder = item[:2], item[2:]
        try:
            chunk = _hex_display_map[hex_digits]
        except KeyError:
            # Not a recognised escape: put back the percent symbol
            chunk = b'%' + (hex_digits.encode('utf-8') if PY3 else hex_digits)
        except UnicodeDecodeError:
            chunk = unichr(int(hex_digits, 16)).encode('utf-8')
        chunks.append(chunk + (remainder.encode('utf-8') if PY3 else remainder))
    unescaped = b''.join(chunks)

    try:
        decoded = unescaped.decode('utf-8')
    except UnicodeDecodeError:
        # The unescaped bytes are not valid utf-8, so leave the segment
        # alone
        return segment

    try:
        decoded.encode(encoding)
    except UnicodeEncodeError:
        # Cannot be represented in the local encoding, so leave the
        # segment alone
        return segment
    # Otherwise take the url decoded one
    return decoded
769
770
1685.1.54 by John Arbash Meinel
url_for_display now makes sure output can be properly encoded.
771
def unescape_for_display(url, encoding):
    """Decode what you can for a URL, so that we get a nice looking path.

    file:// urls are turned into local paths; for any other url each
    '/'-separated section after the first is unescaped where possible.

    Any sections of the URL which can't be represented in the encoding or
    need to stay as escapes are left alone.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
         specified encoding.
    :raises ValueError: if encoding is None
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')

    if not url.startswith('file://'):
        # Split into sections to try to decode utf-8; the first section
        # is passed through untouched.
        sections = url.split('/')
        shown = sections[:1] + [
            _unescape_segment_for_display(section, encoding)
            for section in sections[1:]]
        return u'/'.join(shown)

    try:
        local_path = local_path_from_url(url)
        # Only a check that the path is representable; result unused
        local_path.encode(encoding)
    except UnicodeError:
        return url
    return local_path
2512.4.1 by Ian Clatworthy
Fixes #115491 - 'branch lp:projname' now creates ./projname as exected
801
802
803
def derive_to_location(from_location):
    """Derive a TO_LOCATION given a FROM_LOCATION.

    Segment parameters are removed first.  The normal case is a
    FROM_LOCATION of http://foo/bar => bar.
    When no / (or os.sep) is present at all, the result is the full name
    without the scheme indicator, e.g. lp:foo-bar => foo-bar.
    This latter case also applies when a Windows drive
    is used without a path, e.g. c:foo-bar => foo-bar.
    If no /, path separator or : is found, the from_location is returned.
    """
    location, unused_params = split_segment_parameters(from_location)
    if "/" in location or os.sep in location:
        return os.path.basename(location.rstrip("/\\"))
    colon_at = location.find(":")
    if colon_at > 0:
        # Drop everything up to and including the first ':'
        return location[colon_at + 1:]
    return location
3242.3.26 by Aaron Bentley
Implement rebase_url
823
3242.3.35 by Aaron Bentley
Cleanups and documentation
824
3242.3.26 by Aaron Bentley
Implement rebase_url
825
def _is_absolute(url):
    """Return True if joining url onto the base '/foo' yields url itself."""
    rejoined = osutils.pathjoin('/foo', url)
    return rejoined == url
827
3242.3.35 by Aaron Bentley
Cleanups and documentation
828
3242.3.26 by Aaron Bentley
Implement rebase_url
829
def rebase_url(url, old_base, new_base):
    """Convert a relative path from an old base URL to a new base URL.

    The result will be a relative path.
    Absolute paths and full URLs are returned unaltered.

    :param url: The path to rebase
    :param old_base: The URL that url is currently relative to
    :param new_base: The URL the result should be relative to
    :raises InvalidRebaseURLs: if the scheme or network location of
        old_base and new_base differ
    """
    scheme, separator = _find_scheme_and_separator(url)
    if scheme is not None or _is_absolute(url):
        return url

    old_parsed = urlparse.urlparse(old_base)
    new_parsed = urlparse.urlparse(new_base)
    # Elements 0 and 1 are the scheme and the network location
    old_origin = (old_parsed[0], old_parsed[1])
    new_origin = (new_parsed[0], new_parsed[1])
    if old_origin != new_origin:
        raise InvalidRebaseURLs(old_base, new_base)

    # Element 2 is the path component
    target_path = join(old_parsed[2], url)
    return determine_relative_path(new_parsed[2], target_path)
3242.3.26 by Aaron Bentley
Implement rebase_url
846
847
848
def determine_relative_path(from_path, to_path):
    """Determine a relative path from from_path to to_path.

    :return: A path made of one '..' for every segment of from_path past
        the common prefix, followed by the remaining segments of to_path;
        '.' when that leaves nothing.
    """
    from_segments = osutils.splitpath(from_path)
    to_segments = osutils.splitpath(to_path)

    # Count how many leading segments the two paths share
    shared = 0
    for from_element, to_element in zip(from_segments, to_segments):
        if from_element != to_element:
            break
        shared += 1

    climb = ['..'] * len(from_segments[shared:])
    segments = climb + to_segments[shared:]
    if not segments:
        return '.'
    return osutils.pathjoin(*segments)
3873.3.1 by Martin Pool
Move Transport._split_url to urlutils, and ad a simple test
865
866
6055.2.7 by Jelmer Vernooij
Change parse_url to URL.from_string.
867
class URL(object):
    """Parsed URL.

    Holds the scheme, credentials, host, port and path of a URL, keeping
    both the quoted forms (``quoted_*`` attributes) and their unquoted
    equivalents.
    """

    def __init__(self, scheme, quoted_user, quoted_password, quoted_host,
            port, quoted_path):
        """Create a URL from already-quoted components.

        :param scheme: URL scheme
        :param quoted_user: Quoted user name, or None
        :param quoted_password: Quoted password, or None
        :param quoted_host: Quoted host name
        :param port: Port, or None
        :param quoted_path: Quoted path; escapes matched by
            _url_hex_escapes_re are passed through _unescape_safe_chars
            before the path is stored
        """
        self.scheme = scheme
        self.quoted_host = quoted_host
        self.host = unquote(self.quoted_host)
        self.quoted_user = quoted_user
        if self.quoted_user is not None:
            self.user = unquote(self.quoted_user)
        else:
            self.user = None
        self.quoted_password = quoted_password
        if self.quoted_password is not None:
            self.password = unquote(self.quoted_password)
        else:
            self.password = None
        self.port = port
        self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)
        self.path = unquote(self.quoted_path)

    def __eq__(self, other):
        """Compare all unquoted components, including the port."""
        return (isinstance(other, self.__class__) and
                self.scheme == other.scheme and
                self.host == other.host and
                # URLs that differ only in port name different endpoints
                self.port == other.port and
                self.user == other.user and
                self.password == other.password and
                self.path == other.path)

    def __ne__(self, other):
        """Inverse of __eq__.

        Python 2 does not derive != from __eq__, so without this two
        equal URLs would also compare as != there.
        """
        return not self.__eq__(other)

    def __repr__(self):
        return "<%s(%r, %r, %r, %r, %r, %r)>" % (
            self.__class__.__name__,
            self.scheme, self.quoted_user, self.quoted_password,
            self.quoted_host, self.port, self.quoted_path)

    @classmethod
    def from_string(cls, url):
        """Create a URL object from a string.

        :param url: URL as a native str (text_type is accepted too and is
            encoded first)
        :return: A new instance of this class
        :raises InvalidURL: if url is neither str nor text_type, cannot
            be encoded, or carries a non-numeric port
        """
        # GZ 2017-06-09: Actually validate ascii-ness
        # pad.lv/1696545: For the moment, accept both native strings and unicode.
        if isinstance(url, str):
            pass
        elif isinstance(url, text_type):
            try:
                url = url.encode()
            except UnicodeEncodeError:
                raise InvalidURL(url)
        else:
            raise InvalidURL(url)
        (scheme, netloc, path, params,
         query, fragment) = urlparse.urlparse(url, allow_fragments=False)
        user = password = host = port = None
        if '@' in netloc:
            # Split at the last '@' so the credentials may contain '@'
            user, host = netloc.rsplit('@', 1)
            if ':' in user:
                user, password = user.split(':', 1)
        else:
            host = netloc

        # A ':' inside a fully bracketed host belongs to an IPv6 literal,
        # not to a port.
        if ':' in host and not (host[0] == '[' and host[-1] == ']'):
            # there *is* port
            host, port = host.rsplit(':', 1)
            if port:
                try:
                    port = int(port)
                except ValueError:
                    raise InvalidURL('invalid port number %s in url:\n%s' %
                                     (port, url))
            else:
                # 'host:' with nothing after the colon means no port
                port = None
        if host != "" and host[0] == '[' and host[-1] == ']': #IPv6
            host = host[1:-1]

        return cls(scheme, user, password, host, port, path)

    def __str__(self):
        """Return the URL as a string, without the password."""
        netloc = self.quoted_host
        if ":" in netloc:
            # Hosts containing ':' are written back in brackets
            netloc = "[%s]" % netloc
        if self.quoted_user is not None:
            # Note that we don't put the password back even if we
            # have one so that it doesn't get accidentally
            # exposed.
            netloc = '%s@%s' % (self.quoted_user, netloc)
        if self.port is not None:
            netloc = '%s:%d' % (netloc, self.port)
        return urlparse.urlunparse(
            (self.scheme, netloc, self.quoted_path, None, None, None))

    @staticmethod
    def _combine_paths(base_path, relpath):
        """Transform a Transport-relative path to a remote absolute path.

        This does not handle substitution of ~ but does handle '..' and '.'
        components.

        Examples::

            t._combine_paths('/home/sarah', 'project/foo')
                => '/home/sarah/project/foo'
            t._combine_paths('/home/sarah', '../../etc')
                => '/etc'
            t._combine_paths('/home/sarah', '/etc')
                => '/etc'

        :param base_path: base path
        :param relpath: relative url string for relative part of remote path.
        :return: urlencoded string for final path.
        :raises InvalidURL: if relpath is neither str nor text_type, or
            cannot be encoded
        """
        # pad.lv/1696545: For the moment, accept both native strings and unicode.
        if isinstance(relpath, str):
            pass
        elif isinstance(relpath, text_type):
            try:
                relpath = relpath.encode()
            except UnicodeEncodeError:
                raise InvalidURL(relpath)
        else:
            raise InvalidURL(relpath)
        relpath = _url_hex_escapes_re.sub(_unescape_safe_chars, relpath)
        if relpath.startswith('/'):
            # An absolute relpath replaces the base entirely
            base_parts = []
        else:
            base_parts = base_path.split('/')
        if len(base_parts) > 0 and base_parts[-1] == '':
            # Drop the empty component left by a trailing '/'
            base_parts = base_parts[:-1]
        for p in relpath.split('/'):
            if p == '..':
                if len(base_parts) == 0:
                    # In most filesystems, a request for the parent
                    # of root, just returns root.
                    continue
                base_parts.pop()
            elif p == '.':
                continue # No-op
            elif p != '':
                base_parts.append(p)
        path = '/'.join(base_parts)
        if not path.startswith('/'):
            path = '/' + path
        return path

    def clone(self, offset=None):
        """Return a new URL for a path relative to this URL.

        :param offset: A relative path, already urlencoded; when None the
            new URL has the same path as this one
        :return: `URL` instance
        """
        if offset is not None:
            relative = unescape(offset)
            if sys.version_info[0] == 2:
                # unescape() returns unicode on Python 2
                relative = relative.encode('utf-8')
            path = self._combine_paths(self.path, relative)
            path = quote(path, safe="/~")
        else:
            path = self.quoted_path
        return self.__class__(self.scheme, self.quoted_user,
                self.quoted_password, self.quoted_host, self.port,
                path)
1030
3873.3.1 by Martin Pool
Move Transport._split_url to urlutils, and ad a simple test
1031
1032
def parse_url(url):
    """Extract the server address, the credentials and the path from the url.

    user, password, host and path should be quoted if they contain reserved
    chars.

    :param url: a quoted url
    :return: (scheme, user, password, host, port, path) tuple, taken from
        the unquoted attributes of the URL parsed by URL.from_string.
    """
    parsed = URL.from_string(url)
    field_names = ('scheme', 'user', 'password', 'host', 'port', 'path')
    return tuple(getattr(parsed, name) for name in field_names)