/brz/remove-bazaar : contents of bzrlib/urlutils.py at revision 1685.1.45

: (revision 1685.1.45)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# Bazaar-NG -- distributed version control
#
# Copyright (C) 2006 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""A collection of function for handling URL operations."""

import urllib
import sys

import bzrlib.errors as errors
import bzrlib.osutils


def escape(relpath):
    """Escape relpath to be a valid url."""
    if isinstance(relpath, unicode):
        relpath = relpath.encode('utf-8')
    # After quoting and encoding, the path should be perfectly
    # safe as a plain ASCII string, str() just enforces this
    return str(urllib.quote(relpath))


def unescape(url):
    """Unescape relpath from url format.

    This returns a Unicode path from a URL
    """
    # jam 20060427 URLs are supposed to be ASCII only strings
    #       If they are passed in as unicode, urllib.unquote
    #       will return a UNICODE string, which actually contains
    #       utf-8 bytes. So we have to ensure that they are
    #       plain ASCII strings, or the final .decode will
    #       try to encode the UNICODE => ASCII, and then decode
    #       it into utf-8.
    try:
        url = str(url)
    except UnicodeError, e:
        raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
    unquoted = urllib.unquote(url)
    try:
        unicode_path = unquoted.decode('utf-8')
    except UnicodeError, e:
        raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
    return unicode_path


def file_relpath(base, path):
    """Return the url-escaped location of path relative to base.

    Both arguments are assumed to be fully specified file:// URLs.
    They are converted to local paths, the relative path is computed
    by bzrlib.osutils.relpath, and the result is escaped again.
    """
    assert len(base) >= MIN_ABS_URLPATHLENGTH, (
        'Length of base must be equal or exceed the platform minimum url'
        ' length (which is %d)' % MIN_ABS_URLPATHLENGTH)

    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    relative = bzrlib.osutils.relpath(local_base, local_path)
    return escape(relative)


def strip_trailing_slash(url):
    """Remove a single trailing slash from url, unless url is a root path.

    What counts as a 'root path' depends on the platform: it is any url
    whose length equals MIN_ABS_URLPATHLENGTH.
    The url passed in must be a file:/// url.
    """
    assert url.startswith('file:///'), \
        'strip_trailing_slash expects file:// urls (%s)' % url
    # Root urls and urls without a trailing slash are returned untouched.
    if len(url) == MIN_ABS_URLPATHLENGTH or url[-1] != '/':
        return url
    return url[:-1]


# These are characters that if escaped, should stay that way
_no_decode_chars = ';/?:@&=+$,#'
_no_decode_ords = [ord(c) for c in _no_decode_chars]
_no_decode_hex = (['%02x' % o for o in _no_decode_ords] 
                + ['%02X' % o for o in _no_decode_ords])
_hex_display_map = urllib._hextochr.copy()
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
#These entries get mapped to themselves


def unescape_for_display(url):
    """Decode what you can for a URL, so that we get a nice looking path.

    This will turn file:// urls into local paths, and try to decode
    any portions of a http:// style url that it can.

    For non-file urls, every '/'-separated section after the first one
    is unescaped and then decoded as utf-8. Escapes listed in
    _no_decode_hex are kept in their escaped form, and a section that
    cannot be decoded is left exactly as it was passed in.
    """
    if url.startswith('file://'):
        return local_path_from_url(url)

    # Split into sections to try to decode utf-8
    res = url.split('/')
    for i in xrange(1, len(res)):
        escaped_chunks = res[i].split('%')
        # Chunk 0 is the text before the first '%'; every later chunk
        # starts with the two characters that followed a '%'.
        for j in xrange(1, len(escaped_chunks)):
            item = escaped_chunks[j]
            try:
                escaped_chunks[j] = _hex_display_map[item[:2]] + item[2:]
            except KeyError:
                # Not a known hex escape: put back the percent symbol
                escaped_chunks[j] = '%' + item
            except UnicodeDecodeError:
                # Joining a non-ascii byte with a unicode remainder
                # fails, so use the matching unicode character instead.
                escaped_chunks[j] = unichr(int(item[:2], 16)) + item[2:]
        unescaped = ''.join(escaped_chunks)
        try:
            res[i] = unescaped.decode('utf-8')
        except UnicodeError:
            # If this path segment cannot be properly utf-8 decoded
            # after doing unescaping we will just leave it alone.
            # UnicodeError (not only UnicodeDecodeError) is caught
            # because a unicode segment holding non-ascii characters
            # fails with UnicodeEncodeError when .decode() is called.
            pass
    return '/'.join(res)

def _posix_local_path_to_url(path):
    """Build a file:// url (e.g. file:///path/to/foo) from a local path.

    The path (which may be relative, like ./foo) is made absolute and
    normalized, and characters that are not url-safe are escaped.
    """
    # posixpath is named explicitly, instead of going through os.path,
    # so that this can be tested on non-posix platforms
    import posixpath
    absolute = bzrlib.osutils._posix_abspath(path)
    normalized = posixpath.normpath(absolute)
    return 'file://' + escape(normalized)


def _posix_local_path_from_url(url):
    """Turn a url such as file:///path/to/foo into the path /path/to/foo

    Raises errors.InvalidURL if url does not start with file:///
    """
    if url.startswith('file:///'):
        # Only 'file://' is removed, so the third slash stays behind
        # as the leading slash of the returned path.
        return unescape(url[len('file://'):])
    raise errors.InvalidURL(url, 'local urls must start with file:///')


def _win32_local_path_to_url(path):
    """Build a url like file:///C|/path/to/foo from a local win32 path.

    The path (which may be relative, like ./foo) is made absolute and
    normalized, backslashes are turned into forward slashes, and
    characters that are not url-safe are escaped.
    """
    # The explicitly win32 helpers from bzrlib.osutils are used here,
    # presumably so that this can be exercised on non-win32 platforms.
    absolute = bzrlib.osutils._win32_abspath(path)
    win32_path = bzrlib.osutils._nt_normpath(absolute).replace('\\', '/')
    # Assumes win32_path looks like 'c:/...': character 0 is the drive
    # letter and character 1 the colon, which becomes '|' in the url.
    drive_letter = win32_path[0].upper()
    return 'file:///' + drive_letter + '|' + escape(win32_path[2:])


def _win32_local_path_from_url(url):
    """Convert a url like file:///C|/path/to/foo into C:/path/to/foo

    The drive letter may be followed by either '|' or ':' in the url.
    The returned path always uses an upper-case drive letter and ':'.

    Raises errors.InvalidURL if url does not start with file:///, or
    if what follows is not a drive letter, a '|' or ':', and a '/'.
    """
    if not url.startswith('file:///'):
        raise errors.InvalidURL(url, 'local urls must start with file:///')
    # We strip off all 3 slashes
    win32_url = url[len('file:///'):]
    # The length is checked first so that a truncated url (such as
    # 'file:///' or 'file:///C') is reported as an InvalidURL instead
    # of escaping as an IndexError from the indexing below.
    if (len(win32_url) < 3
        or win32_url[0] not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        or win32_url[1] not in  '|:'
        or win32_url[2] != '/'):
        raise errors.InvalidURL(url, 'Win32 file urls start with file:///X|/, where X is a valid drive letter')
    # The drive letter is upper-cased for consistency; the rest of the
    # url (starting at the slash after the drive) is unescaped.
    return win32_url[0].upper() + u':' + unescape(win32_url[2:])


# Bind the public names to the implementation for the running platform.
if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url
    # On win32 the shortest absolute url also carries a drive letter
    MIN_ABS_URLPATHLENGTH = len('file:///C|/')
else:
    local_path_to_url = _posix_local_path_to_url
    local_path_from_url = _posix_local_path_from_url
    MIN_ABS_URLPATHLENGTH = len('file:///')

1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	1	# Bazaar-NG -- distributed version control
	2	#
	3	# Copyright (C) 2006 by Canonical Ltd
	4	#
	5	# This program is free software; you can redistribute it and/or modify
	6	# it under the terms of the GNU General Public License as published by
	7	# the Free Software Foundation; either version 2 of the License, or
	8	# (at your option) any later version.
	9	#
	10	# This program is distributed in the hope that it will be useful,
	11	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	# GNU General Public License for more details.
	14	#
	15	# You should have received a copy of the GNU General Public License
	16	# along with this program; if not, write to the Free Software
	17	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	18
	19	"""A collection of function for handling URL operations."""
	20
	21	import urllib
	22	import sys
	23
	24	import bzrlib.errors as errors
	25	import bzrlib.osutils
	26
	27
	28	def escape(relpath):
	29	"""Escape relpath to be a valid url."""
	30	if isinstance(relpath, unicode):
	31	relpath = relpath.encode('utf-8')
	32	# After quoting and encoding, the path should be perfectly
	33	# safe as a plain ASCII string, str() just enforces this
	34	return str(urllib.quote(relpath))
	35
	36
	37	def unescape(url):
	38	"""Unescape relpath from url format.
	39
	40	This returns a Unicode path from a URL
	41	"""
	42	# jam 20060427 URLs are supposed to be ASCII only strings
	43	# If they are passed in as unicode, urllib.unquote
	44	# will return a UNICODE string, which actually contains
	45	# utf-8 bytes. So we have to ensure that they are
	46	# plain ASCII strings, or the final .decode will
	47	# try to encode the UNICODE => ASCII, and then decode
	48	# it into utf-8.
	49	try:
	50	url = str(url)
	51	except UnicodeError, e:
	52	raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
	53	unquoted = urllib.unquote(url)
	54	try:
	55	unicode_path = unquoted.decode('utf-8')
	56	except UnicodeError, e:
	57	raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
	58	return unicode_path
	59
	60
	61	def file_relpath(base, path):
	62	"""Compute just the relative sub-portion of a url
	63
	64	This assumes that both paths are already fully specified file:// URLs.
65	"""
66	assert len(base) >= MIN_ABS_URLPATHLENGTH, ('Length of base must be equal or'
67	' exceed the platform minimum url length (which is %d)' %
68	MIN_ABS_URLPATHLENGTH)
69
70	base = local_path_from_url(base)
71	path = local_path_from_url(path)
72	return escape(bzrlib.osutils.relpath(base, path))
73
74
75	def strip_trailing_slash(url):
76	"""Strip trailing slash, except for root paths.
77
78	The definition of 'root path' is platform-dependent.
79	But the passed in URL must be a file:/// url.
80	"""
81	assert url.startswith('file:///'), \
82	'strip_trailing_slash expects file:// urls (%s)' % url
83	if len(url) != MIN_ABS_URLPATHLENGTH and url[-1] == '/':
84	return url[:-1]
85	else:
86	return url
87
88
89	# These are characters that if escaped, should stay that way
90	_no_decode_chars = ';/?:@&=+$,#'
91	_no_decode_ords = [ord(c) for c in _no_decode_chars]
92	_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
93	+ ['%02X' % o for o in _no_decode_ords])
94	_hex_display_map = urllib._hextochr.copy()
95	_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
96	#These entries get mapped to themselves
97
98
99	def unescape_for_display(url):
100	"""Decode what you can for a URL, so that we get a nice looking path.
101
102	This will turn file:// urls into local paths, and try to decode
103	any portions of a http:// style url that it can.
104	"""
105	if url.startswith('file://'):
106	return local_path_from_url(url)
107
108	# Split into sections to try to decode utf-8
109	res = url.split('/')
110	for i in xrange(1, len(res)):
111	escaped_chunks = res[i].split('%')
112	for j in xrange(1, len(escaped_chunks)):
113	item = escaped_chunks[j]
114	try:
115	escaped_chunks[j] = _hex_display_map[item[:2]] + item[2:]
116	except KeyError:
117	# Put back the percent symbol
118	escaped_chunks[j] = '%' + item
119	except UnicodeDecodeError:
120	escaped_chunks[j] = unichr(int(item[:2], 16)) + item[2:]
121	unescaped = ''.join(escaped_chunks)
122	try:
123	res[i] = unescaped.decode('utf-8')
124	except UnicodeDecodeError:
125	# If this path segment cannot be properly utf-8 decoded
126	# after doing unescaping we will just leave it alone
127	pass
128	return '/'.join(res)
129
130	def _posix_local_path_to_url(path):
131	"""Convert a local path like ./foo into a URL like file:///path/to/foo
132
133	This also handles transforming escaping unicode characters, etc.
134	"""
135	# importing directly from posixpath allows us to test this
136	# on non-posix platforms
137	from posixpath import normpath
138	return 'file://' + escape(normpath(bzrlib.osutils._posix_abspath(path)))
139
140
141	def _posix_local_path_from_url(url):
142	"""Convert a url like file:///path/to/foo into /path/to/foo"""
143	if not url.startswith('file:///'):
144	raise errors.InvalidURL(url, 'local urls must start with file:///')
145	# We only strip off 2 slashes
146	return unescape(url[len('file://'):])
147
148
149	def _win32_local_path_to_url(path):
150	"""Convert a local path like ./foo into a URL like file:///C|/path/to/foo
151
152	This also handles transforming escaping unicode characters, etc.
153	"""
154	# importing directly from ntpath allows us to test this
155	# on non-win32 platforms
156	# TODO: jam 20060426 consider moving this import outside of the function
157	win32_path = bzrlib.osutils._nt_normpath(
158	bzrlib.osutils._win32_abspath(path)).replace('\\', '/')
159	return 'file:///' + win32_path[0].upper() + '|' + escape(win32_path[2:])
160
161
162	def _win32_local_path_from_url(url):
163	"""Convert a url like file:///C|/path/to/foo into C:/path/to/foo"""
164	if not url.startswith('file:///'):
165	raise errors.InvalidURL(url, 'local urls must start with file:///')
166	# We strip off all 3 slashes
167	win32_url = url[len('file:///'):]
168	if (win32_url[0] not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
169	or win32_url[1] not in '|:'
170	or win32_url[2] != '/'):
171	raise errors.InvalidURL(url, 'Win32 file urls start with file:///X|/, where X is a valid drive letter')
172	# TODO: jam 20060426, we could .upper() or .lower() the drive letter
173	# for better consistency.
174	return win32_url[0].upper() + u':' + unescape(win32_url[2:])
175
176
177	local_path_to_url = _posix_local_path_to_url
178	local_path_from_url = _posix_local_path_from_url
179	MIN_ABS_URLPATHLENGTH = len('file:///')
180
181	if sys.platform == 'win32':
182	local_path_to_url = _win32_local_path_to_url
183	local_path_from_url = _win32_local_path_from_url
184
185	MIN_ABS_URLPATHLENGTH = len('file:///C|/')