/brz/remove-bazaar : revision 1685.1.51

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/urlutils.py

Committer: John Arbash Meinel
Date: 2006-05-10 19:43:34 UTC
mto: This revision was merged to the branch mainline in revision 1752.
Revision ID: john@arbash-meinel.com-20060510194334-0c20aad23237d047

Working on getting normalize_url working.

files modified:
bzrlib/tests/test_urlutils.py

bzrlib/urlutils.py

Show diffs side-by-side

added

removed

bzrlib/urlutils.py

This is just a helper function for other path utilities.

It could probably be replaced by urlparse

"""

scheme_loc = url.find('://')

if scheme_loc == -1:

m = _url_scheme_re.match(url)

if not m:

return None, None

scheme = m.group('scheme')

path = m.group('path')

# Find the path separating slash

# (first slash after the ://)

first_path_slash = url.find('/', scheme_loc+3)

first_path_slash = path.find('/')

if first_path_slash == -1:

return scheme_loc, None

return scheme_loc, first_path_slash

return scheme_loc, first_path_slash+len(scheme)+3

100

101

102

# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'

177

180

:param url: Either a hybrid URL or a local path

178

181

:return: A normalized URL which only includes 7-bit ASCII characters.

179

182

"""

180

if '://' not in url:

181

return local_path_from_url(url)

183

m = _url_scheme_re.match(url)

184

if not m:

185

return local_path_to_url(url)

186

if not isinstance(url, unicode):

187

# TODO: jam 20060510 We need to test for ascii characters that

188

# shouldn't be allowed in URLs

189

for c in url:

190

if c not in _url_safe_characters:

191

raise errors.InvalidURL(url, 'URLs can only contain specific safe characters')

192

return url

193

# We have a unicode (hybrid) url

194

scheme = m.group('scheme')

195

path = list(m.group('path'))

182

196

197

for i in xrange(len(path)):

198

if path[i] not in _url_safe_characters:

199

chars = path[i].encode('utf-8')

200

path[i] = ''.join(['%%%02X' % ord(c) for c in path[i].encode('utf-8')])

201

return scheme + '://' + ''.join(path)

183

202

184

203

185

204

def split(url, exclude_trailing_slash=True):

209

228

if sys.platform == 'win32' and url.startswith('file:///'):

210

229

# Strip off the drive letter

211

230

if path[2:3] not in '\\/':

212

raise InvalidURL(url,

231

raise errors.InvalidURL(url,

213

232

'win32 file:/// paths need a drive letter')

214

233

url_base += path[1:4] # file:///C|/

215

234

path = path[3:]

249

268

# of a win32 path is actually the drive letter

250

269

if len(url) > MIN_ABS_FILEURL_LENGTH:

251

270

return url[:-1]

271

else:

272

return url

252

273

scheme_loc, first_path_slash = _find_scheme_and_separator(url)

253

274

if scheme_loc is None:

254

275

# This is a relative path, as it has no scheme

294

315

+ ['%02X' % o for o in _no_decode_ords])

295

316

_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]

296

317

+ [('%02X' % o, chr(o)) for o in range(256)]))

318

#These entries get mapped to themselves

297

319

_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)

298

#These entries get mapped to themselves

320

321

# These characters should not be escaped

322

_url_safe_characters = set('abcdefghijklmnopqrstuvwxyz'

323

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

324

'0123456789' '_.-/'

325

';?:@&=+$,%#')

299

326

300

327

301

328

def unescape_for_display(url):

Older »