/brz/remove-bazaar : revision 6670.4.7

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/urlutils.py

Committer: Jelmer Vernooij
Date: 2017-06-10 12:50:32 UTC
mfrom: (6679 work)
mto: This revision was merged to the branch mainline in revision 6690.
Revision ID: jelmer@jelmer.uk-20170610125032-xb5rd5fjskjallos

Merge trunk.

files removed:
breezy/plugins/email/README

files modified:
breezy/_rio_py.py

breezy/bzr/_dirstate_helpers_py.py

breezy/bzr/_dirstate_helpers_pyx.pyx

breezy/bzr/branch.py

breezy/bzr/btree_index.py

breezy/bzr/bzrdir.py

breezy/bzr/dirstate.py

breezy/bzr/index.py

breezy/bzr/inventory.py

breezy/bzr/pack_repo.py

breezy/bzr/repository.py

breezy/bzr/workingtree.py

breezy/bzr/workingtree_4.py

breezy/config.py

breezy/generate_ids.py

breezy/iterablefile.py

breezy/lockable_files.py

breezy/osutils.py

breezy/plugins/email/__init__.py

breezy/plugins/email/emailer.py

breezy/revision.py

breezy/rio.py

breezy/tests/__init__.py

breezy/tests/test__dirstate_helpers.py

breezy/tests/treeshape.py

breezy/transport/__init__.py

breezy/transport/local.py

breezy/urlutils.py

Show diffs side-by-side

added added

removed removed

breezy/urlutils.py

""")

from .sixish import (

PY3,

text_type,

)

# urllib module because urllib unconditionally imports socket, which imports

# ssl.

always_safe = (b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

b'abcdefghijklmnopqrstuvwxyz'

b'0123456789' b'_.-')

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'

'abcdefghijklmnopqrstuvwxyz'

'0123456789' '_.-')

_safe_map = {}

for i, c in zip(range(256), bytes(bytearray(range(256)))):

_safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i).encode('ascii')

for i, c in zip(range(256), ''.join(map(chr, range(256)))):

_safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i)

_safe_quoters = {}

def quote(s, safe=b'/'):

def quote(s, safe='/'):

"""quote('abc def') -> 'abc%20def'

Each part of a URL, e.g. the path info, the query, etc., has a

119

120

_safe_quoters[cachekey] = (quoter, safe)

120

121

if not s.rstrip(safe):

121

122

return s

122

return b''.join(map(quoter, s))

123

124

125

_hexdig = '0123456789ABCDEFabcdef'

126

_hextochr = dict((a + b, chr(int(a + b, 16)))

127

for a in _hexdig for b in _hexdig)

128

129

def unquote(s):

130

"""unquote('abc%20def') -> 'abc def'."""

131

res = s.split(b'%')

132

# fastpath

133

if len(res) == 1:

134

return s

135

s = res[0]

136

for item in res[1:]:

137

try:

138

s += _hextochr[item[:2]] + item[2:]

139

except KeyError:

140

s += b'%' + item

141

except UnicodeDecodeError:

142

s += unichr(int(item[:2], 16)) + item[2:]

143

return s

123

return ''.join(map(quoter, s))

124

125

126

unquote = urlparse.unquote

144

127

145

128

146

129

def escape(relpath):

147

130

"""Escape relpath to be a valid url."""

148

if isinstance(relpath, text_type):

131

if not isinstance(relpath, str):

149

132

relpath = relpath.encode('utf-8')

150

return quote(relpath, safe=b'/~')

133

return quote(relpath, safe='/~')

151

134

152

135

153

136

def file_relpath(base, path):

179

162

180

163

# Find the path separating slash

181

164

# (first slash after the ://)

182

first_path_slash = path.find(b'/')

165

first_path_slash = path.find('/')

183

166

if first_path_slash == -1:

184

167

return len(scheme), None

185

168

return len(scheme), first_path_slash+m.start('path')

236

219

We really should try to have exactly one place in the code base responsible

237

220

for combining paths of URLs.

238

221

"""

239

path = base.split(b'/')

240

if len(path) > 1 and path[-1] == b'':

222

path = base.split('/')

223

if len(path) > 1 and path[-1] == '':

241

224

#If the path ends in a trailing /, remove it.

242

225

path.pop()

243

226

for arg in args:

244

if arg.startswith(b'/'):

227

if arg.startswith('/'):

245

228

path = []

246

for chunk in arg.split(b'/'):

247

if chunk == b'.':

229

for chunk in arg.split('/'):

230

if chunk == '.':

248

231

continue

249

elif chunk == b'..':

250

if path == [b'']:

232

elif chunk == '..':

233

if path == ['']:

251

234

raise errors.InvalidURLJoin('Cannot go above root',

252

235

base, args)

253

236

path.pop()

254

237

else:

255

238

path.append(chunk)

256

if path == [b'']:

257

return b'/'

239

if path == ['']:

240

return '/'

258

241

else:

259

return b'/'.join(path)

242

return '/'.join(path)

260

243

261

244

262

245

# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'

263

246

def _posix_local_path_from_url(url):

264

247

"""Convert a url like file:///path/to/foo into /path/to/foo"""

265

248

url = split_segment_parameters_raw(url)[0]

266

file_localhost_prefix = b'file://localhost/'

249

file_localhost_prefix = 'file://localhost/'

267

250

if url.startswith(file_localhost_prefix):

268

251

path = url[len(file_localhost_prefix) - 1:]

269

elif not url.startswith(b'file:///'):

252

elif not url.startswith('file:///'):

270

253

raise errors.InvalidURL(

271

254

url, 'local urls must start with file:/// or file://localhost/')

272

255

else:

273

path = url[len(b'file://'):]

256

path = url[len('file://'):]

274

257

# We only strip off 2 slashes

275

258

return unescape(path)

276

259

282

265

"""

283

266

# importing directly from posixpath allows us to test this

284

267

# on non-posix platforms

285

return b'file://' + escape(osutils._posix_abspath(path))

268

return 'file://' + escape(osutils._posix_abspath(path))

286

269

287

270

288

271

def _win32_local_path_from_url(url):

350

333

MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH

351

334

352

335

353

_url_scheme_re = re.compile(b'^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')

354

_url_hex_escapes_re = re.compile(b'(%[0-9a-fA-F]{2})')

336

_url_scheme_re = re.compile('^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')

337

_url_hex_escapes_re = re.compile('(%[0-9a-fA-F]{2})')

355

338

356

339

357

340

def _unescape_safe_chars(matchobj):

391

374

return local_path_to_url(url)

392

375

prefix = url[:path_start]

393

376

path = url[path_start:]

394

if not isinstance(url, unicode):

377

if not isinstance(url, text_type):

395

378

for c in url:

396

379

if c not in _url_safe_characters:

397

380

raise errors.InvalidURL(url, 'URLs can only contain specific'

527

510

# should not be blindly adding slashes in the first place.

528

511

lurl = strip_trailing_slash(url)

529

512

# Segments begin at first comma after last forward slash, if one exists

530

segment_start = lurl.find(b",", lurl.rfind(b"/")+1)

513

segment_start = lurl.find(",", lurl.rfind("/")+1)

531

514

if segment_start == -1:

532

515

return (url, [])

533

return (lurl[:segment_start], lurl[segment_start+1:].split(b","))

516

return (lurl[:segment_start], lurl[segment_start+1:].split(","))

534

517

535

518

536

519

def split_segment_parameters(url):

621

604

# format which does it differently.

622

605

file:///c|/ => file:///c:/

623

606

"""

624

if not url.endswith(b'/'):

607

if not url.endswith('/'):

625

608

# Nothing to do

626

609

return url

627

if sys.platform == 'win32' and url.startswith(b'file://'):

610

if sys.platform == 'win32' and url.startswith('file://'):

628

611

return _win32_strip_local_trailing_slash(url)

629

612

630

613

scheme_loc, first_path_slash = _find_scheme_and_separator(url)

658

641

url = url.encode("ascii")

659

642

except UnicodeError as e:

660

643

raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))

661

662

unquoted = unquote(url)

644

if PY3:

645

unquoted = urlparse.unquote_to_bytes(url)

646

else:

647

unquoted = unquote(url)

663

648

try:

664

649

unicode_path = unquoted.decode('utf-8')

665

650

except UnicodeError as e:

859

844

860

845

:param url: URL as bytestring

861

846

"""

862

if isinstance(url, unicode):

847

# GZ 2017-06-09: Actually validate ascii-ness

848

if not isinstance(url, str):

863

849

raise errors.InvalidURL('should be ascii:\n%r' % url)

864

url = url.encode('utf-8')

865

850

(scheme, netloc, path, params,

866

851

query, fragment) = urlparse.urlparse(url, allow_fragments=False)

867

852

user = password = host = port = None

Older »