bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
7490.159.1
by Jelmer Vernooij
Split urllib out. |
1 |
# Copyright (C) 2005-2010 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Base implementation of Transport over http using urllib.
|
|
18 |
||
19 |
There are separate implementation modules for each http client implementation.
|
|
20 |
"""
|
|
21 |
||
22 |
from __future__ import absolute_import |
|
23 |
||
24 |
DEBUG = 0 |
|
25 |
||
26 |
import base64 |
|
27 |
import cgi |
|
28 |
import errno |
|
29 |
import os |
|
30 |
import re |
|
31 |
import socket |
|
32 |
import ssl |
|
33 |
import sys |
|
34 |
import time |
|
35 |
import urllib |
|
36 |
import weakref |
|
37 |
||
38 |
try: |
|
39 |
import http.client as http_client |
|
40 |
except ImportError: |
|
41 |
import httplib as http_client |
|
42 |
try: |
|
43 |
import urllib.request as urllib_request |
|
44 |
except ImportError: # python < 3 |
|
45 |
import urllib2 as urllib_request |
|
46 |
try: |
|
47 |
from urllib.parse import urljoin, splitport, splittype, splithost, urlencode |
|
48 |
except ImportError: |
|
49 |
from urlparse import urljoin |
|
50 |
from urllib import splitport, splittype, splithost, urlencode |
|
51 |
||
52 |
# TODO: handle_response should be integrated into the http/__init__.py
|
|
53 |
from .response import handle_response |
|
54 |
||
55 |
# FIXME: Oversimplifying, two kind of exceptions should be
|
|
56 |
# raised, once a request is issued: URLError before we have been
|
|
57 |
# able to process the response, HTTPError after that. Process the
|
|
58 |
# response means we are able to leave the socket clean, so if we
|
|
59 |
# are not able to do that, we should close the connection. The
|
|
60 |
# actual code more or less do that, tests should be written to
|
|
61 |
# ensure that.
|
|
62 |
||
63 |
from ... import __version__ as breezy_version |
|
64 |
from ... import ( |
|
65 |
config, |
|
66 |
debug, |
|
67 |
errors, |
|
68 |
lazy_import, |
|
69 |
osutils, |
|
70 |
trace, |
|
71 |
transport, |
|
72 |
ui, |
|
73 |
urlutils, |
|
74 |
)
|
|
75 |
from ...bzr.smart import medium |
|
76 |
from ...trace import mutter |
|
77 |
from ...transport import ( |
|
78 |
ConnectedTransport, |
|
79 |
UnusableRedirect, |
|
80 |
)
|
|
81 |
||
82 |
from . import default_user_agent, ssl |
|
83 |
||
84 |
||
85 |
checked_kerberos = False |
|
86 |
kerberos = None |
|
87 |
||
88 |
||
89 |
class addinfourl(urllib_request.addinfourl):
    """urllib_request.addinfourl variant usable by python-2.7's xmlrpclib.

    xmlrpclib in python-2.7 expects the response object it receives to
    offer a ``getheader`` method.  http_client.HTTPResponse provides one,
    but urllib_request.addinfourl does not, so the missing accessors are
    supplied here on top of addinfourl's own internal data structures.
    """

    def getheader(self, name, default=None):
        """Return the value of header *name*, or *default* when absent."""
        headers = self.headers
        if headers is None:
            raise http_client.ResponseNotReady()
        return headers.getheader(name, default)

    def getheaders(self):
        """Return all headers as a list of (name, value) pairs."""
        headers = self.headers
        if headers is None:
            raise http_client.ResponseNotReady()
        return list(headers.items())
|
107 |
||
108 |
||
109 |
class _ReportingFileSocket(object): |
|
110 |
||
111 |
def __init__(self, filesock, report_activity=None): |
|
112 |
self.filesock = filesock |
|
113 |
self._report_activity = report_activity |
|
114 |
||
115 |
def report_activity(self, size, direction): |
|
116 |
if self._report_activity: |
|
117 |
self._report_activity(size, direction) |
|
118 |
||
119 |
def read(self, size=1): |
|
120 |
s = self.filesock.read(size) |
|
121 |
self.report_activity(len(s), 'read') |
|
122 |
return s |
|
123 |
||
124 |
def readline(self, size=-1): |
|
125 |
s = self.filesock.readline(size) |
|
126 |
self.report_activity(len(s), 'read') |
|
127 |
return s |
|
128 |
||
129 |
def readinto(self, b): |
|
130 |
s = self.filesock.readinto(b) |
|
131 |
self.report_activity(s, 'read') |
|
132 |
return s |
|
133 |
||
134 |
def __getattr__(self, name): |
|
135 |
return getattr(self.filesock, name) |
|
136 |
||
137 |
||
138 |
class _ReportingSocket(object): |
|
139 |
||
140 |
def __init__(self, sock, report_activity=None): |
|
141 |
self.sock = sock |
|
142 |
self._report_activity = report_activity |
|
143 |
||
144 |
def report_activity(self, size, direction): |
|
145 |
if self._report_activity: |
|
146 |
self._report_activity(size, direction) |
|
147 |
||
148 |
def sendall(self, s, *args): |
|
149 |
self.sock.sendall(s, *args) |
|
150 |
self.report_activity(len(s), 'write') |
|
151 |
||
152 |
def recv(self, *args): |
|
153 |
s = self.sock.recv(*args) |
|
154 |
self.report_activity(len(s), 'read') |
|
155 |
return s |
|
156 |
||
157 |
def makefile(self, mode='r', bufsize=-1): |
|
158 |
# http_client creates a fileobject that doesn't do buffering, which
|
|
159 |
# makes fp.readline() very expensive because it only reads one byte
|
|
160 |
# at a time. So we wrap the socket in an object that forces
|
|
161 |
# sock.makefile to make a buffered file.
|
|
162 |
fsock = self.sock.makefile(mode, 65536) |
|
163 |
# And wrap that into a reporting kind of fileobject
|
|
164 |
return _ReportingFileSocket(fsock, self._report_activity) |
|
165 |
||
166 |
def __getattr__(self, name): |
|
167 |
return getattr(self.sock, name) |
|
168 |
||
169 |
||
170 |
# We define our own Response class to keep our http_client pipe clean
|
|
171 |
class Response(http_client.HTTPResponse):
    """Custom HTTPResponse, to avoid the need to decorate.

    http_client prefers to decorate the returned objects, rather
    than using a custom object.
    """

    # Some responses have bodies in which we have no interest
    _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]

    # in finish() below, we may have to discard several MB in the worst
    # case. To avoid buffering that much, we read and discard by chunks
    # instead. The underlying file is either a socket or a StringIO, so reading
    # 8k chunks should be fine.
    _discarded_buf_size = 8192

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        # Keep the requested URL around so callers can refer to it; the
        # base class does not expose it on all Python versions.
        self.url = url
        super(Response, self).__init__(
            sock, debuglevel=debuglevel, method=method, url=url)

    def begin(self):
        """Begin to read the response from the server.

        http_client assumes that some responses get no content and do
        not even attempt to read the body in that case, leaving
        the body in the socket, blocking the next request. Let's
        try to workaround that.
        """
        http_client.HTTPResponse.begin(self)
        if self.status in self._body_ignored_responses:
            if self.debuglevel >= 2:
                print("For status: [%s], will ready body, length: %s" % (
                    self.status, self.length))
            if not (self.length is None or self.will_close):
                # In some cases, we just can't read the body not
                # even try or we may encounter a 104, 'Connection
                # reset by peer' error if there is indeed no body
                # and the server closed the connection just after
                # having issued the response headers (even if the
                # headers indicate a Content-Type...)
                body = self.read(self.length)
                if self.debuglevel >= 9:
                    # This one can be huge and is generally not interesting
                    print("Consumed body: [%s]" % body)
            self.close()
        elif self.status == 200:
            # Whatever the request is, it went ok, so we surely don't want to
            # close the connection. Some cases are not correctly detected by
            # http_client.HTTPConnection.getresponse (called by
            # http_client.HTTPResponse.begin). The CONNECT response for the https
            # through proxy case is one. Note: the 'will_close' below refers
            # to the "true" socket between us and the server, whereas the
            # 'close()' above refers to the copy of that socket created by
            # http_client for the response itself. So, in the if above we close the
            # socket to indicate that we are done with the response whereas
            # below we keep the socket with the server opened.
            self.will_close = False

    def finish(self):
        """Finish reading the body.

        In some cases, the client may have left some bytes to read in the
        body. That will block the next request to succeed if we use a
        persistent connection. If we don't use a persistent connection, well,
        nothing will block the next request since a new connection will be
        issued anyway.

        :return: the number of bytes left on the socket (may be None)
        """
        pending = None
        if not self.isclosed():
            # Make sure nothing was left to be read on the socket
            pending = 0
            data = True
            while data and self.length:
                # read() will update self.length
                data = self.read(min(self.length, self._discarded_buf_size))
                pending += len(data)
            if pending:
                trace.mutter("%s bytes left on the HTTP socket", pending)
            self.close()
        return pending
|
254 |
||
255 |
||
256 |
# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
|
|
257 |
class AbstractHTTPConnection:
    """A custom HTTP(S) Connection, which can reset itself on a bad response"""

    # Parse responses with our Response class so unread bodies can be
    # drained later (see Response.finish / cleanup_pipe).
    response_class = Response

    # When we detect a server responding with the whole file to range requests,
    # we want to warn. But not below a given threshold.
    _range_warning_thresold = 1024 * 1024

    def __init__(self, report_activity=None):
        # Last response handed out by getresponse(); cleanup_pipe() drains
        # whatever the client did not read from it.
        self._response = None
        # Optional callback(size, direction) reporting network activity.
        self._report_activity = report_activity
        # Set to True (once) after warning that a multi-range request was
        # answered with the whole file.
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        # Debug helper: log the netloc (and proxied host, if any) we are
        # about to connect to.  host/port/proxied_host are set by the
        # concrete subclasses.
        netloc = '%s:%s' % (self.host, self.port)
        if self.proxied_host is not None:
            netloc += '(proxy for %s)' % self.proxied_host
        trace.mutter('* About to connect() to %s' % netloc)

    def getresponse(self):
        """Capture the response to be able to cleanup"""
        self._response = http_client.HTTPConnection.getresponse(self)
        return self._response

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        if self._response is not None:
            try:
                pending = self._response.finish()
                # Warn the user (once)
                if (self._ranges_received_whole_file is None
                        and self._response.status == 200
                        and pending
                        and pending > self._range_warning_thresold):
                    self._ranges_received_whole_file = True
                    trace.warning(
                        'Got a 200 response when asking for multiple ranges,'
                        ' does your server at %s:%s support range requests?',
                        self.host, self.port)
            except socket.error as e:
                # It's conceivable that the socket is in a bad state here
                # (including some test cases) and in this case, it doesn't need
                # cleaning anymore, so no need to fail, we just get rid of the
                # socket and let callers reconnect
                if (len(e.args) == 0
                        or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)):
                    raise
                self.close()
            self._response = None
        # Preserve our preciousss
        sock = self.sock
        self.sock = None
        # Let http_client.HTTPConnection do its housekeeping
        self.close()
        # Restore our preciousss
        self.sock = sock

    def _wrap_socket_for_reporting(self, sock):
        """Wrap the socket before anybody use it."""
        self.sock = _ReportingSocket(sock, self._report_activity)
|
318 |
||
319 |
||
320 |
class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
    """Plain-http connection combining http_client's transport with the
    cleanup and activity-reporting behaviour of AbstractHTTPConnection."""

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        # ca_certs is accepted for signature compatibility with
        # HTTPSConnection but is meaningless for plain http.
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        http_client.HTTPConnection.__init__(self, host, port)
        self.proxied_host = proxied_host

    def connect(self):
        """Open the TCP connection, then wrap the socket for reporting."""
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
|
335 |
||
336 |
||
337 |
class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
    """HTTPS connection supporting activity reporting and proxy CONNECT."""

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        http_client.HTTPSConnection.__init__(
            self, host, port, key_file, cert_file)
        self.proxied_host = proxied_host
        # Explicit CA bundle to verify the server against; when None,
        # connect_to_origin falls back to the 'ssl.ca_certs' config option.
        self.ca_certs = ca_certs

    def connect(self):
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        # Deliberately call the *HTTP* connect so the socket starts as
        # plain TCP; TLS is only negotiated in connect_to_origin (directly
        # here, or after the proxy CONNECT tunnel is established).
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        """Wrap the existing socket with TLS towards the origin server.

        Called from connect() when no proxy is in use, or by HTTPSHandler
        once the proxy CONNECT tunnel has been established.
        """
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        if self.proxied_host is not None:
            # Through a proxy, certificate checks concern the origin host,
            # not the proxy.
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
            ca_certs = None
        else:
            if self.ca_certs is None:
                ca_certs = config_stack.get('ssl.ca_certs')
            else:
                ca_certs = self.ca_certs
            if ca_certs is None:
                trace.warning(
                    "No valid trusted SSL CA certificates file set. See "
                    "'brz help ssl.ca_certs' for more information on setting "
                    "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            # NOTE(review): when going through a proxy, self.host appears to
            # be the proxy netloc, while the origin host computed above is
            # not passed to wrap_socket — confirm server_hostname is correct
            # in the proxied case.
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=self.host)
        except ssl.SSLError:
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA"
                "certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody use it
        self._wrap_socket_for_reporting(ssl_sock)
|
398 |
||
399 |
||
400 |
class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication have
      been made.
    """

    def __init__(self, method, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        # A mutable default ({}) would be shared across all calls; use
        # None as the default and create a fresh dict per request.
        if headers is None:
            headers = {}
        urllib_request.Request.__init__(
            self, url, data, headers,
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        """Return the HTTP method fixed at construction time."""
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host.

        :param proxy: the proxy netloc the request will go through.
        :param type: the scheme of the request ('http' or 'https').
        """
        host, port = splitport(self.host)
        if port is None:
            # We need to set the default port ourselves way before it gets set
            # in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and a proxy,
        # it sets Host to the https proxy, not the host we want to talk to.
        # I'm fairly sure this is our fault, but what is the cause is an open
        # question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)
|
456 |
||
457 |
||
458 |
class _ConnectRequest(Request):
    """A CONNECT request, used to open an SSL tunnel through a proxy."""

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        """
        # We give a fake url and redefine selector or urllib_request will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        # The selector of a CONNECT request is the origin host:port, not a
        # path on the proxy.
        return self.proxied_host

    def get_selector(self):
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not apply
        here. In fact, the connection is already established with proxy and we
        just want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)
|
492 |
||
493 |
||
494 |
class ConnectionHandler(urllib_request.BaseHandler):
    """Provides connection-sharing by pre-processing requests.

    urllib_request provides no way to access the HTTPConnection object
    internally used. But we need it in order to achieve
    connection sharing. So, we add it to the request just before
    it is processed, and then we override the do_open method for
    http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        # Optional callback(size, direction) propagated to every
        # connection we create.
        self._report_activity = report_activity
        self.ca_certs = ca_certs

    def create_connection(self, request, http_connection_class):
        """Create a new (not yet connected) connection for *request*.

        :raises urlutils.InvalidURL: if the request has no host, or has a
            non-numeric port.
        """
        host = request.host
        if not host:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        # We create a connection (but it will not connect until the first
        # request is made)
        try:
            connection = http_connection_class(
                host, proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL as exception:
            # There is only one occurrence of InvalidURL in http_client.
            # Chain the original exception explicitly so the real cause
            # stays visible in tracebacks.
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port') from exception

        return connection

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request have no connection: create a new one,

        - the request have a connection: this one have been used
          already, let's capture it, so that we can give it to
          another transport to be reused. We don't do that
          ourselves: the Transport object get the connection from
          a first request and then propagate it, from request to
          request or to cloned transports.
        """
        connection = request.connection
        if connection is None:
            # Create a new one
            connection = self.create_connection(request, http_connection_class)
            request.connection = connection

        # All connections will pass here, propagate debug level
        connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        return self.capture_connection(request, HTTPSConnection)
|
559 |
||
560 |
||
561 |
class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    # Headers added to every request unless the caller already set them.
    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Common headers setting"""

        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exception of application level kind, we
        just have to translate them.

        http_client can raise exceptions of transport level (badly
        formatted dialog, loss of connexion or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            origin_req_host = request.origin_req_host
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in incorrect state, it's a bug in our
            # implementation.
            raise exc_val.with_traceback(exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Will retry, %s %r' % (method, url))
                # Transient transport-level failure: drop the connection
                # and retry the request exactly once.
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicates that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) is
                    # experimented, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exception are considered connection related.

                    # socket errors generally occurs for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    selector = request.selector
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Failed again, %s %r' % (method, url))
                    print('  Will raise: [%r]' % my_exception)
                raise my_exception.with_traceback(exc_tb)
        return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get canonical
        # header names, but only python2.5 urllib_request use title() to fix them just
        # before sending the request. And not all versions of python 2.5 do
        # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
        # ourself below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            url = request.selector
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
            convert_to_addinfourl = True
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)
            convert_to_addinfourl = False

        response.msg = response.reason
        return response

        # NOTE(review): everything below this return is unreachable; it is
        # leftover from the pre-Python-3 implementation (socket._fileobject
        # and resp.info().headers do not exist on Python 3) and could be
        # deleted.

        # FIXME: HTTPConnection does not fully support 100-continue (the
        # server responses are just ignored)

        # if code == 100:
        #     mutter('Will send the body')
        #     # We can send the body now
        #     body = request.data
        #     if body is None:
        #         raise URLError("No data given")
        #     connection.send(body)
        #     response = connection.getresponse()

        if self._debuglevel >= 2:
            print('Receives response: %r' % response)
            print('  For: %r(%r)' % (request.get_method(),
                                     request.get_full_url()))

        if convert_to_addinfourl:
            # Shamelessly copied from urllib_request
            req = request
            r = response
            r.recv = r.read
            fp = socket._fileobject(r, bufsize=65536)
            resp = addinfourl(fp, r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason
            resp.version = r.version
            if self._debuglevel >= 2:
                print('Create addinfourl: %r' % resp)
                print('  For: %r(%r)' % (request.get_method(),
                                         request.get_full_url()))
            if 'http' in debug.debug_flags:
                version = 'HTTP/%d.%d'
                try:
                    version = version % (resp.version / 10,
                                         resp.version % 10)
                except:
                    version = 'HTTP/%r' % resp.version
                trace.mutter('< %s %s %s' % (version, resp.code,
                                             resp.msg))
                # Use the raw header lines instead of treating resp.info() as a
                # dict since we may miss duplicated headers otherwise.
                hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
                trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
        else:
            resp = response
        return resp
|
784 |
||
785 |
||
786 |
class HTTPHandler(AbstractHTTPHandler):
    """Plain-http handler delegating all real work to HTTPConnection."""

    def http_open(self, request):
        """Open *request* over an unencrypted HTTP connection."""
        return self.do_open(HTTPConnection, request)
791 |
||
792 |
||
793 |
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Open *request* over https, tunnelling through a proxy if needed.

        When the connection goes through a proxy and is not yet
        established, a CONNECT request is issued first so the encrypted
        link is set up before the real request is sent.
        """
        conn = request.connection
        needs_tunnel = (conn.sock is None
                        and conn.proxied_host is not None
                        # A CONNECT must not trigger another CONNECT,
                        # or we'd loop forever.
                        and request.get_method() != 'CONNECT')
        if needs_tunnel:
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # First contact via a proxy: establish the encrypted link
            # with a CONNECT request before anything else.
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                raise errors.ConnectionError("Can't connect to %s via proxy %s" % (
                    connect.proxied_host, self.host))
            # Housekeeping
            conn.cleanup_pipe()
            # Establish the connection encryption
            conn.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = conn
        return self.do_open(HTTPSConnection, request)
829 |
||
830 |
||
831 |
class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Handles redirect requests.

    We have to implement our own scheme because we use a specific
    Request object and because we want to implement a specific
    policy.
    """
    _debuglevel = DEBUG
    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But Breezy uses some
    # shortcuts to optimize against roundtrips which can leads to
    # write requests being issued before read requests of
    # containing dirs can be redirected. So we redirect write
    # requests in the same way which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request

        :param req: The original (redirected) request.
        :param fp: The file-like object carrying the error body.
        :param code: The http status that triggered the redirection.
        :param msg: The status message.
        :param headers: The response headers (carrying Location/URI).
        :param newurl: The target url of the redirection.
        :return: A new Request for the redirected url.
        :raise urllib_request.HTTPError: For codes we refuse to follow.
        """
        # We would have preferred to update the request instead
        # of creating a new one, but the urllib_request.Request object
        # has a too complicated creation process to provide a
        # simple enough equivalent update process. Instead, when
        # redirecting, we only update the following request in
        # the redirect chain with a reference to the parent
        # request.

        # Some codes make no sense in our context and are treated
        # as errors:

        # 300: Multiple choices for different representations of
        # the URI. Using that mechanism with Breezy will violate the
        # protocol neutrality of Transport.

        # 304: Not modified (SHOULD only occurs with conditional
        # GETs which are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in
        # our context-- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may
        # think that it is a sufficient hint that the file exists
        # and that we MAY avoid following the redirections. But
        # if we want to be sure, we MUST follow them.

        origin_req_host = req.origin_req_host

        if code in (301, 302, 303, 307, 308):
            return Request(req.get_method(), newurl,
                           headers=req.headers,
                           origin_req_host=origin_req_host,
                           unverifiable=True,
                           # TODO: It will be nice to be able to
                           # detect virtual hosts sharing the same
                           # IP address, that will allow us to
                           # share the same connection...
                           connection=None,
                           parent=req,
                           )
        else:
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected to URI.

        Copied from urllib_request to be able to clean the pipe of the associated
        connection, *before* issuing the redirected request but *after* having
        eventually raised an error.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        if 'location' in headers:
            newurl = headers.get('location')
        elif 'uri' in headers:
            newurl = headers.get('uri')
        else:
            # No redirection target given; nothing we can do.
            return

        newurl = urljoin(req.get_full_url(), newurl)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            # The caller asked not to follow; just record where the
            # server wanted to send us and return the raw response.
            req.redirected_to = newurl
            return fp

        # This call succeeds or raise an error. urllib_request returns
        # if redirect_request returns None, but our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = redirected_req.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                # Too many hops to (or through) the same place: give up.
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        else:
            # First redirection for this chain: start the visit count.
            visited = redirected_req.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302
951 |
||
952 |
||
953 |
class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request during
    the request pre-processing instead of modifying it at _open time. As we
    capture (or create) the connection object during request processing, _open
    time was too late.

    The main task is to modify the request so that the connection is done to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may be
    against an https server proxied through an http proxy. So, https_request
    will be called, but later it's really http_open that will be called. This
    explains why we don't have to call self.parent.open as the urllib_request did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        """Set up proxy handling, replacing urllib's *_open hooks.

        :param proxies: Optional mapping of scheme -> proxy url; when
            None urllib collects them from the environment.
        """
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of urllib_request implementation: the base
        # class binds a '<scheme>_open' method per configured proxy, which
        # acts at _open time -- too late for us (see class docstring).
        for type, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (type, proxy))
            delattr(self, '%s_open' % type)

        def bind_scheme_request(proxy, scheme):
            # Install a '<scheme>_request' pre-processor that routes the
            # request through our set_proxy instead.
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))
        # We are interested only by the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        bind_scheme_request(http_proxy, 'http')
        https_proxy = self.get_proxy_env_var('https')
        bind_scheme_request(https_proxy, 'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.

        :param name: The scheme to look up ('http', 'https', 'no'...).
        :param default_to: Fallback key to try when *name* is absent
            (None disables the fallback).
        :return: The proxy url or None when no proxy is configured.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            if default_to is not None:
                # Try to get the alternate environment variable
                try:
                    return self.proxies[default_to]
                except KeyError:
                    pass
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib_request.proxy_bypass(host)
        else:
            return bypass

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if hport == dport or dport is None:
                # Protect glob chars
                dhost = dhost.replace(".", r"\.")
                dhost = dhost.replace("*", r".*")
                dhost = dhost.replace("?", r".")
                # NOTE(review): re.match anchors at the start only; an
                # entry like 'example.com' also matches
                # 'example.combo.net'. Presumably intentional leniency --
                # confirm before tightening with a fullmatch.
                if re.match(dhost, hhost, re.IGNORECASE):
                    return True
        # Nothing explicitly avoid the host
        return None

    def set_proxy(self, request, type):
        """Modify *request* so it is sent to the configured proxy.

        :param request: The request to (possibly) redirect to a proxy.
        :param type: The scheme ('http' or 'https') whose proxy applies.
        :return: The same request, proxied or untouched.
        :raise urlutils.InvalidURL: If the proxy url has no host part.
        """
        host = request.host
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port
        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameter are available, we are handling the first
            # proxied request, initialize. scheme (the authentication scheme)
            # and realm will be set by the AuthHandler
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request
1097 |
||
1098 |
||
1099 |
class AbstractAuthHandler(urllib_request.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate and
    digest authentications.

    This provides a unified interface for all authentication handlers
    (urllib_request provides far too many with different policies).

    The interaction between this handler and the urllib_request
    framework is not obvious, it works as follows:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been updated.
    """

    scheme = None
    """The scheme as it appears in the server header (lower cased)"""

    _max_retry = 3
    """We don't want to retry authenticating endlessly"""

    requires_username = True
    """Whether the auth mechanism requires a username."""

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header: the header received from the server
    # - auth_header: the header sent in the request
    # NOTE(review): self.get_auth (used below) is also expected to be
    # provided elsewhere (subclass or mixin) -- not visible in this file
    # chunk, confirm against the full module.

    def __init__(self):
        # We want to know when we enter into an try/fail cycle of
        # authentications so we initialize to None to indicate that we aren't
        # in such a cycle by default.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Parse the authentication header.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder) scheme being the first word in the
            given header (lower cased), remainder may be None.
        """
        try:
            scheme, remainder = server_header.split(None, 1)
        except ValueError:
            # Header was a single word: a scheme with no parameters.
            scheme = server_header
            remainder = None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        old_value = auth.get(key, None)
        if old_value != value:
            auth[key] = value
            # The 'modified' flag lets auth_required distinguish a fresh
            # attempt from a retry of already-rejected credentials.
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # The retry being recursive calls, None identifies the first retry
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None
        server_headers = headers.get_all(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            trace.mutter('%s not found', self.auth_required_header)
            return None

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            self.update_auth(auth, 'protocol', parsed_url.scheme)
            self.update_auth(auth, 'host', parsed_url.host)
            self.update_auth(auth, 'port', parsed_url.port)
            self.update_auth(auth, 'path', parsed_url.path)
        # FIXME: the auth handler should be selected at a single place instead
        # of letting all handlers try to match all headers, but the current
        # design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match each
            # one in turn
            matching_handler = self.auth_match(server_header, auth)
            if matching_handler:
                # auth_match may have modified auth (by adding the
                # password or changing the realm, for example)
                if (request.get_header(self.auth_header, None) is not None
                        and not auth['modified']):
                    # We already tried that, give up
                    return None

                # Only the most secure scheme proposed by the server should be
                # used, since the handlers use 'handler_order' to describe that
                # property, the first handler tried takes precedence, the
                # others should not attempt to authenticate if the best one
                # failed.
                best_scheme = auth.get('best_scheme', None)
                if best_scheme is None:
                    # At that point, if current handler should doesn't succeed
                    # the credentials are wrong (or incomplete), but we know
                    # that the associated scheme should be used.
                    best_scheme = auth['best_scheme'] = self.scheme
                if best_scheme != self.scheme:
                    # A more secure handler already claimed this request.
                    continue

                if self.requires_username and auth.get('user', None) is None:
                    # Without a known user, we can't authenticate
                    return None

                # Housekeeping
                request.connection.cleanup_pipe()
                # Retry the request with an authentication header added
                response = self.parent.open(request)
                if response:
                    self.auth_successful(request, response)
                return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them change the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
            updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional info may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial url
            and then during dialog with the server).
        :return: A (user, password) tuple; either may be None.
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(auth['protocol'], auth['host'],
                                      port=port, path=auth['path'],
                                      realm=realm, ask=True,
                                      prompt=self.build_username_prompt(auth))
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_password_prompt using this method.
        """
        # %(user)s and %(host)s are left as literal placeholders: the
        # caller formats the prompt against the auth dict later.
        prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' password'
        return prompt

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_username_prompt using this method.
        """
        # %(host)s is a literal placeholder formatted by the caller.
        prompt = u'%s' % auth['protocol'].upper() + u' %(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' username'
        return prompt

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            self.add_auth_header(
                request, self.build_auth_header(auth, request))
        return request

    https_request = http_request  # FIXME: Need test
1378 |
||
1379 |
||
1380 |
class NegotiateAuthHandler(AbstractAuthHandler):
    """A authentication handler that handles WWW-Authenticate: Negotiate.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    handler_order = 480
    # Kerberos/GSSAPI does not need a username from the user.
    requires_username = False

    def auth_match(self, header, auth):
        """See AbstractAuthHandler.auth_match."""
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        resp = self._auth_match_kerberos(auth)
        if resp is None:
            return False
        # Optionally should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', resp)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host."""
        # The kerberos module is optional; import it at most once and
        # cache the outcome in module-level globals.
        global kerberos, checked_kerberos
        if kerberos is None and not checked_kerberos:
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None
        ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if ret < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], ret)
            return None
        ret = kerberos.authGSSClientStep(vc, "")
        if ret < 0:
            trace.mutter('authGSSClientStep failed: %d', ret)
            return None
        return kerberos.authGSSClientResponse(vc)

    def build_auth_header(self, auth, request):
        """See AbstractAuthHandler.build_auth_header."""
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        """See AbstractAuthHandler (preemptive header emission)."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return (auth.get('scheme', None) == 'negotiate' and
                auth.get('negotiate_response', None) is not None)
1434 |
||
1435 |
||
1436 |
class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    # Captures the realm advertised in a 'Basic realm="..."' challenge.
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Build the 'Basic <base64(user:password)>' header value."""
        credentials = '%s:%s' % (auth['user'], auth['password'])
        encoded = base64.b64encode(credentials.encode('utf-8')).decode('ascii')
        return 'Basic ' + encoded

    def extract_realm(self, header_value):
        """Return (match, realm) for the realm quoted in *header_value*.

        Both are None-ish when no realm parameter is present.
        """
        match = self.auth_regexp.search(header_value)
        return match, (match.group(1) if match else None)

    def auth_match(self, header, auth):
        """See AbstractAuthHandler.auth_match."""
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match is None:
            return False
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        missing_credentials = (auth.get('user', None) is None
                               or auth.get('password', None) is None)
        if missing_credentials:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)
        return True
1478 |
||
1479 |
||
1480 |
def get_digest_algorithm_impls(algorithm):
    """Return the (H, KD) primitives for a digest auth algorithm name.

    H hashes a byte string to a hex digest; KD combines a secret with
    some data as described by RFC 2617 (H("secret:data")).

    :param algorithm: The algorithm name from the server challenge
        ('MD5' or 'SHA').
    :return: A (H, KD) tuple of callables, or (None, None) when the
        algorithm is not supported.
    """
    if algorithm == 'MD5':
        def hash_func(x):
            return osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        hash_func = osutils.sha_string
    else:
        return None, None

    def key_digest(secret, data):
        return hash_func(("%s:%s" % (secret, data)).encode('utf-8'))
    return hash_func, key_digest
1491 |
||
1492 |
||
1493 |
def get_new_cnonce(nonce, nonce_count):
    """Build a fresh client nonce for digest authentication.

    Mixes the server nonce, the use count, the current time and some
    random characters, and returns the first 16 characters of the SHA
    digest of that seed.
    """
    seed = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(),
                            osutils.rand_chars(8))
    return osutils.sha_string(seed.encode('utf-8'))[:16]
1497 |
||
1498 |
||
1499 |
class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490
1505 |
||
1506 |
def auth_params_reusable(self, auth): |
|
1507 |
# If the auth scheme is known, it means a previous
|
|
1508 |
# authentication was successful, all information is
|
|
1509 |
# available, no further checks are needed.
|
|
1510 |
return auth.get('scheme', None) == 'digest' |
|
1511 |
||
1512 |
def auth_match(self, header, auth): |
|
1513 |
scheme, raw_auth = self._parse_auth_header(header) |
|
1514 |
if scheme != self.scheme: |
|
1515 |
return False |
|
1516 |
||
1517 |
# Put the requested authentication info into a dict
|
|
1518 |
req_auth = urllib_request.parse_keqv_list( |
|
1519 |
urllib_request.parse_http_list(raw_auth)) |
|
1520 |
||
1521 |
# Check that we can handle that authentication
|
|
1522 |
qop = req_auth.get('qop', None) |
|
1523 |
if qop != 'auth': # No auth-int so far |
|
1524 |
return False |
|
1525 |
||
1526 |
H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5')) |
|
1527 |
if H is None: |
|
1528 |
return False |
|
1529 |
||
1530 |
realm = req_auth.get('realm', None) |
|
1531 |
# Put useful info into auth
|
|
1532 |
self.update_auth(auth, 'scheme', scheme) |
|
1533 |
self.update_auth(auth, 'realm', realm) |
|
1534 |
if auth.get('user', None) is None or auth.get('password', None) is None: |
|
1535 |
user, password = self.get_user_password(auth) |
|
1536 |
self.update_auth(auth, 'user', user) |
|
1537 |
self.update_auth(auth, 'password', password) |
|
1538 |
||
1539 |
try: |
|
1540 |
if req_auth.get('algorithm', None) is not None: |
|
1541 |
self.update_auth(auth, 'algorithm', req_auth.get('algorithm')) |
|
1542 |
nonce = req_auth['nonce'] |
|
1543 |
if auth.get('nonce', None) != nonce: |
|
1544 |
# A new nonce, never used
|
|
1545 |
self.update_auth(auth, 'nonce_count', 0) |
|
1546 |
self.update_auth(auth, 'nonce', nonce) |
|
1547 |
self.update_auth(auth, 'qop', qop) |
|
1548 |
auth['opaque'] = req_auth.get('opaque', None) |
|
1549 |
except KeyError: |
|
1550 |
# Some required field is not there
|
|
1551 |
return False |
|
1552 |
||
1553 |
return True |
|
1554 |
||
1555 |
def build_auth_header(self, auth, request): |
|
|
7526.2.2
by Jelmer Vernooij
Avoid sixish. |
1556 |
selector = request.selector |
|
7490.159.1
by Jelmer Vernooij
Split urllib out. |
1557 |
url_scheme, url_selector = splittype(selector) |
1558 |
sel_host, uri = splithost(url_selector) |
|
1559 |
||
1560 |
A1 = ('%s:%s:%s' % |
|
1561 |
(auth['user'], auth['realm'], auth['password'])).encode('utf-8') |
|
1562 |
A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8') |
|
1563 |
||
1564 |
nonce = auth['nonce'] |
|
1565 |
qop = auth['qop'] |
|
1566 |
||
1567 |
nonce_count = auth['nonce_count'] + 1 |
|
1568 |
ncvalue = '%08x' % nonce_count |
|
1569 |
cnonce = get_new_cnonce(nonce, nonce_count) |
|
1570 |
||
1571 |
H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5')) |
|
1572 |
nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2)) |
|
1573 |
request_digest = KD(H(A1), nonce_data) |
|
1574 |
||
1575 |
header = 'Digest ' |
|
1576 |
header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'], |
|
1577 |
auth['realm'], |
|
1578 |
nonce) |
|
1579 |
header += ', uri="%s"' % uri |
|
1580 |
header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue) |
|
1581 |
header += ', qop="%s"' % qop |
|
1582 |
header += ', response="%s"' % request_digest |
|
1583 |
# Append the optional fields
|
|
1584 |
opaque = auth.get('opaque', None) |
|
1585 |
if opaque: |
|
1586 |
header += ', opaque="%s"' % opaque |
|
1587 |
if auth.get('algorithm', None): |
|
1588 |
header += ', algorithm="%s"' % auth.get('algorithm') |
|
1589 |
||
1590 |
# We have used the nonce once more, update the count
|
|
1591 |
auth['nonce_count'] = nonce_count |
|
1592 |
||
1593 |
return header |
|
1594 |
||
1595 |
||
1596 |
class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Sends the credentials preemptively to avoid the extra round-trip a
    401 response would cost, and keeps the relevant info in the
    request's ``auth`` attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Return the auth params carried by the request."""
        return request.auth

    def set_auth(self, request, auth):
        """Attach the auth params to the request."""
        request.auth = auth

    def build_password_prompt(self, auth):
        return self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        return self._build_username_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)
|
1623 |
||
1624 |
||
1625 |
class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Sends the credentials preemptively to avoid the extra round-trip a
    407 response would cost, and keeps the relevant info in the
    request's ``proxy_auth`` attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Return the proxy auth params carried by the request."""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Attach the proxy auth params to the request."""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        return u'Proxy ' + self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        return u'Proxy ' + self._build_username_prompt(auth)

    def http_error_407(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)
|
1659 |
||
1660 |
||
1661 |
class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Basic authentication handler for http (401) challenges."""
|
1663 |
||
1664 |
||
1665 |
class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Basic authentication handler for proxy (407) challenges."""
|
1667 |
||
1668 |
||
1669 |
class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Digest authentication handler for http (401) challenges."""
|
1671 |
||
1672 |
||
1673 |
class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Digest authentication handler for proxy (407) challenges."""
|
1675 |
||
1676 |
||
1677 |
class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Negotiate authentication handler for http (401) challenges."""
|
1679 |
||
1680 |
||
1681 |
class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Negotiate authentication handler for proxy (407) challenges."""
|
1683 |
||
1684 |
||
1685 |
class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors, quite the contrary
    instead, we leave our Transport handle them.
    """

    # Statuses passed through untouched; anything else is routed through
    # the opener's error machinery (redirects, auth retries, ...).
    accepted_errors = [200,  # Ok
                       201,
                       202,
                       204,
                       206,  # Partial content
                       400,
                       403,
                       404,  # Not found
                       405,  # Method not allowed
                       406,  # Not Acceptable
                       409,  # Conflict
                       416,  # Range not satisfiable
                       422,  # Unprocessible entity
                       501,  # Not implemented
                       ]
    """The error codes the caller will handle.

    This can be specialized in the request on a case-by case basis, but the
    common cases are covered here.
    """

    def http_response(self, request, response):
        """Return accepted responses as-is, delegate the rest to the opener."""
        code, msg, hdrs = response.code, response.msg, response.info()

        if code not in self.accepted_errors:
            # Trigger the registered error handlers; whatever they
            # return (or raise) replaces the response.
            response = self.parent.error('http', request, response,
                                         code, msg, hdrs)
        return response

    # Same policy for https.
    https_response = http_response
|
1722 |
||
1723 |
||
1724 |
class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise a breezy-level exception for any unhandled HTTP status."""
        if code != 403:
            raise errors.UnexpectedHttpStatus(
                req.get_full_url(), code,
                'Unable to handle http code: %s' % msg)
        raise errors.TransportError(
            'Server refuses to fulfill the request (403 Forbidden)'
            ' for %s' % req.get_full_url())
|
1736 |
||
1737 |
||
1738 |
class Opener(object):
    """A wrapper around urllib_request.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        # Handler order matters: connection/redirect/error first, then
        # every authentication handler (http and proxy variants), then
        # the protocol handlers and the default error translator.
        self._opener = urllib_request.build_opener(
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect, error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            )

        # Expose the opener's open() directly.
        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)
|
1773 |
||
1774 |
||
1775 |
class HttpTransport(ConnectedTransport):
    """HTTP Client implementations.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.
    """

    # _unqualified_scheme: "http" or "https"
    # _scheme: may have "+pycurl", etc

    # In order to debug we have to issue our traces in sync with
    # httplib, which use print :(
    # Debug traces are off by default.
    _debuglevel = 0
|
1788 |
||
1789 |
def __init__(self, base, _from_transport=None, ca_certs=None): |
|
1790 |
"""Set the base path where files will be stored.""" |
|
1791 |
proto_match = re.match(r'^(https?)(\+\w+)?://', base) |
|
1792 |
if not proto_match: |
|
1793 |
raise AssertionError("not a http url: %r" % base) |
|
1794 |
self._unqualified_scheme = proto_match.group(1) |
|
1795 |
super(HttpTransport, self).__init__( |
|
1796 |
base, _from_transport=_from_transport) |
|
1797 |
self._medium = None |
|
1798 |
# range hint is handled dynamically throughout the life
|
|
1799 |
# of the transport object. We start by trying multi-range
|
|
1800 |
# requests and if the server returns bogus results, we
|
|
1801 |
# retry with single range requests and, finally, we
|
|
1802 |
# forget about range if the server really can't
|
|
1803 |
# understand. Once acquired, this piece of info is
|
|
1804 |
# propagated to clones.
|
|
1805 |
if _from_transport is not None: |
|
1806 |
self._range_hint = _from_transport._range_hint |
|
1807 |
self._opener = _from_transport._opener |
|
1808 |
else: |
|
1809 |
self._range_hint = 'multi' |
|
1810 |
self._opener = Opener( |
|
1811 |
report_activity=self._report_activity, ca_certs=ca_certs) |
|
1812 |
||
1813 |
    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """Perform an HTTP request and return a urllib3-style response.

        :param method: HTTP verb ('GET', 'HEAD', 'POST', ...).
        :param url: absolute URL to request.
        :param fields: form fields to urlencode into the body; mutually
            exclusive with the 'body' keyword argument.
        :param headers: optional dict of extra request headers.
        :param urlopen_kw: only 'body' and 'retries' are understood;
            retries > 0 enables following redirections.
        :raises ValueError: if both fields and body are given.
        :raises errors.RedirectRequested: on a redirect status when
            redirections are not followed.
        """
        body = urlopen_kw.pop('body', None)
        if fields is not None:
            data = urlencode(fields).encode()
            if body is not None:
                raise ValueError(
                    'body and fields are mutually exclusive')
        else:
            data = body
        if headers is None:
            headers = {}
        request = Request(method, url, data, headers)
        request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
        if urlopen_kw:
            raise NotImplementedError(
                'unknown arguments: %r' % urlopen_kw.keys())
        connection = self._get_connection()
        if connection is not None:
            # Give back shared info
            request.connection = connection
            (auth, proxy_auth) = self._get_credentials()
            # Clean the httplib.HTTPConnection pipeline in case the previous
            # request couldn't do it
            connection.cleanup_pipe()
        else:
            # First request, initialize credentials.
            # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
            auth = self._create_auth()
            # Proxy initialization will be done by the first proxied request
            proxy_auth = dict()
        # Ensure authentication info is provided
        request.auth = auth
        request.proxy_auth = proxy_auth

        if self._debuglevel > 0:
            print('perform: %s base: %s, url: %s' % (request.method, self.base,
                                                     request.get_full_url()))
        response = self._opener.open(request)
        if self._get_connection() is not request.connection:
            # First connection or reconnection
            self._set_connection(request.connection,
                                 (request.auth, request.proxy_auth))
        else:
            # http may change the credentials while keeping the
            # connection opened
            self._update_credentials((request.auth, request.proxy_auth))

        code = response.code
        if (request.follow_redirections is False
                and code in (301, 302, 303, 307, 308)):
            raise errors.RedirectRequested(request.get_full_url(),
                                           request.redirected_to,
                                           is_permanent=(code in (301, 308)))

        if request.redirected_to is not None:
            trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                         request.redirected_to))

        # Minimal adapter exposing the subset of the urllib3 response
        # API (status/reason/data/text/getheader...) callers rely on.
        class Urllib3LikeResponse(object):

            def __init__(self, actual):
                self._actual = actual
                # Body cache for the 'data' property.
                self._data = None

            def getheader(self, name, default=None):
                """Return one response header, or default if absent."""
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                return self._actual.headers.get(name, default)

            def getheaders(self):
                """Return all response headers as a list of pairs."""
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                return list(self._actual.headers.items())

            @property
            def status(self):
                return self._actual.code

            @property
            def reason(self):
                return self._actual.reason

            @property
            def data(self):
                # Read the whole body once and cache it.
                if self._data is None:
                    self._data = self._actual.read()
                return self._data

            @property
            def text(self):
                # Decode the body using the Content-Type charset when
                # one is declared; 204 has no body at all.
                if self.status == 204:
                    return None
                charset = cgi.parse_header(
                    self._actual.headers['Content-Type'])[1].get('charset')
                if charset:
                    return self.data.decode(charset)
                else:
                    return self.data.decode()

            def read(self, amt=None):
                return self._actual.read(amt)

            def readlines(self):
                return self._actual.readlines()

            def readline(self, size=-1):
                return self._actual.readline(size)

        return Urllib3LikeResponse(response)
|
1922 |
||
1923 |
def disconnect(self): |
|
1924 |
connection = self._get_connection() |
|
1925 |
if connection is not None: |
|
1926 |
connection.close() |
|
1927 |
||
1928 |
def has(self, relpath): |
|
1929 |
"""Does the target location exist? |
|
1930 |
"""
|
|
1931 |
response = self._head(relpath) |
|
1932 |
||
1933 |
code = response.status |
|
1934 |
if code == 200: # "ok", |
|
1935 |
return True |
|
1936 |
else: |
|
1937 |
return False |
|
1938 |
||
1939 |
def get(self, relpath): |
|
1940 |
"""Get the file at the given relative path. |
|
1941 |
||
1942 |
:param relpath: The relative path to the file
|
|
1943 |
"""
|
|
1944 |
code, response_file = self._get(relpath, None) |
|
1945 |
return response_file |
|
1946 |
||
1947 |
def _get(self, relpath, offsets, tail_amount=0): |
|
1948 |
"""Get a file, or part of a file. |
|
1949 |
||
1950 |
:param relpath: Path relative to transport base URL
|
|
1951 |
:param offsets: None to get the whole file;
|
|
1952 |
or a list of _CoalescedOffset to fetch parts of a file.
|
|
1953 |
:param tail_amount: The amount to get from the end of the file.
|
|
1954 |
||
1955 |
:returns: (http_code, result_file)
|
|
1956 |
"""
|
|
1957 |
abspath = self._remote_path(relpath) |
|
1958 |
headers = {} |
|
1959 |
if offsets or tail_amount: |
|
1960 |
range_header = self._attempted_range_header(offsets, tail_amount) |
|
1961 |
if range_header is not None: |
|
1962 |
bytes = 'bytes=' + range_header |
|
1963 |
headers = {'Range': bytes} |
|
1964 |
else: |
|
1965 |
range_header = None |
|
1966 |
||
1967 |
response = self.request('GET', abspath, headers=headers) |
|
1968 |
||
1969 |
if response.status == 404: # not found |
|
1970 |
raise errors.NoSuchFile(abspath) |
|
1971 |
elif response.status == 416: |
|
1972 |
# We don't know which, but one of the ranges we specified was
|
|
1973 |
# wrong.
|
|
1974 |
raise errors.InvalidHttpRange(abspath, range_header, |
|
1975 |
'Server return code %d' % response.status) |
|
1976 |
elif response.status == 400: |
|
1977 |
if range_header: |
|
1978 |
# We don't know which, but one of the ranges we specified was
|
|
1979 |
# wrong.
|
|
1980 |
raise errors.InvalidHttpRange( |
|
1981 |
abspath, range_header, |
|
1982 |
'Server return code %d' % response.status) |
|
1983 |
else: |
|
1984 |
raise errors.BadHttpRequest(abspath, response.reason) |
|
1985 |
elif response.status not in (200, 206): |
|
1986 |
raise errors.UnexpectedHttpStatus(abspath, response.status) |
|
1987 |
||
1988 |
data = handle_response( |
|
1989 |
abspath, response.status, response.getheader, response) |
|
1990 |
return response.status, data |
|
1991 |
||
1992 |
def _remote_path(self, relpath): |
|
1993 |
"""See ConnectedTransport._remote_path. |
|
1994 |
||
1995 |
user and passwords are not embedded in the path provided to the server.
|
|
1996 |
"""
|
|
1997 |
url = self._parsed_url.clone(relpath) |
|
1998 |
url.user = url.quoted_user = None |
|
1999 |
url.password = url.quoted_password = None |
|
2000 |
url.scheme = self._unqualified_scheme |
|
2001 |
return str(url) |
|
2002 |
||
2003 |
def _create_auth(self): |
|
2004 |
"""Returns a dict containing the credentials provided at build time.""" |
|
2005 |
auth = dict(host=self._parsed_url.host, port=self._parsed_url.port, |
|
2006 |
user=self._parsed_url.user, password=self._parsed_url.password, |
|
2007 |
protocol=self._unqualified_scheme, |
|
2008 |
path=self._parsed_url.path) |
|
2009 |
return auth |
|
2010 |
||
2011 |
def get_smart_medium(self): |
|
2012 |
"""See Transport.get_smart_medium.""" |
|
2013 |
if self._medium is None: |
|
2014 |
# Since medium holds some state (smart server probing at least), we
|
|
2015 |
# need to keep it around. Note that this is needed because medium
|
|
2016 |
# has the same 'base' attribute as the transport so it can't be
|
|
2017 |
# shared between transports having different bases.
|
|
2018 |
self._medium = SmartClientHTTPMedium(self) |
|
2019 |
return self._medium |
|
2020 |
||
2021 |
def _degrade_range_hint(self, relpath, ranges): |
|
2022 |
if self._range_hint == 'multi': |
|
2023 |
self._range_hint = 'single' |
|
2024 |
mutter('Retry "%s" with single range request' % relpath) |
|
2025 |
elif self._range_hint == 'single': |
|
2026 |
self._range_hint = None |
|
2027 |
mutter('Retry "%s" without ranges' % relpath) |
|
2028 |
else: |
|
2029 |
# We tried all the tricks, but nothing worked, caller must reraise.
|
|
2030 |
return False |
|
2031 |
return True |
|
2032 |
||
2033 |
    # _coalesce_offsets is a helper for readv, it tries to combine ranges
    # without degrading readv performances. _bytes_to_read_before_seek is the
    # value used for the limit parameter and has been tuned for other
    # transports. For HTTP, the name is inappropriate but the parameter is
    # still useful and helps reduce the number of chunks in the response. The
    # overhead for a chunk (headers, length, footer around the data itself) is
    # variable but around 50 bytes. We use 128 to reduce the range specifiers
    # that appear in the header; some servers (notably Apache) enforce a
    # maximum length for a header and issue a '400: Bad request' error when
    # too many ranges are specified.
    _bytes_to_read_before_seek = 128
    # No limit on the offset number that get combined into one, we are trying
    # to avoid downloading the whole file.
    _max_readv_combine = 0
    # By default Apache has a limit of ~400 ranges before replying with a 400
    # Bad Request. So we go underneath that amount to be safe.
    _max_get_ranges = 200
    # We impose no limit on the range size. But see _pycurl.py for a different
    # use.
    _get_max_size = 0
|
2053 |
||
2054 |
    def _readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param relpath: Path relative to the transport base.
        :param offsets: A list of (offset, size) tuples.
        :returns: A generator of (offset, data) tuples, in the order the
            offsets were requested.
        """
        # offsets may be a generator, we will iterate it several times, so
        # build a list
        offsets = list(offsets)

        try_again = True
        retried_offset = None
        while try_again:
            try_again = False

            # Coalesce the offsets to minimize the GET requests issued
            sorted_offsets = sorted(offsets)
            coalesced = self._coalesce_offsets(
                sorted_offsets, limit=self._max_readv_combine,
                fudge_factor=self._bytes_to_read_before_seek,
                max_size=self._get_max_size)

            # Turn it into a list, we will iterate it several times
            coalesced = list(coalesced)
            if 'http' in debug.debug_flags:
                mutter('http readv of %s offsets => %s collapsed %s',
                       relpath, len(offsets), len(coalesced))

            # Cache the data read, but only until it's been used
            data_map = {}
            # We will iterate on the data received from the GET requests and
            # serve the corresponding offsets respecting the initial order. We
            # need an offset iterator for that.
            iter_offsets = iter(offsets)
            try:
                cur_offset_and_size = next(iter_offsets)
            except StopIteration:
                # Nothing was requested.
                return

            try:
                for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
                    # Split the received chunk
                    for offset, size in cur_coal.ranges:
                        start = cur_coal.start + offset
                        rfile.seek(start, os.SEEK_SET)
                        data = rfile.read(size)
                        data_len = len(data)
                        if data_len != size:
                            raise errors.ShortReadvError(relpath, start, size,
                                                         actual=data_len)
                        if (start, size) == cur_offset_and_size:
                            # The offset requested are sorted as the coalesced
                            # ones, no need to cache. Win !
                            yield cur_offset_and_size[0], data
                            try:
                                cur_offset_and_size = next(iter_offsets)
                            except StopIteration:
                                return
                        else:
                            # Different sorting. We need to cache.
                            data_map[(start, size)] = data

                        # Yield everything we can
                        while cur_offset_and_size in data_map:
                            # Clean the cached data since we use it
                            # XXX: will break if offsets contains duplicates --
                            # vila20071129
                            this_data = data_map.pop(cur_offset_and_size)
                            yield cur_offset_and_size[0], this_data
                            try:
                                cur_offset_and_size = next(iter_offsets)
                            except StopIteration:
                                return

            except (errors.ShortReadvError, errors.InvalidRange,
                    errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
                mutter('Exception %r: %s during http._readv', e, e)
                if (not isinstance(e, errors.ShortReadvError)
                        or retried_offset == cur_offset_and_size):
                    # We don't degrade the range hint for ShortReadvError since
                    # they do not indicate a problem with the server ability to
                    # handle ranges. Except when we fail to get back a required
                    # offset twice in a row. In that case, falling back to
                    # single range or whole file should help.
                    if not self._degrade_range_hint(relpath, coalesced):
                        raise
                # Some offsets may have been already processed, so we retry
                # only the unsuccessful ones.
                offsets = [cur_offset_and_size] + [o for o in iter_offsets]
                retried_offset = cur_offset_and_size
                try_again = True
|
2145 |
||
2146 |
    def _coalesce_readv(self, relpath, coalesced):
        """Issue several GET requests to satisfy the coalesced offsets.

        :param relpath: Path relative to the transport base.
        :param coalesced: list of _CoalescedOffset to fetch.
        :returns: a generator of (coalesced_offset, file_object) pairs;
            several offsets may share the same file object.
        """

        def get_and_yield(relpath, coalesced):
            if coalesced:
                # Note that the _get below may raise
                # errors.InvalidHttpRange. It's the caller's responsibility to
                # decide how to retry since it may provide different coalesced
                # offsets.
                code, rfile = self._get(relpath, coalesced)
                for coal in coalesced:
                    yield coal, rfile

        if self._range_hint is None:
            # Download whole file
            for c, rfile in get_and_yield(relpath, coalesced):
                yield c, rfile
        else:
            total = len(coalesced)
            if self._range_hint == 'multi':
                max_ranges = self._max_get_ranges
            elif self._range_hint == 'single':
                max_ranges = total
            else:
                raise AssertionError("Unknown _range_hint %r"
                                     % (self._range_hint,))
            # TODO: Some web servers may ignore the range requests and return
            # the whole file, we may want to detect that and avoid further
            # requests.
            # Hint: test_readv_multiple_get_requests will fail once we do that
            cumul = 0
            ranges = []
            # Batch the coalesced offsets so that no single GET exceeds
            # max_ranges range specifiers or _get_max_size bytes.
            for coal in coalesced:
                if ((self._get_max_size > 0
                     and cumul + coal.length > self._get_max_size) or
                        len(ranges) >= max_ranges):
                    # Get that much and yield
                    for c, rfile in get_and_yield(relpath, ranges):
                        yield c, rfile
                    # Restart with the current offset
                    ranges = [coal]
                    cumul = coal.length
                else:
                    ranges.append(coal)
                    cumul += coal.length
            # Get the rest and yield
            for c, rfile in get_and_yield(relpath, ranges):
                yield c, rfile
|
2194 |
||
2195 |
def recommended_page_size(self): |
|
2196 |
"""See Transport.recommended_page_size(). |
|
2197 |
||
2198 |
For HTTP we suggest a large page size to reduce the overhead
|
|
2199 |
introduced by latency.
|
|
2200 |
"""
|
|
2201 |
return 64 * 1024 |
|
2202 |
||
2203 |
def _post(self, body_bytes): |
|
2204 |
"""POST body_bytes to .bzr/smart on this transport. |
|
2205 |
||
2206 |
:returns: (response code, response body file-like object).
|
|
2207 |
"""
|
|
2208 |
# TODO: Requiring all the body_bytes to be available at the beginning of
|
|
2209 |
# the POST may require large client buffers. It would be nice to have
|
|
2210 |
# an interface that allows streaming via POST when possible (and
|
|
2211 |
# degrades to a local buffer when not).
|
|
2212 |
abspath = self._remote_path('.bzr/smart') |
|
2213 |
response = self.request( |
|
2214 |
'POST', abspath, body=body_bytes, |
|
2215 |
headers={'Content-Type': 'application/octet-stream'}) |
|
2216 |
if response.status not in (200, 403): |
|
2217 |
raise errors.UnexpectedHttpStatus(abspath, response.status) |
|
2218 |
code = response.status |
|
2219 |
data = handle_response( |
|
2220 |
abspath, code, response.getheader, response) |
|
2221 |
return code, data |
|
2222 |
||
2223 |
def _head(self, relpath): |
|
2224 |
"""Request the HEAD of a file. |
|
2225 |
||
2226 |
Performs the request and leaves callers handle the results.
|
|
2227 |
"""
|
|
2228 |
abspath = self._remote_path(relpath) |
|
2229 |
response = self.request('HEAD', abspath) |
|
2230 |
if response.status not in (200, 404): |
|
2231 |
raise errors.UnexpectedHttpStatus(abspath, response.status) |
|
2232 |
||
2233 |
return response |
|
2234 |
||
2235 |
raise NotImplementedError(self._post) |
|
2236 |
||
2237 |
def put_file(self, relpath, f, mode=None): |
|
2238 |
"""Copy the file-like object into the location. |
|
2239 |
||
2240 |
:param relpath: Location to put the contents, relative to base.
|
|
2241 |
:param f: File-like object.
|
|
2242 |
"""
|
|
2243 |
raise errors.TransportNotPossible('http PUT not supported') |
|
2244 |
||
2245 |
def mkdir(self, relpath, mode=None): |
|
2246 |
"""Create a directory at the given path.""" |
|
2247 |
raise errors.TransportNotPossible('http does not support mkdir()') |
|
2248 |
||
2249 |
def rmdir(self, relpath): |
|
2250 |
"""See Transport.rmdir.""" |
|
2251 |
raise errors.TransportNotPossible('http does not support rmdir()') |
|
2252 |
||
2253 |
def append_file(self, relpath, f, mode=None): |
|
2254 |
"""Append the text in the file-like object into the final |
|
2255 |
location.
|
|
2256 |
"""
|
|
2257 |
raise errors.TransportNotPossible('http does not support append()') |
|
2258 |
||
2259 |
def copy(self, rel_from, rel_to): |
|
2260 |
"""Copy the item at rel_from to the location at rel_to""" |
|
2261 |
raise errors.TransportNotPossible('http does not support copy()') |
|
2262 |
||
2263 |
def copy_to(self, relpaths, other, mode=None, pb=None): |
|
2264 |
"""Copy a set of entries from self into another Transport. |
|
2265 |
||
2266 |
:param relpaths: A list/generator of entries to be copied.
|
|
2267 |
||
2268 |
TODO: if other is LocalTransport, is it possible to
|
|
2269 |
do better than put(get())?
|
|
2270 |
"""
|
|
2271 |
# At this point HttpTransport might be able to check and see if
|
|
2272 |
# the remote location is the same, and rather than download, and
|
|
2273 |
# then upload, it could just issue a remote copy_this command.
|
|
2274 |
if isinstance(other, HttpTransport): |
|
2275 |
raise errors.TransportNotPossible( |
|
2276 |
'http cannot be the target of copy_to()') |
|
2277 |
else: |
|
2278 |
return super(HttpTransport, self).\ |
|
2279 |
copy_to(relpaths, other, mode=mode, pb=pb) |
|
2280 |
||
2281 |
def move(self, rel_from, rel_to): |
|
2282 |
"""Move the item at rel_from to the location at rel_to""" |
|
2283 |
raise errors.TransportNotPossible('http does not support move()') |
|
2284 |
||
2285 |
def delete(self, relpath): |
|
2286 |
"""Delete the item at relpath""" |
|
2287 |
raise errors.TransportNotPossible('http does not support delete()') |
|
2288 |
||
2289 |
def external_url(self): |
|
2290 |
"""See breezy.transport.Transport.external_url.""" |
|
2291 |
# HTTP URL's are externally usable as long as they don't mention their
|
|
2292 |
# implementation qualifier
|
|
2293 |
url = self._parsed_url.clone() |
|
2294 |
url.scheme = self._unqualified_scheme |
|
2295 |
return str(url) |
|
2296 |
||
2297 |
def is_readonly(self): |
|
2298 |
"""See Transport.is_readonly.""" |
|
2299 |
return True |
|
2300 |
||
2301 |
def listable(self): |
|
2302 |
"""See Transport.listable.""" |
|
2303 |
return False |
|
2304 |
||
2305 |
def stat(self, relpath): |
|
2306 |
"""Return the stat information for a file. |
|
2307 |
"""
|
|
2308 |
raise errors.TransportNotPossible('http does not support stat()') |
|
2309 |
||
2310 |
def lock_read(self, relpath): |
|
2311 |
"""Lock the given file for shared (read) access. |
|
2312 |
:return: A lock object, which should be passed to Transport.unlock()
|
|
2313 |
"""
|
|
2314 |
# The old RemoteBranch ignore lock for reading, so we will
|
|
2315 |
# continue that tradition and return a bogus lock object.
|
|
2316 |
class BogusLock(object): |
|
2317 |
def __init__(self, path): |
|
2318 |
self.path = path |
|
2319 |
||
2320 |
def unlock(self): |
|
2321 |
pass
|
|
2322 |
return BogusLock(relpath) |
|
2323 |
||
2324 |
def lock_write(self, relpath): |
|
2325 |
"""Lock the given file for exclusive (write) access. |
|
2326 |
WARNING: many transports do not support this, so trying avoid using it
|
|
2327 |
||
2328 |
:return: A lock object, which should be passed to Transport.unlock()
|
|
2329 |
"""
|
|
2330 |
raise errors.TransportNotPossible('http does not support lock_write()') |
|
2331 |
||
2332 |
def _attempted_range_header(self, offsets, tail_amount): |
|
2333 |
"""Prepare a HTTP Range header at a level the server should accept. |
|
2334 |
||
2335 |
:return: the range header representing offsets/tail_amount or None if
|
|
2336 |
no header can be built.
|
|
2337 |
"""
|
|
2338 |
||
2339 |
if self._range_hint == 'multi': |
|
2340 |
# Generate the header describing all offsets
|
|
2341 |
return self._range_header(offsets, tail_amount) |
|
2342 |
elif self._range_hint == 'single': |
|
2343 |
# Combine all the requested ranges into a single
|
|
2344 |
# encompassing one
|
|
2345 |
if len(offsets) > 0: |
|
2346 |
if tail_amount not in (0, None): |
|
2347 |
# Nothing we can do here to combine ranges with tail_amount
|
|
2348 |
# in a single range, just returns None. The whole file
|
|
2349 |
# should be downloaded.
|
|
2350 |
return None |
|
2351 |
else: |
|
2352 |
start = offsets[0].start |
|
2353 |
last = offsets[-1] |
|
2354 |
end = last.start + last.length - 1 |
|
2355 |
whole = self._coalesce_offsets([(start, end - start + 1)], |
|
2356 |
limit=0, fudge_factor=0) |
|
2357 |
return self._range_header(list(whole), 0) |
|
2358 |
else: |
|
2359 |
# Only tail_amount, requested, leave range_header
|
|
2360 |
# do its work
|
|
2361 |
return self._range_header(offsets, tail_amount) |
|
2362 |
else: |
|
2363 |
return None |
|
2364 |
||
2365 |
@staticmethod
|
|
2366 |
def _range_header(ranges, tail_amount): |
|
2367 |
"""Turn a list of bytes ranges into a HTTP Range header value. |
|
2368 |
||
2369 |
:param ranges: A list of _CoalescedOffset
|
|
2370 |
:param tail_amount: The amount to get from the end of the file.
|
|
2371 |
||
2372 |
:return: HTTP range header string.
|
|
2373 |
||
2374 |
At least a non-empty ranges *or* a tail_amount must be
|
|
2375 |
provided.
|
|
2376 |
"""
|
|
2377 |
strings = [] |
|
2378 |
for offset in ranges: |
|
2379 |
strings.append('%d-%d' % (offset.start, |
|
2380 |
offset.start + offset.length - 1)) |
|
2381 |
||
2382 |
if tail_amount: |
|
2383 |
strings.append('-%d' % tail_amount) |
|
2384 |
||
2385 |
return ','.join(strings) |
|
2386 |
||
2387 |
    def _redirected_to(self, source, target):
        """Returns a transport suitable to re-issue a redirected request.

        :param source: The source url as returned by the server.
        :param target: The target url as returned by the server.

        The redirection can be handled only if the relpath involved is not
        renamed by the redirection.

        :returns: A transport
        :raise UnusableRedirect: when the URL can not be reinterpreted
        """
        parsed_source = self._split_url(source)
        parsed_target = self._split_url(target)
        pl = len(self._parsed_url.path)
        # determine the excess tail - the relative path that was in
        # the original request but not part of this transports' URL.
        excess_tail = parsed_source.path[pl:].strip("/")
        if not parsed_target.path.endswith(excess_tail):
            # The final part of the url has been renamed, we can't handle the
            # redirection.
            raise UnusableRedirect(
                source, target, "final part of the url was renamed")

        target_path = parsed_target.path
        if excess_tail:
            # Drop the tail that was in the redirect but not part of
            # the path of this transport.
            target_path = target_path[:-len(excess_tail)]

        if parsed_target.scheme in ('http', 'https'):
            # Same protocol family (i.e. http[s]), we will preserve the same
            # http client implementation when a redirection occurs from one to
            # the other (otherwise users may be surprised that bzr switches
            # from one implementation to the other, and devs may suffer
            # debugging it).
            if (parsed_target.scheme == self._unqualified_scheme
                    and parsed_target.host == self._parsed_url.host
                    and parsed_target.port == self._parsed_url.port
                    and (parsed_target.user is None or
                         parsed_target.user == self._parsed_url.user)):
                # If a user is specified, it should match, we don't care about
                # passwords, wrong passwords will be rejected anyway.
                # Same endpoint, so cloning keeps connection/auth state.
                return self.clone(target_path)
            else:
                # Rebuild the url preserving the scheme qualification and the
                # credentials (if they don't apply, the redirected to server
                # will tell us, but if they do apply, we avoid prompting the
                # user)
                redir_scheme = parsed_target.scheme
                new_url = self._unsplit_url(redir_scheme,
                                            self._parsed_url.user,
                                            self._parsed_url.password,
                                            parsed_target.host, parsed_target.port,
                                            target_path)
                return transport.get_transport_from_url(new_url)
        else:
            # Redirected to a different protocol; build a brand-new transport
            # from the target's own credentials and location.
            new_url = self._unsplit_url(parsed_target.scheme,
                                        parsed_target.user,
                                        parsed_target.password,
                                        parsed_target.host, parsed_target.port,
                                        target_path)
            return transport.get_transport_from_url(new_url)
|
2451 |
||
2452 |
def _options(self, relpath): |
|
2453 |
abspath = self._remote_path(relpath) |
|
2454 |
resp = self.request('OPTIONS', abspath) |
|
2455 |
if resp.status == 404: |
|
2456 |
raise errors.NoSuchFile(abspath) |
|
2457 |
if resp.status in (403, 405): |
|
2458 |
raise errors.InvalidHttpResponse( |
|
2459 |
abspath, |
|
2460 |
"OPTIONS not supported or forbidden for remote URL") |
|
2461 |
return resp.getheaders() |
|
2462 |
||
2463 |
||
2464 |
# TODO: May be better located in smart/medium.py with the other
|
|
2465 |
# SmartMedium classes
|
|
2466 |
class SmartClientHTTPMedium(medium.SmartClientMedium):
    """Smart client medium that tunnels requests over plain HTTP POSTs."""

    def __init__(self, http_transport):
        super(SmartClientHTTPMedium, self).__init__(http_transport.base)
        # Hold the transport only weakly: the transport outlives the medium,
        # and a strong reference here would create a reference cycle between
        # the two objects.
        self._http_transport_ref = weakref.ref(http_transport)

    def get_request(self):
        """Return a fresh medium request bound to this medium."""
        return SmartClientHTTPMediumRequest(self)

    def should_probe(self):
        # An HTTP server may or may not speak the smart protocol; probing is
        # needed to find out.
        return True

    def remote_path_from_transport(self, transport):
        """Express *transport*'s location relative to this medium's base."""
        # Strip the optional 'bzr+' prefix so the compared URLs share the
        # same scheme as self.
        base_url = transport.base
        if base_url.startswith('bzr+'):
            base_url = base_url[4:]
        relative = urlutils.relative_url(self.base, base_url)
        return urlutils.unquote(relative)

    def send_http_smart_request(self, bytes):
        """POST *bytes* to the smart server and return the response body."""
        try:
            # Dereference the weak ref to get the live transport back.
            http_transport = self._http_transport_ref()
            code, body_filelike = http_transport._post(bytes)
            if code != 200:
                raise errors.UnexpectedHttpStatus(
                    http_transport._remote_path('.bzr/smart'), code)
        except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
            raise errors.SmartProtocolError(str(e))
        return body_filelike

    def _report_activity(self, bytes, direction):
        """See SmartMedium._report_activity.

        Intentionally a no-op: the underlying plain HTTP transport already
        reports the very same activity.
        """

    def disconnect(self):
        """See SmartClientMedium.disconnect()."""
        http_transport = self._http_transport_ref()
        http_transport.disconnect()
|
2515 |
||
2516 |
||
2517 |
# TODO: May be better located in smart/medium.py with the other
|
|
2518 |
# SmartMediumRequest classes
|
|
2519 |
class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium."""

    def __init__(self, client_medium):
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        # Outgoing request bytes accumulate here until the caller finishes
        # writing; the whole buffer is then sent as a single POST.
        self._buffer = b''

    def _accept_bytes(self, bytes):
        self._buffer = self._buffer + bytes

    def _finished_writing(self):
        # Fire the buffered request and keep the file-like response body
        # around for the read side of this request.
        self._response_body = self._medium.send_http_smart_request(
            self._buffer)

    def _read_bytes(self, count):
        """See SmartClientMediumRequest._read_bytes."""
        return self._response_body.read(count)

    def _read_line(self):
        """Read a single protocol line from the response body."""
        line, excess = medium._get_line(self._response_body.read)
        if excess != b'':
            raise AssertionError(
                '_get_line returned excess bytes, but this mediumrequest '
                'cannot handle excess. (%r)' % (excess,))
        return line

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""
|
|
2548 |
||
2549 |
||
2550 |
def unhtml_roughly(maybe_html, length_limit=1000): |
|
2551 |
"""Very approximate html->text translation, for presenting error bodies. |
|
2552 |
||
2553 |
:param length_limit: Truncate the result to this many characters.
|
|
2554 |
||
2555 |
>>> unhtml_roughly("<b>bad</b> things happened\\n")
|
|
2556 |
' bad things happened '
|
|
2557 |
"""
|
|
2558 |
return re.subn(r"(<[^>]*>|\n| )", " ", maybe_html)[0][:length_limit] |
|
2559 |
||
2560 |
||
2561 |
def get_test_permutations():
    """Return the permutations to be used in testing."""
    from breezy.tests import (
        features,
        http_server,
        )
    # Plain HTTP is always testable.
    permutations = [(HttpTransport, http_server.HttpServer)]
    # HTTPS testing needs the optional https server feature plus the test
    # suite's self-signed certificates.
    if features.HTTPSServerFeature.available():
        from breezy.tests import (
            https_server,
            ssl_certs,
            )

        class HTTPS_transport(HttpTransport):
            """HttpTransport that trusts the test suite's self-signed CA."""

            def __init__(self, base, _from_transport=None):
                super(HTTPS_transport, self).__init__(
                    base, _from_transport=_from_transport,
                    ca_certs=ssl_certs.build_path('ca.crt'))

        permutations.append(
            (HTTPS_transport, https_server.HTTPSServer))
    return permutations