# Copyright (C) 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Implementation of urllib2 tailored to bzr needs

This file complements the urllib2 class hierarchy with custom classes.

For instance, we create a new HTTPConnection and HTTPSConnection that inherit
from the original urllib2.HTTP(s)Connection objects, but also have a new base
which implements a custom getresponse and cleanup_pipe handlers.

And then we implement custom HTTPHandler and HTTPSHandler classes, that use
the custom HTTPConnection classes.

We have a custom Response class, which lets us maintain a keep-alive
connection even for requests that urllib2 doesn't expect to contain body data.

And a custom Request class that lets us track redirections, and
handle authentication schemes.

For coherency with python libraries, we use capitalized header names throughout
the code, even if the header names will be titled just before sending the
request (see AbstractHTTPHandler.do_open).
"""

# FIXME: Oversimplifying, two kinds of exceptions should be
# raised, once a request is issued: URLError before we have been
# able to process the response, HTTPError after that. Process the
# response means we are able to leave the socket clean, so if we
# are not able to do that, we should close the connection. The
# actual code more or less does that; tests should be written to
# ensure that.
import httplib
import re
import socket
import sys
import urllib
import urllib2
import urlparse

from bzrlib import __version__ as bzrlib_version
from bzrlib import (
    config,
    debug,
    errors,
    trace,
    transport,
    )
class _BufferedMakefileSocket(object):
73
def __init__(self, sock):
76
def makefile(self, mode='r', bufsize=-1):
77
return self.sock.makefile(mode, 65536)
79
def __getattr__(self, name):
80
return getattr(self.sock, name)
83
# We define our own Response class to keep our httplib pipe clean
84
class Response(httplib.HTTPResponse):
85
"""Custom HTTPResponse, to avoid the need to decorate.
87
httplib prefers to decorate the returned objects, rather
88
than using a custom object.
91
# Some responses have bodies in which we have no interest
92
_body_ignored_responses = [301,302, 303, 307, 401, 403, 404]
94
# in finish() below, we may have to discard several MB in the worst
95
# case. To avoid buffering that much, we read and discard by chunks
96
# instead. The underlying file is either a socket or a StringIO, so reading
97
# 8k chunks should be fine.
98
_discarded_buf_size = 8192
100
def __init__(self, sock, *args, **kwargs):
101
# httplib creates a fileobject that doesn't do buffering, which
102
# makes fp.readline() very expensive because it only reads one byte
103
# at a time. So we wrap the socket in an object that forces
104
# sock.makefile to make a buffered file.
105
sock = _BufferedMakefileSocket(sock)
106
httplib.HTTPResponse.__init__(self, sock, *args, **kwargs)
109
"""Begin to read the response from the server.
111
httplib assumes that some responses get no content and do
112
not even attempt to read the body in that case, leaving
113
the body in the socket, blocking the next request. Let's
114
try to workaround that.
116
httplib.HTTPResponse.begin(self)
117
if self.status in self._body_ignored_responses:
118
if self.debuglevel >= 2:
119
print "For status: [%s]," % self.status,
120
print "will ready body, length: %s" % self.length
121
if not (self.length is None or self.will_close):
122
# In some cases, we just can't read the body not
123
# even try or we may encounter a 104, 'Connection
124
# reset by peer' error if there is indeed no body
125
# and the server closed the connection just after
126
# having issued the response headers (even if the
127
# headers indicate a Content-Type...)
128
body = self.read(self.length)
129
if self.debuglevel >= 9:
130
# This one can be huge and is generally not interesting
131
print "Consumed body: [%s]" % body
133
elif self.status == 200:
134
# Whatever the request is, it went ok, so we surely don't want to
135
# close the connection. Some cases are not correctly detected by
136
# httplib.HTTPConnection.getresponse (called by
137
# httplib.HTTPResponse.begin). The CONNECT response for the https
138
# through proxy case is one. Note: the 'will_close' below refers
139
# to the "true" socket between us and the server, whereas the
140
# 'close()' above refers to the copy of that socket created by
141
# httplib for the response itself. So, in the if above we close the
142
# socket to indicate that we are done with the response whereas
143
# below we keep the socket with the server opened.
144
self.will_close = False
147
"""Finish reading the body.
149
In some cases, the client may have left some bytes to read in the
150
body. That will block the next request to succeed if we use a
151
persistent connection. If we don't use a persistent connection, well,
152
nothing will block the next request since a new connection will be
155
:return: the number of bytes left on the socket (may be None)
158
if not self.isclosed():
159
# Make sure nothing was left to be read on the socket
162
while data and self.length:
163
# read() will update self.length
164
data = self.read(min(self.length, self._discarded_buf_size))
167
trace.mutter("%s bytes left on the HTTP socket", pending)
172
# Not inheriting from 'object' because httplib.HTTPConnection doesn't.
class AbstractHTTPConnection:
    """A custom HTTP(S) Connection, which can reset itself on a bad response"""

    response_class = Response

    # When we detect a server responding with the whole file to range requests,
    # we want to warn. But not below a given thresold.
    _range_warning_thresold = 1024 * 1024

    def __init__(self):
        # The last response received on this connection, kept so that
        # cleanup_pipe can drain its unread body.
        self._response = None
        # Becomes True (once) after we warned about a server ignoring ranges.
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        """Trace the target of the upcoming connect() for -Dhttp."""
        netloc = '%s:%s' % (self.host, self.port)
        if self.proxied_host is not None:
            netloc += '(proxy for %s)' % self.proxied_host
        trace.mutter('* About to connect() to %s' % netloc)

    def getresponse(self):
        """Capture the response to be able to cleanup"""
        self._response = httplib.HTTPConnection.getresponse(self)
        return self._response

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        if self._response is not None:
            pending = self._response.finish()
            # Warn the user (once)
            if (self._ranges_received_whole_file is None
                and self._response.status == 200
                and pending and pending > self._range_warning_thresold):
                self._ranges_received_whole_file = True
                trace.warning(
                    'Got a 200 response when asking for multiple ranges,'
                    ' does your server at %s:%s support range requests?',
                    self.host, self.port)
            self._response = None
        # Preserve our preciousss
        sock = self.sock
        self.sock = None
        # Let httplib.HTTPConnection do its housekeeping
        self.close()
        # Restore our preciousss
        self.sock = sock
class HTTPConnection(AbstractHTTPConnection, httplib.HTTPConnection):
    """An HTTP connection that can be shared and cleaned between requests."""

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None):
        AbstractHTTPConnection.__init__(self)
        # Use strict=True since we don't support HTTP/0.9
        httplib.HTTPConnection.__init__(self, host, port, strict=True)
        self.proxied_host = proxied_host

    def connect(self):
        """Connect, tracing the target first when -Dhttp is active."""
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        httplib.HTTPConnection.connect(self)
# FIXME: Should test for ssl availability
class HTTPSConnection(AbstractHTTPConnection, httplib.HTTPSConnection):
    """An HTTPS connection, aware of proxy CONNECT tunneling."""

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None):
        AbstractHTTPConnection.__init__(self)
        # Use strict=True since we don't support HTTP/0.9
        httplib.HTTPSConnection.__init__(self, host, port,
                                         key_file, cert_file, strict=True)
        self.proxied_host = proxied_host

    def connect(self):
        """Connect the plain socket; defer SSL when going through a proxy.

        When proxied, the SSL handshake (connect_to_origin) happens only
        after the CONNECT request succeeded (see HTTPSHandler.https_open).
        """
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        httplib.HTTPConnection.connect(self)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        """Wrap the established socket with SSL towards the real server."""
        ssl = socket.ssl(self.sock, self.key_file, self.cert_file)
        self.sock = httplib.FakeSocket(self.sock, ssl)
class Request(urllib2.Request):
    """A custom Request object.

    urllib2 determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication have
      been made.
    """

    def __init__(self, method, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None,
                 accepted_errors=None):
        urllib2.Request.__init__(self, url, data, headers,
                                 origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        self.accepted_errors = accepted_errors
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        # Method was fixed at construction time, no heuristic needed.
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        self.proxied_host = self.get_host()
        urllib2.Request.set_proxy(self, proxy, type)
class _ConnectRequest(Request):
    """A CONNECT request issued to a proxy to establish an SSL tunnel."""

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        """
        # We give a fake url and redefine get_selector or urllib2 will be
        # confused.
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    def get_selector(self):
        # The CONNECT target is the proxied host itself, not a path.
        return self.proxied_host

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not apply
        here. In fact, the connection is already established with proxy and we
        just want to enable the SSL tunneling.
        """
        urllib2.Request.set_proxy(self, proxy, type)
class ConnectionHandler(urllib2.BaseHandler):
337
"""Provides connection-sharing by pre-processing requests.
339
urllib2 provides no way to access the HTTPConnection object
340
internally used. But we need it in order to achieve
341
connection sharing. So, we add it to the request just before
342
it is processed, and then we override the do_open method for
343
http[s] requests in AbstractHTTPHandler.
346
handler_order = 1000 # after all pre-processings
348
def create_connection(self, request, http_connection_class):
349
host = request.get_host()
351
# Just a bit of paranoia here, this should have been
352
# handled in the higher levels
353
raise errors.InvalidURL(request.get_full_url(), 'no host given.')
355
# We create a connection (but it will not connect until the first
358
connection = http_connection_class(
359
host, proxied_host=request.proxied_host)
360
except httplib.InvalidURL, exception:
361
# There is only one occurrence of InvalidURL in httplib
362
raise errors.InvalidURL(request.get_full_url(),
363
extra='nonnumeric port')
367
def capture_connection(self, request, http_connection_class):
368
"""Capture or inject the request connection.
371
- the request have no connection: create a new one,
373
- the request have a connection: this one have been used
374
already, let's capture it, so that we can give it to
375
another transport to be reused. We don't do that
376
ourselves: the Transport object get the connection from
377
a first request and then propagate it, from request to
378
request or to cloned transports.
380
connection = request.connection
381
if connection is None:
383
connection = self.create_connection(request, http_connection_class)
384
request.connection = connection
386
# All connections will pass here, propagate debug level
387
connection.set_debuglevel(DEBUG)
390
def http_request(self, request):
391
return self.capture_connection(request, HTTPConnection)
393
def https_request(self, request):
394
return self.capture_connection(request, HTTPSConnection)
397
class AbstractHTTPHandler(urllib2.AbstractHTTPHandler):
398
"""A custom handler for HTTP(S) requests.
400
We overrive urllib2.AbstractHTTPHandler to get a better
401
control of the connection, the ability to implement new
402
request types and return a response able to cope with
403
persistent connections.
406
# We change our order to be before urllib2 HTTP[S]Handlers
407
# and be chosen instead of them (the first http_open called
411
_default_headers = {'Pragma': 'no-cache',
412
'Cache-control': 'max-age=0',
413
'Connection': 'Keep-Alive',
414
'User-agent': 'bzr/%s (urllib)' % bzrlib_version,
419
urllib2.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)
421
def http_request(self, request):
422
"""Common headers setting"""
424
request.headers.update(self._default_headers.copy())
425
# FIXME: We may have to add the Content-Length header if
426
# we have data to send.
429
def retry_or_raise(self, http_class, request, first_try):
430
"""Retry the request (once) or raise the exception.
432
urllib2 raises exception of application level kind, we
433
just have to translate them.
435
httplib can raise exceptions of transport level (badly
436
formatted dialog, loss of connexion or socket level
437
problems). In that case we should issue the request again
438
(httplib will close and reopen a new connection if
441
# When an exception occurs, we give back the original
442
# Traceback or the bugs are hard to diagnose.
443
exc_type, exc_val, exc_tb = sys.exc_info()
444
if exc_type == socket.gaierror:
445
# No need to retry, that will not help
446
raise errors.ConnectionError("Couldn't resolve host '%s'"
447
% request.get_origin_req_host(),
449
elif isinstance(exc_val, httplib.ImproperConnectionState):
450
# The httplib pipeline is in incorrect state, it's a bug in our
452
raise exc_type, exc_val, exc_tb
455
if self._debuglevel >= 2:
456
print 'Received exception: [%r]' % exc_val
457
print ' On connection: [%r]' % request.connection
458
method = request.get_method()
459
url = request.get_full_url()
460
print ' Will retry, %s %r' % (method, url)
461
request.connection.close()
462
response = self.do_open(http_class, request, False)
464
if self._debuglevel >= 2:
465
print 'Received second exception: [%r]' % exc_val
466
print ' On connection: [%r]' % request.connection
467
if exc_type in (httplib.BadStatusLine, httplib.UnknownProtocol):
468
# httplib.BadStatusLine and
469
# httplib.UnknownProtocol indicates that a
470
# bogus server was encountered or a bad
471
# connection (i.e. transient errors) is
472
# experimented, we have already retried once
473
# for that request so we raise the exception.
474
my_exception = errors.InvalidHttpResponse(
475
request.get_full_url(),
476
'Bad status line received',
479
# All other exception are considered connection related.
481
# socket errors generally occurs for reasons
482
# far outside our scope, so closing the
483
# connection and retrying is the best we can
486
my_exception = errors.ConnectionError(
487
msg= 'while sending %s %s:' % (request.get_method(),
488
request.get_selector()),
491
if self._debuglevel >= 2:
492
print 'On connection: [%r]' % request.connection
493
method = request.get_method()
494
url = request.get_full_url()
495
print ' Failed again, %s %r' % (method, url)
496
print ' Will raise: [%r]' % my_exception
497
raise my_exception, None, exc_tb
500
def do_open(self, http_class, request, first_try=True):
501
"""See urllib2.AbstractHTTPHandler.do_open for the general idea.
503
The request will be retried once if it fails.
505
connection = request.connection
506
if connection is None:
507
raise AssertionError(
508
'Cannot process a request without a connection')
510
# Get all the headers
512
headers.update(request.header_items())
513
headers.update(request.unredirected_hdrs)
514
# Some servers or proxies will choke on headers not properly
515
# cased. httplib/urllib/urllib2 all use capitalize to get canonical
516
# header names, but only python2.5 urllib2 use title() to fix them just
517
# before sending the request. And not all versions of python 2.5 do
518
# that. Since we replace urllib2.AbstractHTTPHandler.do_open we do it
520
headers = dict((name.title(), val) for name, val in headers.iteritems())
523
method = request.get_method()
524
url = request.get_selector()
525
connection._send_request(method, url,
526
# FIXME: implements 100-continue
527
#None, # We don't send the body yet
530
if 'http' in debug.debug_flags:
531
trace.mutter('> %s %s' % (method, url))
532
hdrs = ['%s: %s' % (k, v) for k,v in headers.items()]
533
trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
534
if self._debuglevel >= 1:
535
print 'Request sent: [%r] from (%s)' \
536
% (request, request.connection.sock.getsockname())
537
response = connection.getresponse()
538
convert_to_addinfourl = True
539
except (socket.gaierror, httplib.BadStatusLine, httplib.UnknownProtocol,
540
socket.error, httplib.HTTPException):
541
response = self.retry_or_raise(http_class, request, first_try)
542
convert_to_addinfourl = False
544
# FIXME: HTTPConnection does not fully support 100-continue (the
545
# server responses are just ignored)
548
# mutter('Will send the body')
549
# # We can send the body now
550
# body = request.get_data()
552
# raise URLError("No data given")
553
# connection.send(body)
554
# response = connection.getresponse()
556
if self._debuglevel >= 2:
557
print 'Receives response: %r' % response
558
print ' For: %r(%r)' % (request.get_method(),
559
request.get_full_url())
561
if convert_to_addinfourl:
562
# Shamelessly copied from urllib2
566
fp = socket._fileobject(r, bufsize=65536)
567
resp = urllib2.addinfourl(fp, r.msg, req.get_full_url())
570
resp.version = r.version
571
if self._debuglevel >= 2:
572
print 'Create addinfourl: %r' % resp
573
print ' For: %r(%r)' % (request.get_method(),
574
request.get_full_url())
575
if 'http' in debug.debug_flags:
576
version = 'HTTP/%d.%d'
578
version = version % (resp.version / 10,
581
version = 'HTTP/%r' % resp.version
582
trace.mutter('< %s %s %s' % (version, resp.code,
584
# Use the raw header lines instead of treating resp.info() as a
585
# dict since we may miss duplicated headers otherwise.
586
hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
587
trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
593
class HTTPHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPConnection"""

    def http_open(self, request):
        return self.do_open(HTTPConnection, request)
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT' : # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # Was a bare (undefined) ConnectionError; qualify with the
                # errors module so the failure is reported, not a NameError.
                # NOTE(review): 'self.host' looks dubious for a handler —
                # confirm against callers before relying on this message.
                raise errors.ConnectionError(
                    "Can't connect to %s via proxy %s" % (
                        connect.proxied_host, self.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)
class HTTPRedirectHandler(urllib2.HTTPRedirectHandler):
638
"""Handles redirect requests.
640
We have to implement our own scheme because we use a specific
641
Request object and because we want to implement a specific
645
# RFC2616 says that only read requests should be redirected
646
# without interacting with the user. But bzr use some
647
# shortcuts to optimize against roundtrips which can leads to
648
# write requests being issued before read requests of
649
# containing dirs can be redirected. So we redirect write
650
# requests in the same way which seems to respect the spirit
651
# of the RFC if not its letter.
653
def redirect_request(self, req, fp, code, msg, headers, newurl):
654
"""See urllib2.HTTPRedirectHandler.redirect_request"""
655
# We would have preferred to update the request instead
656
# of creating a new one, but the urllib2.Request object
657
# has a too complicated creation process to provide a
658
# simple enough equivalent update process. Instead, when
659
# redirecting, we only update the following request in
660
# the redirect chain with a reference to the parent
663
# Some codes make no sense in our context and are treated
666
# 300: Multiple choices for different representations of
667
# the URI. Using that mechanisn with bzr will violate the
668
# protocol neutrality of Transport.
670
# 304: Not modified (SHOULD only occurs with conditional
671
# GETs which are not used by our implementation)
673
# 305: Use proxy. I can't imagine this one occurring in
674
# our context-- vila/20060909
676
# 306: Unused (if the RFC says so...)
678
# If the code is 302 and the request is HEAD, some may
679
# think that it is a sufficent hint that the file exists
680
# and that we MAY avoid following the redirections. But
681
# if we want to be sure, we MUST follow them.
683
if code in (301, 302, 303, 307):
684
return Request(req.get_method(),newurl,
685
headers = req.headers,
686
origin_req_host = req.get_origin_req_host(),
688
# TODO: It will be nice to be able to
689
# detect virtual hosts sharing the same
690
# IP address, that will allow us to
691
# share the same connection...
696
raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
698
def http_error_302(self, req, fp, code, msg, headers):
699
"""Requests the redirected to URI.
701
Copied from urllib2 to be able to clean the pipe of the associated
702
connection, *before* issuing the redirected request but *after* having
703
eventually raised an error.
705
# Some servers (incorrectly) return multiple Location headers
706
# (so probably same goes for URI). Use first header.
708
# TODO: Once we get rid of addinfourl objects, the
709
# following will need to be updated to use correct case
711
if 'location' in headers:
712
newurl = headers.getheaders('location')[0]
713
elif 'uri' in headers:
714
newurl = headers.getheaders('uri')[0]
717
if self._debuglevel >= 1:
718
print 'Redirected to: %s (followed: %r)' % (newurl,
719
req.follow_redirections)
720
if req.follow_redirections is False:
721
req.redirected_to = newurl
724
newurl = urlparse.urljoin(req.get_full_url(), newurl)
726
# This call succeeds or raise an error. urllib2 returns
727
# if redirect_request returns None, but our
728
# redirect_request never returns None.
729
redirected_req = self.redirect_request(req, fp, code, msg, headers,
733
# .redirect_dict has a key url if url was previously visited.
734
if hasattr(req, 'redirect_dict'):
735
visited = redirected_req.redirect_dict = req.redirect_dict
736
if (visited.get(newurl, 0) >= self.max_repeats or
737
len(visited) >= self.max_redirections):
738
raise urllib2.HTTPError(req.get_full_url(), code,
739
self.inf_msg + msg, headers, fp)
741
visited = redirected_req.redirect_dict = req.redirect_dict = {}
742
visited[newurl] = visited.get(newurl, 0) + 1
744
# We can close the fp now that we are sure that we won't
745
# use it with HTTPError.
747
# We have all we need already in the response
748
req.connection.cleanup_pipe()
750
return self.parent.open(redirected_req)
752
http_error_301 = http_error_303 = http_error_307 = http_error_302
755
class ProxyHandler(urllib2.ProxyHandler):
756
"""Handles proxy setting.
758
Copied and modified from urllib2 to be able to modify the request during
759
the request pre-processing instead of modifying it at _open time. As we
760
capture (or create) the connection object during request processing, _open
763
The main task is to modify the request so that the connection is done to
764
the proxy while the request still refers to the destination host.
766
Note: the proxy handling *may* modify the protocol used; the request may be
767
against an https server proxied through an http proxy. So, https_request
768
will be called, but later it's really http_open that will be called. This
769
explains why we don't have to call self.parent.open as the urllib2 did.
772
# Proxies must be in front
776
def __init__(self, proxies=None):
777
urllib2.ProxyHandler.__init__(self, proxies)
778
# First, let's get rid of urllib2 implementation
779
for type, proxy in self.proxies.items():
780
if self._debuglevel >= 3:
781
print 'Will unbind %s_open for %r' % (type, proxy)
782
delattr(self, '%s_open' % type)
784
# We are interested only by the http[s] proxies
785
http_proxy = self.get_proxy_env_var('http')
786
https_proxy = self.get_proxy_env_var('https')
788
if http_proxy is not None:
789
if self._debuglevel >= 3:
790
print 'Will bind http_request for %r' % http_proxy
791
setattr(self, 'http_request',
792
lambda request: self.set_proxy(request, 'http'))
794
if https_proxy is not None:
795
if self._debuglevel >= 3:
796
print 'Will bind http_request for %r' % https_proxy
797
setattr(self, 'https_request',
798
lambda request: self.set_proxy(request, 'https'))
800
def get_proxy_env_var(self, name, default_to='all'):
801
"""Get a proxy env var.
803
Note that we indirectly rely on
804
urllib.getproxies_environment taking into account the
805
uppercased values for proxy variables.
808
return self.proxies[name.lower()]
810
if default_to is not None:
811
# Try to get the alternate environment variable
813
return self.proxies[default_to]
818
def proxy_bypass(self, host):
819
"""Check if host should be proxied or not"""
820
no_proxy = self.get_proxy_env_var('no', default_to=None)
823
hhost, hport = urllib.splitport(host)
824
# Does host match any of the domains mentioned in
825
# no_proxy ? The rules about what is authorized in no_proxy
826
# are fuzzy (to say the least). We try to allow most
827
# commonly seen values.
828
for domain in no_proxy.split(','):
829
dhost, dport = urllib.splitport(domain)
830
if hport == dport or dport is None:
832
dhost = dhost.replace(".", r"\.")
833
dhost = dhost.replace("*", r".*")
834
dhost = dhost.replace("?", r".")
835
if re.match(dhost, hhost, re.IGNORECASE):
837
# Nevertheless, there are platform-specific ways to
839
return urllib.proxy_bypass(host)
841
def set_proxy(self, request, type):
842
if self.proxy_bypass(request.get_host()):
845
proxy = self.get_proxy_env_var(type)
846
if self._debuglevel >= 3:
847
print 'set_proxy %s_request for %r' % (type, proxy)
848
# FIXME: python 2.5 urlparse provides a better _parse_proxy which can
849
# grok user:password@host:port as well as
850
# http://user:password@host:port
852
(scheme, user, password,
853
host, port, path) = transport.ConnectedTransport._split_url(proxy)
855
if request.proxy_auth == {}:
856
# No proxy auth parameter are available, we are handling the first
857
# proxied request, intialize. scheme (the authentication scheme)
858
# and realm will be set by the AuthHandler
859
request.proxy_auth = {
860
'host': host, 'port': port,
861
'user': user, 'password': password,
863
# We ignore path since we connect to a proxy
868
phost = host + ':%d' % port
869
request.set_proxy(phost, type)
870
if self._debuglevel >= 3:
871
print 'set_proxy: proxy set to %s://%s' % (type, phost)
875
class AbstractAuthHandler(urllib2.BaseHandler):
876
"""A custom abstract authentication handler for all http authentications.
878
Provides the meat to handle authentication errors and
879
preventively set authentication headers after the first
880
successful authentication.
882
This can be used for http and proxy, as well as for basic and
883
digest authentications.
885
This provides an unified interface for all authentication handlers
886
(urllib2 provides far too many with different policies).
888
The interaction between this handler and the urllib2
889
framework is not obvious, it works as follow:
891
opener.open(request) is called:
893
- that may trigger http_request which will add an authentication header
894
(self.build_header) if enough info is available.
896
- the request is sent to the server,
898
- if an authentication error is received self.auth_required is called,
899
we acquire the authentication info in the error headers and call
900
self.auth_match to check that we are able to try the
901
authentication and complete the authentication parameters,
903
- we call parent.open(request), that may trigger http_request
904
and will add a header (self.build_header), but here we have
905
all the required info (keep in mind that the request and
906
authentication used in the recursive calls are really (and must be)
909
- if the call returns a response, the authentication have been
910
successful and the request authentication parameters have been updated.
914
"""We don't want to retry authenticating endlessly"""
916
# The following attributes should be defined by daughter
918
# - auth_required_header: the header received from the server
919
# - auth_header: the header sent in the request
922
# We want to know when we enter into an try/fail cycle of
923
# authentications so we initialize to None to indicate that we aren't
924
# in such a cycle by default.
925
self._retry_count = None
927
def update_auth(self, auth, key, value):
928
"""Update a value in auth marking the auth as modified if needed"""
929
old_value = auth.get(key, None)
930
if old_value != value:
932
auth['modified'] = True
934
def auth_required(self, request, headers):
935
"""Retry the request if the auth scheme is ours.
937
:param request: The request needing authentication.
938
:param headers: The headers for the authentication error response.
939
:return: None or the response for the authenticated request.
941
# Don't try to authenticate endlessly
942
if self._retry_count is None:
943
# The retry being recusrsive calls, None identify the first retry
944
self._retry_count = 1
946
self._retry_count += 1
947
if self._retry_count > self._max_retry:
948
# Let's be ready for next round
949
self._retry_count = None
951
server_header = headers.get(self.auth_required_header, None)
952
if server_header is None:
953
# The http error MUST have the associated
954
# header. This must never happen in production code.
955
raise KeyError('%s not found' % self.auth_required_header)
957
auth = self.get_auth(request)
958
if auth.get('user', None) is None:
959
# Without a known user, we can't authenticate
962
auth['modified'] = False
963
if self.auth_match(server_header, auth):
964
# auth_match may have modified auth (by adding the
965
# password or changing the realm, for example)
966
if (request.get_header(self.auth_header, None) is not None
967
and not auth['modified']):
968
# We already tried that, give up
972
request.connection.cleanup_pipe()
973
response = self.parent.open(request)
975
self.auth_successful(request, response)
977
# We are not qualified to handle the authentication.
978
# Note: the authentication error handling will try all
979
# available handlers. If one of them authenticates
980
# successfully, a response will be returned. If none of
981
# them succeeds, None will be returned and the error
982
# handler will raise the 401 'Unauthorized' or the 407
983
# 'Proxy Authentication Required' error.
986
def add_auth_header(self, request, header):
    """Add the authentication header to the request.

    Uses an unredirected header so credentials are not blindly
    forwarded when the request is redirected to another host.
    """
    request.add_unredirected_header(self.auth_header, header)
def auth_match(self, header, auth):
    """Check that we are able to handle that authentication scheme.

    The request authentication parameters may need to be
    updated with info from the server. Some of these
    parameters, when combined, are considered to be the
    authentication key, if one of them change the
    authentication result may change. 'user' and 'password'
    are examples, but some auth schemes may have others
    (digest's nonce is an example, digest's nonce_count is a
    *counter-example*). Such parameters must be updated by
    using the update_auth() method.

    :param header: The authentication header sent by the server.
    :param auth: The auth parameters already known. They may be
        updated.
    :returns: True if we can try to handle the authentication.
    """
    raise NotImplementedError(self.auth_match)
def build_auth_header(self, auth, request):
    """Build the value of the header used to authenticate.

    :param auth: The auth parameters needed to build the header.
    :param request: The request needing authentication.

    :return: None or header.
    """
    raise NotImplementedError(self.build_auth_header)
def auth_successful(self, request, response):
    """The authentication was successful for the request.

    Additional infos may be available in the response.

    :param request: The successfully authenticated request.
    :param response: The server response (may contain auth info).
    """
    # It may happen that we need to reconnect later, let's be ready
    self._retry_count = None
def get_user_password(self, auth):
    """Ask user for a password if none is already available.

    :param auth: The auth parameters dict; 'user', 'password', 'realm',
        'protocol', 'host', 'port' and 'path' are consulted.
    :return: A (user, password) tuple.
    """
    auth_conf = config.AuthenticationConfig()
    user = auth['user']
    password = auth['password']
    realm = auth['realm']

    if user is None:
        # Query the authentication config files first
        user = auth_conf.get_user(auth['protocol'], auth['host'],
                                  port=auth['port'], path=auth['path'],
                                  realm=realm)
        if user is None:
            # Default to local user
            user = getpass.getuser()

    if password is None:
        password = auth_conf.get_password(
            auth['protocol'], auth['host'], user, port=auth['port'],
            path=auth['path'], realm=realm,
            prompt=self.build_password_prompt(auth))

    return user, password
def _build_password_prompt(self, auth):
1055
"""Build a prompt taking the protocol used into account.
1057
The AuthHandler is used by http and https, we want that information in
1058
the prompt, so we build the prompt from the authentication dict which
1059
contains all the needed parts.
1061
Also, http and proxy AuthHandlers present different prompts to the
1062
user. The daughter classes should implements a public
1063
build_password_prompt using this method.
1065
prompt = '%s' % auth['protocol'].upper() + ' %(user)s@%(host)s'
1066
realm = auth['realm']
1067
if realm is not None:
1068
prompt += ", Realm: '%s'" % realm
1069
prompt += ' password'
1072
def http_request(self, request):
    """Insert an authentication header if information is available.

    This sends the credentials preventively, saving the round-trip
    through the 401/407 error when we already authenticated once.
    """
    auth = self.get_auth(request)
    if self.auth_params_reusable(auth):
        self.add_auth_header(request, self.build_auth_header(auth, request))
    return request

https_request = http_request # FIXME: Need test
class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    # Extracts the realm from the server's (proxy-)authenticate header value
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Build a 'Basic' header from the base64-encoded credentials."""
        raw = '%s:%s' % (auth['user'], auth['password'])
        auth_header = 'Basic ' + raw.encode('base64').strip()
        return auth_header

    def auth_match(self, header, auth):
        """See AbstractAuthHandler.auth_match."""
        scheme, raw_auth = header.split(None, 1)
        scheme = scheme.lower()
        if scheme != 'basic':
            return False

        match = self.auth_regexp.search(raw_auth)
        if match:
            realm = match.groups()
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if auth['user'] is None or auth['password'] is None:
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'
def get_digest_algorithm_impls(algorithm):
    """Return the hash (H) and key-digest (KD) helpers for an algorithm.

    :param algorithm: The algorithm name sent by the server ('MD5' or
        'SHA').
    :return: A (H, KD) pair of callables, or (None, None) if the
        algorithm is not supported.
    """
    H = None
    KD = None
    if algorithm == 'MD5':
        H = lambda x: md5.new(x).hexdigest()
    elif algorithm == 'SHA':
        H = lambda x: sha.new(x).hexdigest()
    if H is not None:
        # KD as defined by RFC 2617: digest of the secret and the data
        KD = lambda secret, data: H("%s:%s" % (secret, data))
    return H, KD
def get_new_cnonce(nonce, nonce_count):
    """Build a new client nonce for digest authentication.

    Mixes the server nonce, the use count, the current time and some
    random bytes so the cnonce is unique per request.
    """
    raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(),
                           urllib2.randombytes(8))
    return sha.new(raw).hexdigest()[:16]
class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    # Before basic as digest is a bit more secure
    handler_order = 490

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        """See AbstractAuthHandler.auth_match."""
        scheme, raw_auth = header.split(None, 1)
        scheme = scheme.lower()
        if scheme != 'digest':
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib2.parse_keqv_list(urllib2.parse_http_list(raw_auth))

        # Check that we can handle that authentication
        qop = req_auth.get('qop', None)
        if qop != 'auth': # No auth-int so far
            return False

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if auth['user'] is None or auth['password'] is None:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        """Build a 'Digest' header from auth parameters (RFC 2617)."""
        url_scheme, url_selector = urllib.splittype(request.get_selector())
        sel_host, uri = urllib.splithost(url_selector)

        A1 = '%s:%s:%s' % (auth['user'], auth['realm'], auth['password'])
        A2 = '%s:%s' % (request.get_method(), uri)

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header
class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        return self._build_password_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)
class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 407 error and keep the relevant info in
    the proxy_auth request attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib2.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        prompt = self._build_password_prompt(auth)
        prompt = 'Proxy ' + prompt
        return prompt

    def http_error_407(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)
class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler"""
class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler"""
class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler"""
class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler"""
class HTTPErrorProcessor(urllib2.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors, quite the contrary
    instead, we leave our Transport handle them.
    """

    # NOTE(review): the 404 entry was restored from upstream bzrlib after
    # the list was truncated in this copy — confirm against history.
    accepted_errors = [200, # Ok
                       206, # Partial content
                       404, # Not found
                       ]
    """The error codes the caller will handle.

    This can be specialized in the request on a case-by case basis, but the
    common cases are covered here.
    """

    def http_response(self, request, response):
        """Return the response, delegating unaccepted codes to the opener.

        Codes listed in the request's (or this class's) accepted_errors
        are passed through untouched for the Transport to handle.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        accepted_errors = request.accepted_errors
        if accepted_errors is None:
            accepted_errors = self.accepted_errors
        if code not in accepted_errors:
            response = self.parent.error('http', request, response,
                                         code, msg, hdrs)

        return response

    https_response = http_response
class HTTPDefaultErrorHandler(urllib2.HTTPDefaultErrorHandler):
    """Translate common errors into bzr Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        if code == 403:
            raise errors.TransportError(
                'Server refuses to fulfill the request (403 Forbidden)'
                ' for %s' % req.get_full_url())
        else:
            raise errors.InvalidHttpResponse(req.get_full_url(),
                                             'Unable to handle http code %d: %s'
                                             % (code, msg))
class Opener(object):
    """A wrapper around urllib2.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,):
        # NOTE(review): ProxyHandler, HTTPHandler and HTTPSHandler were
        # restored from upstream bzrlib after this list was truncated in
        # this copy — confirm against history.
        self._opener = urllib2.build_opener( \
            connection, redirect, error,
            ProxyHandler,
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            )
        self.open = self._opener.open
        if DEBUG >= 2:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)