21
21
For instance, we create a new HTTPConnection and HTTPSConnection that inherit
22
22
from the original urllib2.HTTP(s)Connection objects, but also have a new base
23
which implements a custom getresponse and fake_close handlers.
23
which implements custom getresponse and cleanup_pipe handlers.
25
25
And then we implement custom HTTPHandler and HTTPSHandler classes, that use
26
26
the custom HTTPConnection classes.
73
75
# Some responses have bodies in which we have no interest
74
76
_body_ignored_responses = [301,302, 303, 307, 401, 403, 404]
76
def __init__(self, *args, **kwargs):
77
httplib.HTTPResponse.__init__(self, *args, **kwargs)
78
# in finish() below, we may have to discard several MB in the worst
79
# case. To avoid buffering that much, we read and discard by chunks
80
# instead. The underlying file is either a socket or a StringIO, so reading
81
# 8k chunks should be fine.
82
_discarded_buf_size = 8192
80
85
"""Begin to read the response from the server.
87
92
httplib.HTTPResponse.begin(self)
88
93
if self.status in self._body_ignored_responses:
89
if self.debuglevel > 0:
94
if self.debuglevel >= 2:
90
95
print "For status: [%s]," % self.status,
91
print "will ready body, length: ",
92
if self.length is not None:
93
print "[%d]" % self.length
96
print "will ready body, length: %s" % self.length
96
97
if not (self.length is None or self.will_close):
97
98
# In some cases, we just can't read the body, not
98
99
# even try or we may encounter a 104, 'Connection
100
101
# and the server closed the connection just after
101
102
# having issued the response headers (even if the
102
103
# headers indicate a Content-Type...)
103
body = self.fp.read(self.length)
104
if self.debuglevel > 0:
104
body = self.read(self.length)
105
if self.debuglevel >= 9:
106
# This one can be huge and is generally not interesting
105
107
print "Consumed body: [%s]" % body
107
109
elif self.status == 200:
117
119
# below we keep the socket with the server opened.
118
120
self.will_close = False
123
"""Finish reading the body.
125
In some cases, the client may have left some bytes to read in the
126
body. That will prevent the next request from succeeding if we use a
127
persistent connection. If we don't use a persistent connection, well,
128
nothing will block the next request since a new connection will be
131
:return: the number of bytes left on the socket (may be None)
134
if not self.isclosed():
135
# Make sure nothing was left to be read on the socket
138
while data and self.length:
139
# read() will update self.length
140
data = self.read(min(self.length, self._discarded_buf_size))
143
trace.mutter("%s bytes left on the HTTP socket", pending)
121
148
# Not inheriting from 'object' because httplib.HTTPConnection doesn't.
122
149
class AbstractHTTPConnection:
123
150
"""A custom HTTP(S) Connection, which can reset itself on a bad response"""
125
152
response_class = Response
126
strict = 1 # We don't support HTTP/0.9
128
def fake_close(self):
129
"""Make the connection believes the response have been fully handled.
131
That makes the httplib.HTTPConnection happy
154
# When we detect a server responding with the whole file to range requests,
155
# we want to warn. But not below a given threshold.
156
_range_warning_thresold = 1024 * 1024
159
self._response = None
160
self._ranges_received_whole_file = None
162
def _mutter_connect(self):
163
netloc = '%s:%s' % (self.host, self.port)
164
if self.proxied_host is not None:
165
netloc += '(proxy for %s)' % self.proxied_host
166
trace.mutter('* About to connect() to %s' % netloc)
168
def getresponse(self):
169
"""Capture the response to be able to cleanup"""
170
self._response = httplib.HTTPConnection.getresponse(self)
171
return self._response
173
def cleanup_pipe(self):
174
"""Read the remaining bytes of the last response if any."""
175
if self._response is not None:
176
pending = self._response.finish()
177
# Warn the user (once)
178
if (self._ranges_received_whole_file is None
179
and self._response.status == 200
180
and pending and pending > self._range_warning_thresold
182
self._ranges_received_whole_file = True
184
'Got a 200 response when asking for multiple ranges,'
185
' does your server at %s:%s support range requests?',
186
self.host, self.port)
187
self._response = None
133
188
# Preserve our preciousss
142
197
class HTTPConnection(AbstractHTTPConnection, httplib.HTTPConnection):
144
199
# XXX: Needs refactoring at the caller level.
145
def __init__(self, host, port=None, strict=None, proxied_host=None):
146
httplib.HTTPConnection.__init__(self, host, port, strict)
200
def __init__(self, host, port=None, proxied_host=None):
201
AbstractHTTPConnection.__init__(self)
202
# Use strict=True since we don't support HTTP/0.9
203
httplib.HTTPConnection.__init__(self, host, port, strict=True)
147
204
self.proxied_host = proxied_host
207
if 'http' in debug.debug_flags:
208
self._mutter_connect()
209
httplib.HTTPConnection.connect(self)
212
# FIXME: Should test for ssl availability
150
213
class HTTPSConnection(AbstractHTTPConnection, httplib.HTTPSConnection):
152
215
def __init__(self, host, port=None, key_file=None, cert_file=None,
153
strict=None, proxied_host=None):
217
AbstractHTTPConnection.__init__(self)
218
# Use strict=True since we don't support HTTP/0.9
154
219
httplib.HTTPSConnection.__init__(self, host, port,
155
key_file, cert_file, strict)
220
key_file, cert_file, strict=True)
156
221
self.proxied_host = proxied_host
158
223
def connect(self):
224
if 'http' in debug.debug_flags:
225
self._mutter_connect()
159
226
httplib.HTTPConnection.connect(self)
160
227
if self.proxied_host is None:
161
228
self.connect_to_origin()
372
439
print ' Will retry, %s %r' % (method, url)
373
440
request.connection.close()
374
441
response = self.do_open(http_class, request, False)
375
convert_to_addinfourl = False
377
if self._debuglevel > 0:
443
if self._debuglevel >= 2:
378
444
print 'Received second exception: [%r]' % exc_val
379
445
print ' On connection: [%r]' % request.connection
380
446
if exc_type in (httplib.BadStatusLine, httplib.UnknownProtocol):
401
467
request.get_selector()),
402
468
orig_error=exc_val)
404
if self._debuglevel > 0:
470
if self._debuglevel >= 2:
405
471
print 'On connection: [%r]' % request.connection
406
472
method = request.get_method()
407
473
url = request.get_full_url()
408
474
print ' Failed again, %s %r' % (method, url)
409
475
print ' Will raise: [%r]' % my_exception
410
476
raise my_exception, None, exc_tb
411
return response, convert_to_addinfourl
413
479
def do_open(self, http_class, request, first_try=True):
414
480
"""See urllib2.AbstractHTTPHandler.do_open for the general idea.
425
491
headers.update(request.unredirected_hdrs)
428
connection._send_request(request.get_method(),
429
request.get_selector(),
494
method = request.get_method()
495
url = request.get_selector()
496
connection._send_request(method, url,
430
497
# FIXME: implement 100-continue
431
498
#None, # We don't send the body yet
432
499
request.get_data(),
434
if self._debuglevel > 0:
435
print 'Request sent: [%r]' % request
501
if 'http' in debug.debug_flags:
502
trace.mutter('> %s %s' % (method, url))
503
hdrs = ['%s: %s' % (k, v) for k,v in headers.items()]
504
trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
505
if self._debuglevel >= 1:
506
print 'Request sent: [%r] from (%s)' \
507
% (request, request.connection.sock.getsockname())
436
508
response = connection.getresponse()
437
509
convert_to_addinfourl = True
438
510
except (socket.gaierror, httplib.BadStatusLine, httplib.UnknownProtocol,
439
511
socket.error, httplib.HTTPException):
440
response, convert_to_addinfourl = self.retry_or_raise(http_class,
512
response = self.retry_or_raise(http_class, request, first_try)
513
convert_to_addinfourl = False
444
515
# FIXME: HTTPConnection does not fully support 100-continue (the
445
516
# server responses are just ignored)
467
538
resp = urllib2.addinfourl(fp, r.msg, req.get_full_url())
468
539
resp.code = r.status
469
540
resp.msg = r.reason
470
if self._debuglevel > 0:
541
resp.version = r.version
542
if self._debuglevel >= 2:
471
543
print 'Create addinfourl: %r' % resp
472
544
print ' For: %r(%r)' % (request.get_method(),
473
545
request.get_full_url())
546
if 'http' in debug.debug_flags:
547
version = 'HTTP/%d.%d'
549
version = version % (resp.version / 10,
552
version = 'HTTP/%r' % resp.version
553
trace.mutter('< %s %s %s' % (version, resp.code,
555
# Use the raw header lines instead of treating resp.info() as a
556
# dict since we may miss duplicated headers otherwise.
557
hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
558
trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
478
# # we need titled headers in a dict but
479
# # response.getheaders returns a list of (lower(header).
480
# # Let's title that because most of bzr handle titled
481
# # headers, but maybe we should switch to lowercased
483
# # jam 20060908: I think we actually expect the headers to
484
# # be similar to mimetools.Message object, which uses
485
# # case insensitive keys. It lowers() all requests.
486
# # My concern is that the code may not do perfect title case.
487
# # For example, it may use Content-type rather than Content-Type
489
# # When we get rid of addinfourl, we must ensure that bzr
490
# # always use titled headers and that any header received
491
# # from server is also titled.
494
# for header, value in (response.getheaders()):
495
# headers[header.title()] = value
496
# # FIXME: Implements a secured .read method
497
# response.code = response.status
498
# response.headers = headers
502
564
class HTTPHandler(AbstractHTTPHandler):
503
565
"""A custom handler that just thunks into HTTPConnection"""
523
585
# - with and without certificate
524
586
# - with self-signed certificate
525
587
# - with and without authentication
526
# - with good and bad credentials (especially the proxy auth aound
588
# - with good and bad credentials (especially the proxy auth around
528
590
# - with basic and digest schemes
529
591
# - reconnection on errors
537
599
raise ConnectionError("Can't connect to %s via proxy %s" % (
538
600
connect.proxied_host, self.host))
540
connection.fake_close()
602
connection.cleanup_pipe()
541
603
# Establish the connection encryption
542
604
connection.connect_to_origin()
543
605
# Propagate the connection to the original request
608
670
def http_error_302(self, req, fp, code, msg, headers):
609
671
"""Requests the redirected to URI.
611
Copied from urllib2 to be able to fake_close the
612
associated connection, *before* issuing the redirected
613
request but *after* having eventually raised an error.
673
Copied from urllib2 to be able to clean the pipe of the associated
674
connection, *before* issuing the redirected request but *after* having
675
possibly raised an error.
615
677
# Some servers (incorrectly) return multiple Location headers
616
678
# (so probably same goes for URI). Use first header.
687
749
urllib2.ProxyHandler.__init__(self, proxies)
688
750
# First, let's get rid of urllib2 implementation
689
751
for type, proxy in self.proxies.items():
690
if self._debuglevel > 0:
752
if self._debuglevel >= 3:
691
753
print 'Will unbind %s_open for %r' % (type, proxy)
692
754
delattr(self, '%s_open' % type)
696
758
https_proxy = self.get_proxy_env_var('https')
698
760
if http_proxy is not None:
699
if self._debuglevel > 0:
761
if self._debuglevel >= 3:
700
762
print 'Will bind http_request for %r' % http_proxy
701
763
setattr(self, 'http_request',
702
764
lambda request: self.set_proxy(request, 'http'))
704
766
if https_proxy is not None:
705
if self._debuglevel > 0:
767
if self._debuglevel >= 3:
706
768
print 'Will bind http_request for %r' % https_proxy
707
769
setattr(self, 'https_request',
708
770
lambda request: self.set_proxy(request, 'https'))
966
1030
the prompt, so we build the prompt from the authentication dict which
967
1031
contains all the needed parts.
969
Also, hhtp and proxy AuthHandlers present different prompts to the
970
user. The daughter classes hosuld implements a public
1033
Also, http and proxy AuthHandlers present different prompts to the
1034
user. The daughter classes should implement a public
971
1035
build_password_prompt using this method.
973
1037
prompt = '%s' % auth['protocol'].upper() + ' %(user)s@%(host)s'
1250
1314
def http_error_default(self, req, fp, code, msg, hdrs):
1251
1315
if code == 403:
1252
1316
raise errors.TransportError('Server refuses to fullfil the request')
1254
# We don't know which, but one of the ranges we
1255
# specified was wrong. So we raise with 0 for a lack
1256
# of a better magic value.
1257
raise errors.InvalidRange(req.get_full_url(),0)
1259
1318
raise errors.InvalidHttpResponse(req.get_full_url(),
1260
1319
'Unable to handle http code %d: %s'