# Copyright (C) 2005-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Base implementation of Transport over http using urllib.

There are separate implementation modules for each http client implementation.
"""

from __future__ import absolute_import

DEBUG = 0

import base64
import errno
import re
import socket
import sys
import time

try:
    import http.client as http_client
except ImportError:  # python < 3
    import httplib as http_client
try:
    import urllib.request as urllib_request
except ImportError:  # python < 3
    import urllib2 as urllib_request
try:
    from urllib.parse import urljoin, splitport, splittype, splithost, urlencode
except ImportError:  # python < 3
    from urlparse import urljoin
    from urllib import splitport, splittype, splithost, urlencode

# TODO: handle_response should be integrated into the http/__init__.py
from .response import handle_response

# FIXME: Oversimplifying, two kinds of exceptions should be
# raised, once a request is issued: URLError before we have been
# able to process the response, HTTPError after that. Processing the
# response means we are able to leave the socket clean, so if we
# are not able to do that, we should close the connection. The
# actual code more or less does that; tests should be written to
# ensure that.

from ... import (
    config,
    debug,
    errors,
    osutils,
    trace,
    transport,
    ui,
    urlutils,
    )
from ... import __version__ as breezy_version
from ...bzr.smart import medium
from ...sixish import (
    PY3,
    reraise,
    )
from ...trace import mutter
from ...transport import (
    ConnectedTransport,
    )

from . import default_user_agent, ssl

# The kerberos module is only probed for lazily, when the server sends a
# 'Negotiate' challenge (see NegotiateAuthHandler below).
kerberos = None
checked_kerberos = False


class addinfourl(urllib_request.addinfourl):
    '''Replacement addinfourl class compatible with python-2.7's xmlrpclib

    In python-2.7, xmlrpclib expects that the response object that it
    receives has a getheader method.  http_client.HTTPResponse provides this
    but urllib_request.addinfourl does not.  Add the necessary functions
    here, ported to use the internal data structures of addinfourl.
    '''

    def getheader(self, name, default=None):
        if self.headers is None:
            raise http_client.ResponseNotReady()
        return self.headers.getheader(name, default)

    def getheaders(self):
        if self.headers is None:
            raise http_client.ResponseNotReady()
        return list(self.headers.items())


class _ReportingFileSocket(object):

    def __init__(self, filesock, report_activity=None):
        self.filesock = filesock
        self._report_activity = report_activity

    def report_activity(self, size, direction):
        if self._report_activity:
            self._report_activity(size, direction)

    def read(self, size=1):
        s = self.filesock.read(size)
        self.report_activity(len(s), 'read')
        return s

    def readline(self, size=-1):
        s = self.filesock.readline(size)
        self.report_activity(len(s), 'read')
        return s

    def readinto(self, b):
        s = self.filesock.readinto(b)
        self.report_activity(s, 'read')
        return s

    def __getattr__(self, name):
        return getattr(self.filesock, name)


class _ReportingSocket(object):

    def __init__(self, sock, report_activity=None):
        self.sock = sock
        self._report_activity = report_activity

    def report_activity(self, size, direction):
        if self._report_activity:
            self._report_activity(size, direction)

    def sendall(self, s, *args):
        self.sock.sendall(s, *args)
        self.report_activity(len(s), 'write')

    def recv(self, *args):
        s = self.sock.recv(*args)
        self.report_activity(len(s), 'read')
        return s

    def makefile(self, mode='r', bufsize=-1):
        # http_client creates a fileobject that doesn't do buffering, which
        # makes fp.readline() very expensive because it only reads one byte
        # at a time.  So we wrap the socket in an object that forces
        # sock.makefile to make a buffered file.
        fsock = self.sock.makefile(mode, 65536)
        # And wrap that into a reporting kind of fileobject.
        return _ReportingFileSocket(fsock, self._report_activity)

    def __getattr__(self, name):
        return getattr(self.sock, name)
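

# Illustrative sketch (not part of the original module) of how the reporting
# wrappers are meant to be used; 'sock' and 'record' are hypothetical names:
#
#   totals = {'read': 0, 'write': 0}
#
#   def record(size, direction):
#       totals[direction] += size
#
#   reporting = _ReportingSocket(sock, report_activity=record)
#   reporting.sendall(b'HEAD / HTTP/1.1\r\n')  # totals['write'] += 17
#   data = reporting.recv(4096)                # totals['read'] += len(data)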


# We define our own Response class to keep our http_client pipe clean.
class Response(http_client.HTTPResponse):
    """Custom HTTPResponse, to avoid the need to decorate.

    http_client prefers to decorate the returned objects, rather
    than using a custom object.
    """

    # Some responses have bodies in which we have no interest.
    _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]

    # In finish() below, we may have to discard several MB in the worst
    # case.  To avoid buffering that much, we read and discard by chunks
    # instead.  The underlying file is either a socket or a StringIO, so
    # reading 8k chunks should be fine.
    _discarded_buf_size = 8192

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        self.url = url
        super(Response, self).__init__(
            sock, debuglevel=debuglevel, method=method, url=url)

    def begin(self):
        """Begin to read the response from the server.

        http_client assumes that some responses get no content and
        does not even attempt to read the body in those cases, leaving
        the body in the socket and blocking the next request.  Let's
        try to work around that.
        """
        http_client.HTTPResponse.begin(self)
        if self.status in self._body_ignored_responses:
            if self.debuglevel >= 2:
                print("For status: [%s], will read body, length: %s" % (
                    self.status, self.length))
            if not (self.length is None or self.will_close):
                # In some cases, we just can't read the body, not even try,
                # or we may encounter a 104 'Connection reset by peer' error
                # if there is indeed no body and the server closed the
                # connection just after having issued the response headers
                # (even if the headers indicate a Content-Type...).
                body = self.read(self.length)
                if self.debuglevel >= 9:
                    # This one can be huge and is generally not interesting.
                    print("Consumed body: [%s]" % body)
            self.close()
        elif self.status == 200:
            # Whatever the request is, it went ok, so we surely don't want
            # to close the connection.  Some cases are not correctly
            # detected by http_client.HTTPConnection.getresponse (called by
            # http_client.HTTPResponse.begin).  The CONNECT response for the
            # https-through-proxy case is one.  Note: the 'will_close' below
            # refers to the "true" socket between us and the server, whereas
            # the 'close()' above refers to the copy of that socket created
            # by http_client for the response itself.  So, in the 'if' above
            # we close the socket to indicate that we are done with the
            # response, whereas below we keep the socket to the server open.
            self.will_close = False

    def finish(self):
        """Finish reading the body.

        In some cases, the client may have left some bytes to read in the
        body. That will block the next request to succeed if we use a
        persistent connection. If we don't use a persistent connection,
        well, nothing will block the next request since a new connection
        will be issued anyway.

        :return: the number of bytes left on the socket (may be None)
        """
        pending = None
        if not self.isclosed():
            # Make sure nothing was left to be read on the socket
            pending = 0
            data = True
            while data and self.length:
                # read() will update self.length
                data = self.read(min(self.length, self._discarded_buf_size))
                pending += len(data)
            if pending:
                trace.mutter("%s bytes left on the HTTP socket", pending)
            self.close()
        return pending


# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
class AbstractHTTPConnection:
    """A custom HTTP(S) Connection, which can reset itself on a bad response."""

    response_class = Response

    # When we detect a server responding with the whole file to range
    # requests, we want to warn.  But not below a given threshold.
    _range_warning_threshold = 1024 * 1024

    def __init__(self, report_activity=None):
        self._response = None
        self._report_activity = report_activity
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        netloc = '%s:%s' % (self.host, self.port)
        if self.proxied_host is not None:
            netloc += '(proxy for %s)' % self.proxied_host
        trace.mutter('* About to connect() to %s' % netloc)

    def getresponse(self):
        """Capture the response to be able to cleanup."""
        self._response = http_client.HTTPConnection.getresponse(self)
        return self._response

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        if self._response is not None:
            try:
                pending = self._response.finish()
                # Warn the user (once)
                if (self._ranges_received_whole_file is None
                        and self._response.status == 200
                        and pending
                        and pending > self._range_warning_threshold):
                    self._ranges_received_whole_file = True
                    trace.warning(
                        'Got a 200 response when asking for multiple ranges,'
                        ' does your server at %s:%s support range requests?',
                        self.host, self.port)
            except socket.error as e:
                # It's conceivable that the socket is in a bad state here
                # (including some test cases) and in this case, it doesn't
                # need cleaning anymore, so no need to fail, we just get rid
                # of the socket and let callers reconnect.
                if (len(e.args) == 0
                        or e.args[0] not in (errno.ECONNRESET,
                                             errno.ECONNABORTED)):
                    raise
            self._response = None
        # Preserve our preciousss
        sock = self.sock
        self.sock = None
        # Let http_client.HTTPConnection do its housekeeping
        self.close()
        # Restore our preciousss
        self.sock = sock

    def _wrap_socket_for_reporting(self, sock):
        """Wrap the socket before anybody uses it."""
        self.sock = _ReportingSocket(sock, self._report_activity)


class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        if PY3:
            http_client.HTTPConnection.__init__(self, host, port)
        else:
            # Use strict=True since we don't support HTTP/0.9
            http_client.HTTPConnection.__init__(self, host, port, strict=True)
        self.proxied_host = proxied_host
        # ca_certs is ignored, it's only relevant for https

    def connect(self):
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)


class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        if PY3:
            http_client.HTTPSConnection.__init__(
                self, host, port, key_file, cert_file)
        else:
            # Use strict=True since we don't support HTTP/0.9
            http_client.HTTPSConnection.__init__(self, host, port,
                                                 key_file, cert_file,
                                                 strict=True)
        self.proxied_host = proxied_host
        self.ca_certs = ca_certs

    def connect(self):
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        if self.proxied_host is not None:
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
            ca_certs = None
        else:
            if self.ca_certs is None:
                ca_certs = config_stack.get('ssl.ca_certs')
            else:
                ca_certs = self.ca_certs
            if ca_certs is None:
                trace.warning(
                    "No valid trusted SSL CA certificates file set. See "
                    "'brz help ssl.ca_certs' for more information on setting "
                    "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=self.host)
        except ssl.SSLError:
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA "
                "certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody uses it
        self._wrap_socket_for_reporting(ssl_sock)


class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication has
      been made.
    """

    def __init__(self, method, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        urllib_request.Request.__init__(
            self, url, data, headers,
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        self.parent = parent
        # To handle redirections
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least,
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        if PY3:
            host, port = splitport(self.host)
        else:
            host, port = splitport(self.get_host())
        if port is None:
            # We need to set the default port ourselves way before it gets
            # set in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and
        # a proxy, it sets Host to the https proxy, not the host we want to
        # talk to. I'm fairly sure this is our fault, but what the cause is
        # remains an open question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)
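
# Illustrative example (not from the original file): for a request built as
# Request('GET', 'https://example.com/repo'), calling
# request.set_proxy('proxy.local:3128', 'https') records the origin server as
# request.proxied_host == 'example.com:443' (the default https port is filled
# in), while the connection itself will go to the hypothetical proxy.local.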


class _ConnectRequest(Request):

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        """
        # We give a fake url and redefine selector or urllib_request will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        return self.proxied_host

    @selector.setter
    def selector(self, value):
        # The CONNECT selector is always the proxied host, but python3's
        # Request.__init__ assigns the attribute, so accept and ignore it.
        pass

    def get_selector(self):
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not
        apply here. In fact, the connection is already established with the
        proxy and we just want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)


class ConnectionHandler(urllib_request.BaseHandler):
    """Provides connection-sharing by pre-processing requests.

    urllib_request provides no way to access the HTTPConnection object
    internally used. But we need it in order to achieve
    connection sharing. So, we add it to the request just before
    it is processed, and then we override the do_open method for
    http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        self._report_activity = report_activity
        self.ca_certs = ca_certs

    def create_connection(self, request, http_connection_class):
        if PY3:
            host = request.host
        else:
            host = request.get_host()
        if not host:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise urlutils.InvalidURL(request.get_full_url(),
                                      'no host given.')

        # We create a connection (but it will not connect until the first
        # request is made)
        try:
            connection = http_connection_class(
                host, proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL:
            # There is only one occurrence of InvalidURL in http_client
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

        return connection

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request has no connection: create a new one,

        - the request has a connection: it has been used
          already, let's capture it, so that we can give it to
          another transport to be reused. We don't do that
          ourselves: the Transport object gets the connection from
          a first request and then propagates it, from request to
          request or to cloned transports.
        """
        connection = request.connection
        if connection is None:
            # Create a new one
            connection = self.create_connection(request,
                                                http_connection_class)
            request.connection = connection

        # All connections will pass here, propagate debug level
        connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        return self.capture_connection(request, HTTPSConnection)


class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get better
    control of the connection, the ability to implement new
    request types and to return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Common headers setting"""

        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exceptions of an application-level kind, we
        just have to translate them.

        http_client can raise exceptions of a transport-level kind (badly
        formatted dialog, loss of connection or socket-level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).
        """
        # When an exception occurs, we give back the original
        # traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            if PY3:
                origin_req_host = request.origin_req_host
            else:
                origin_req_host = request.get_origin_req_host()
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in an incorrect state, it's a bug
            # in our implementation.
            reraise(exc_type, exc_val, exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Will retry, %s %r' % (method, url))
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine,
                                http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicate that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) was
                    # experienced; we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exceptions are considered connection-related.

                    # socket errors generally occur for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    if PY3:
                        selector = request.selector
                    else:
                        selector = request.get_selector()
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Failed again, %s %r' % (method, url))
                    print('  Will raise: [%r]' % my_exception)
                reraise(type(my_exception), my_exception, exc_tb)
            return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get
        # canonical header names, but only python2.5 urllib_request uses
        # title() to fix them just before sending the request. And not all
        # versions of python 2.5 do that. Since we replace
        # urllib_request.AbstractHTTPHandler.do_open we do it ourselves
        # below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            if PY3:
                url = request.selector
            else:
                url = request.get_selector()
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         # FIXME: implement 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         # FIXME: implement 100-continue
                                         # None, # We don't send the body yet
                                         request.data, headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
            convert_to_addinfourl = True
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine,
                http_client.UnknownProtocol, socket.error,
                http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)
            convert_to_addinfourl = False

        if PY3:
            response.msg = response.reason
            return response

        # FIXME: HTTPConnection does not fully support 100-continue (the
        # server responses are just ignored)

        # if code == 100:
        #     mutter('Will send the body')
        #     # We can send the body now
        #     body = request.data
        #     if body is None:
        #         raise URLError("No data given")
        #     connection.send(body)
        #     response = connection.getresponse()

        if self._debuglevel >= 2:
            print('Receives response: %r' % response)
            print('  For: %r(%r)' % (request.get_method(),
                                     request.get_full_url()))

        if convert_to_addinfourl:
            # Shamelessly copied from urllib_request
            req = request
            r = response
            r.recv = r.read
            fp = socket._fileobject(r, bufsize=65536)
            resp = addinfourl(fp, r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason
            resp.version = r.version
            if self._debuglevel >= 2:
                print('Create addinfourl: %r' % resp)
                print('  For: %r(%r)' % (request.get_method(),
                                         request.get_full_url()))
            if 'http' in debug.debug_flags:
                version = 'HTTP/%d.%d'
                try:
                    version = version % (resp.version // 10,
                                         resp.version % 10)
                except Exception:
                    version = 'HTTP/%r' % resp.version
                trace.mutter('< %s %s %s' % (version, resp.code,
                                             resp.msg))
                # Use the raw header lines instead of treating resp.info()
                # as a dict since we may miss duplicated headers otherwise.
                hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
                trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
        else:
            resp = response

        return resp


class HTTPHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPConnection"""

    def http_open(self, request):
        return self.do_open(HTTPConnection, request)


class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT':  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth
            #   around CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                raise errors.ConnectionError(
                    "Can't connect to %s via proxy %s" % (
                        connect.proxied_host, connection.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)


class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Handles redirect requests.

    We have to implement our own scheme because we use a specific
    Request object and because we want to implement a specific
    policy.
    """
    _debuglevel = DEBUG

    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But Breezy uses some
    # shortcuts to optimize against roundtrips which can lead to
    # write requests being issued before read requests of
    # containing dirs can be redirected. So we redirect write
    # requests in the same way, which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request"""
        # We would have preferred to update the request instead
        # of creating a new one, but the urllib_request.Request object
        # has a too complicated creation process to provide a
        # simple enough equivalent update process. Instead, when
        # redirecting, we only update the following request in
        # the redirect chain with a reference to the parent
        # request.

        # Some codes make no sense in our context and are treated
        # as errors:

        # 300: Multiple choices for different representations of
        #      the URI. Using that mechanism with Breezy will violate the
        #      protocol neutrality of Transport.

        # 304: Not modified (SHOULD only occur with conditional
        #      GETs which are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in
        #      our context -- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may
        # think that it is a sufficient hint that the file exists
        # and that we MAY avoid following the redirections. But
        # if we want to be sure, we MUST follow them.

        if PY3:
            origin_req_host = req.origin_req_host
        else:
            origin_req_host = req.get_origin_req_host()

        if code in (301, 302, 303, 307, 308):
            return Request(req.get_method(), newurl,
                           headers=req.headers,
                           origin_req_host=origin_req_host,
                           unverifiable=True,
                           # TODO: It will be nice to be able to
                           # detect virtual hosts sharing the same
                           # IP address, that will allow us to
                           # share the same connection...
                           connection=None,
                           parent=req,
                           )
        else:
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected-to URI.

        Copied from urllib_request to be able to clean the pipe of the
        associated connection, *before* issuing the redirected request but
        *after* having eventually raised an error.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        if 'location' in headers:
            newurl = headers.get('location')
        elif 'uri' in headers:
            newurl = headers.get('uri')
        else:
            return

        newurl = urljoin(req.get_full_url(), newurl)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            req.redirected_to = newurl
            return fp

        # This call succeeds or raises an error. urllib_request would return
        # if redirect_request returned None, but our redirect_request never
        # returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = redirected_req.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg,
                                               headers, fp)
        else:
            visited = redirected_req.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302


class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request
    during the request pre-processing instead of modifying it at _open time.
    As we capture (or create) the connection object during request
    processing, _open time was too late.

    The main task is to modify the request so that the connection is done to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may
    be against an https server proxied through an http proxy. So,
    https_request will be called, but later it's really http_open that will
    be called. This explains why we don't have to call self.parent.open as
    the urllib_request did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of the urllib_request implementation
        for type, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (type, proxy))
            delattr(self, '%s_open' % type)

        def bind_scheme_request(proxy, scheme):
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))
        # We are interested only in the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        bind_scheme_request(http_proxy, 'http')
        https_proxy = self.get_proxy_env_var('https')
        bind_scheme_request(https_proxy, 'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            if default_to is not None:
                # Try to get the alternate environment variable
                try:
                    return self.proxies[default_to]
                except KeyError:
                    pass
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib_request.proxy_bypass(host)
        else:
            return bypass

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if hport == dport or dport is None:
                # Protect glob chars
                dhost = dhost.replace(".", r"\.")
                dhost = dhost.replace("*", r".*")
                dhost = dhost.replace("?", r".")
                if re.match(dhost, hhost, re.IGNORECASE):
                    return True
        # Nothing explicitly avoids the host
        return None
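
    # Illustrative matches (not from the original file) for a hypothetical
    # no_proxy value of 'localhost,*.example.com:8080':
    #   evaluate_proxy_bypass('localhost', no_proxy)            -> True
    #   evaluate_proxy_bypass('www.example.com:8080', no_proxy) -> True
    #   evaluate_proxy_bypass('www.example.com:80', no_proxy)   -> None
    #   evaluate_proxy_bypass('example.org', no_proxy)          -> None
    # (None means the decision is left to urllib's proxy_bypass.)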

    def set_proxy(self, request, type):
        """Set the proxy according to the environment variables."""
        if PY3:
            host = request.host
        else:
            host = request.get_host()
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port

        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameters are available, we are handling the
            # first proxied request; initialize. scheme (the authentication
            # scheme) and realm will be set by the AuthHandler.
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request


class AbstractAuthHandler(urllib_request.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate and
    digest authentications.

    This provides a unified interface for all authentication handlers
    (urllib_request provides far too many with different policies).

    The interaction between this handler and the urllib_request
    framework is not obvious; it works as follows:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been updated.
    """

    scheme = None
    """The scheme as it appears in the server header (lower cased)"""

    _max_retry = 3
    """We don't want to retry authenticating endlessly"""

    requires_username = True
    """Whether the auth mechanism requires a username."""

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header: the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # We want to know when we enter into a try/fail cycle of
        # authentications, so we initialize to None to indicate that we
        # aren't in such a cycle by default.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Parse the authentication header.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder), scheme being the first word in
            the given header (lower cased), remainder may be None.
        """
        try:
            scheme, remainder = server_header.split(None, 1)
        except ValueError:
            scheme = server_header
            remainder = None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth, marking the auth as modified if needed"""
        old_value = auth.get(key, None)
        if old_value != value:
            auth[key] = value
            auth['modified'] = True
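
    # Example (illustrative only): a server challenge such as
    # 'Digest realm="brz", qop=auth' is parsed by _parse_auth_header into
    # ('digest', 'realm="brz", qop=auth'), while a bare 'Negotiate'
    # challenge yields ('negotiate', None).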

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # The retries being recursive calls, None identifies the first
            # retry
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None
        if PY3:
            server_headers = headers.get_all(self.auth_required_header)
        else:
            server_headers = headers.getheaders(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            trace.mutter('%s not found', self.auth_required_header)
            return None

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            self.update_auth(auth, 'protocol', parsed_url.scheme)
            self.update_auth(auth, 'host', parsed_url.host)
            self.update_auth(auth, 'port', parsed_url.port)
            self.update_auth(auth, 'path', parsed_url.path)
        # FIXME: the auth handler should be selected at a single place
        # instead of letting all handlers try to match all headers, but the
        # current design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match
            # each one in turn
            matching_handler = self.auth_match(server_header, auth)
            if matching_handler:
                # auth_match may have modified auth (by adding the
                # password or changing the realm, for example)
                if (request.get_header(self.auth_header, None) is not None
                        and not auth['modified']):
                    # We already tried that, give up
                    return None

                # Only the most secure scheme proposed by the server should
                # be used; since the handlers use 'handler_order' to describe
                # that property, the first handler tried takes precedence,
                # and the others should not attempt to authenticate if the
                # best one failed.
                best_scheme = auth.get('best_scheme', None)
                if best_scheme is None:
                    # At that point, if the current handler doesn't succeed,
                    # the credentials are wrong (or incomplete), but we know
                    # that the associated scheme should be used.
                    best_scheme = auth['best_scheme'] = self.scheme
                if best_scheme != self.scheme:
                    continue

                if self.requires_username and auth.get('user', None) is None:
                    # Without a known user, we can't authenticate
                    return None

                # Housekeeping
                request.connection.cleanup_pipe()
                # Retry the request with an authentication header added
                response = self.parent.open(request)
                if response:
                    self.auth_successful(request, response)
                return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key; if one of them changes, the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
             updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional info may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial
            url and then during dialog with the server).
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(auth['protocol'], auth['host'],
                                      port=port, path=auth['path'],
                                      realm=realm, ask=True,
                                      prompt=self.build_username_prompt(auth))
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information
        in the prompt, so we build the prompt from the authentication dict
        which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_password_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' password'
        return prompt

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information
        in the prompt, so we build the prompt from the authentication dict
        which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_username_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' username'
        return prompt

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            self.add_auth_header(
                request, self.build_auth_header(auth, request))
        return request

    https_request = http_request  # FIXME: Need test


class NegotiateAuthHandler(AbstractAuthHandler):
    """An authentication handler that handles WWW-Authenticate: Negotiate.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        resp = self._auth_match_kerberos(auth)
        if resp is None:
            return False
        # Optionally we should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', resp)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host."""
        global kerberos, checked_kerberos
        if kerberos is None and not checked_kerberos:
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None
        ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if ret < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], ret)
            return None
        ret = kerberos.authGSSClientStep(vc, "")
        if ret < 0:
            trace.mutter('authGSSClientStep failed: %d', ret)
            return None
        return kerberos.authGSSClientResponse(vc)

    def build_auth_header(self, auth, request):
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return (auth.get('scheme', None) == 'negotiate' and
                auth.get('negotiate_response', None) is not None)


class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        raw = '%s:%s' % (auth['user'], auth['password'])
        auth_header = 'Basic ' + \
            base64.b64encode(raw.encode('utf-8')).decode('ascii')
        return auth_header

    def extract_realm(self, header_value):
        match = self.auth_regexp.search(header_value)
        realm = None
        if match:
            realm = match.group(1)
        return match, realm

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match:
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if (auth.get('user', None) is None
                    or auth.get('password', None) is None):
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'
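

# Worked example (illustrative, using the classic RFC 2617 credentials): with
# auth = {'user': 'Aladdin', 'password': 'open sesame'},
# BasicAuthHandler.build_auth_header returns
# 'Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==', i.e. 'Basic ' plus the base64
# encoding of 'Aladdin:open sesame'.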


def get_digest_algorithm_impls(algorithm):
    H = None
    KD = None
    if algorithm == 'MD5':
        def H(x): return osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        H = osutils.sha_string
    if H is not None:
        def KD(secret, data): return H(
            ("%s:%s" % (secret, data)).encode('utf-8'))
    return H, KD


def get_new_cnonce(nonce, nonce_count):
    raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(),
                           osutils.rand_chars(8))
    return osutils.sha_string(raw.encode('utf-8'))[:16]
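

# Worked sketch (illustrative only) of how the helpers above combine in the
# digest computation (RFC 2617, qop=auth); all names are placeholders:
#
#   H, KD = get_digest_algorithm_impls('MD5')
#   A1 = 'user:realm:password'.encode('utf-8')   # identity secret
#   A2 = 'GET:/path'.encode('utf-8')             # method and URI
#   response = KD(H(A1), '%s:%s:%s:%s:%s'
#                 % (nonce, ncvalue, cnonce, 'auth', H(A2)))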


class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic, as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib_request.parse_keqv_list(
            urllib_request.parse_http_list(raw_auth))

        # Check that we can handle that authentication
        qop = req_auth.get('qop', None)
        if qop != 'auth':  # No auth-int so far
            return False

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if (auth.get('user', None) is None
                or auth.get('password', None) is None):
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        if PY3:
            selector = request.selector
        else:
            selector = request.get_selector()
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header


class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        return self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        return self._build_username_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)


class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 407 error and keep the relevant info in
    the proxy_auth request attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insists on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        prompt = self._build_password_prompt(auth)
        prompt = u'Proxy ' + prompt
        return prompt

    def build_username_prompt(self, auth):
        prompt = self._build_username_prompt(auth)
        prompt = u'Proxy ' + prompt
        return prompt

    def http_error_407(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)


class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler"""


class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler"""


class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler"""


class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler"""


class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Custom http negotiate authentication handler"""


class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Custom proxy negotiate authentication handler"""


class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors; quite the contrary, we leave our
    Transport to handle them.
    """

    accepted_errors = [200,  # Ok
                       201,
                       202,
                       204,
                       206,  # Partial content
                       400,
                       403,
                       404,  # Not found
                       405,  # Method not allowed
                       406,  # Not Acceptable
                       409,  # Conflict
                       416,  # Range not satisfiable
                       422,  # Unprocessable entity
                       501,  # Not implemented
                       ]
    """The error codes the caller will handle.

    This can be specialized in the request on a case-by-case basis, but the
    common cases are covered here.
    """

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        if code not in self.accepted_errors:
            response = self.parent.error('http', request, response,
                                         code, msg, hdrs)
        return response

    https_response = http_response


class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        if code == 403:
            raise errors.TransportError(
                'Server refuses to fulfill the request (403 Forbidden)'
                ' for %s' % req.get_full_url())
        else:
            raise errors.UnexpectedHttpStatus(
                req.get_full_url(), code,
                'Unable to handle http code: %s' % msg)
class Opener(object):
    """A wrapper around urllib_request.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provide hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        self._opener = urllib_request.build_opener(
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect, error,
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPDefaultErrorHandler,
            )

        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)

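# A minimal usage sketch (values are illustrative): HttpTransport below
# builds its opener as
#   opener = Opener(report_activity=None, ca_certs=None)
# and then issues requests through opener.open(request), where request is a
# Request instance carrying the method, url and auth attributes.
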
class HttpTransport(ConnectedTransport):
    """HTTP Client implementations.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.
    """

    # _unqualified_scheme: "http" or "https"
    # _scheme: may have "+pycurl", etc

    # In order to debug we have to issue our traces in sync with
    # httplib, which uses print :(
    _debuglevel = 0

    def __init__(self, base, _from_transport=None, ca_certs=None):
        """Set the base path where files will be stored."""
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._unqualified_scheme = proto_match.group(1)
        super(HttpTransport, self).__init__(
            base, _from_transport=_from_transport)
        self._medium = None
        # The range hint is handled dynamically throughout the life
        # of the transport object. We start by trying multi-range
        # requests and if the server returns bogus results, we
        # retry with single range requests and, finally, we
        # forget about ranges if the server really can't
        # understand them. Once acquired, this piece of info is
        # propagated to clones.
        if _from_transport is not None:
            self._range_hint = _from_transport._range_hint
            self._opener = _from_transport._opener
        else:
            self._range_hint = 'multi'
            self._opener = Opener(
                report_activity=self._report_activity, ca_certs=ca_certs)

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        body = urlopen_kw.pop('body', None)
        if fields is not None:
            data = urlencode(fields).encode()
            if body is not None:
                raise ValueError(
                    'body and fields are mutually exclusive')
        else:
            data = body
        request = Request(method, url, data, headers)
        request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
        if urlopen_kw:
            raise NotImplementedError(
                'unknown arguments: %r' % urlopen_kw.keys())
        connection = self._get_connection()
        if connection is not None:
            # Give back shared info
            request.connection = connection
            (auth, proxy_auth) = self._get_credentials()
            # Clean the httplib.HTTPConnection pipeline in case the previous
            # request couldn't do it
            connection.cleanup_pipe()
        else:
            # First request, initialize credentials.
            # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
            auth = self._create_auth()
            # Proxy initialization will be done by the first proxied request
            proxy_auth = dict()
        # Ensure authentication info is provided
        request.auth = auth
        request.proxy_auth = proxy_auth

        if self._debuglevel > 0:
            print('perform: %s base: %s, url: %s' % (request.method, self.base,
                                                     request.get_full_url()))
        response = self._opener.open(request)
        if self._get_connection() is not request.connection:
            # First connection or reconnection
            self._set_connection(request.connection,
                                 (request.auth, request.proxy_auth))
        else:
            # http may change the credentials while keeping the
            # connection open
            self._update_credentials((request.auth, request.proxy_auth))

        code = response.code
        if (request.follow_redirections is False
                and code in (301, 302, 303, 307, 308)):
            raise errors.RedirectRequested(request.get_full_url(),
                                           request.redirected_to,
                                           is_permanent=(code in (301, 308)))

        if request.redirected_to is not None:
            trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                         request.redirected_to))

        class Urllib3LikeResponse(object):

            def __init__(self, actual):
                self._actual = actual
                self._data = None

            def getheader(self, name, default=None):
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                if PY3:
                    return self._actual.headers.get(name, default)
                else:
                    return self._actual.headers.getheader(name, default)

            def getheaders(self):
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                return list(self._actual.headers.items())

            @property
            def status(self):
                return self._actual.code

            @property
            def reason(self):
                return self._actual.reason

            @property
            def data(self):
                if self._data is None:
                    self._data = self._actual.read()
                return self._data

            @property
            def text(self):
                if self.status == 204:
                    return None
                charset = cgi.parse_header(
                    self._actual.headers['Content-Type'])[1].get('charset')
                if charset:
                    return self.data.decode(charset)
                else:
                    return self.data.decode()

            def read(self, amt=None):
                return self._actual.read(amt)

            def readlines(self):
                return self._actual.readlines()

            def readline(self, size=-1):
                return self._actual.readline(size)

        return Urllib3LikeResponse(response)

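    # A minimal usage sketch, assuming `t` is an HttpTransport instance:
    #   resp = t.request('GET', t._remote_path('foo'))
    #   if resp.status == 200:
    #       content = resp.read()
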
    def disconnect(self):
        connection = self._get_connection()
        if connection is not None:
            connection.close()

    def has(self, relpath):
        """Does the target location exist?"""
        response = self._head(relpath)

        code = response.status
        if code == 200:  # "ok",
            return True
        else:
            return False

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        """
        code, response_file = self._get(relpath, None)
        return response_file

    def _get(self, relpath, offsets, tail_amount=0):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param offsets: None to get the whole file;
            or a list of _CoalescedOffset to fetch parts of a file.
        :param tail_amount: The amount to get from the end of the file.

        :returns: (http_code, result_file)
        """
        abspath = self._remote_path(relpath)
        headers = {}
        if offsets or tail_amount:
            range_header = self._attempted_range_header(offsets, tail_amount)
            if range_header is not None:
                bytes = 'bytes=' + range_header
                headers = {'Range': bytes}
        else:
            range_header = None

        response = self.request('GET', abspath, headers=headers)

        if response.status == 404:  # not found
            raise errors.NoSuchFile(abspath)
        elif response.status == 416:
            # We don't know which, but one of the ranges we specified was
            # wrong.
            raise errors.InvalidHttpRange(abspath, range_header,
                                          'Server returned code %d'
                                          % response.status)
        elif response.status == 400:
            if range_header:
                # We don't know which, but one of the ranges we specified was
                # wrong.
                raise errors.InvalidHttpRange(
                    abspath, range_header,
                    'Server returned code %d' % response.status)
            else:
                raise errors.BadHttpRequest(abspath, response.reason)
        elif response.status not in (200, 206):
            raise errors.UnexpectedHttpStatus(abspath, response.status)

        data = handle_response(
            abspath, response.status, response.getheader, response)
        return response.status, data

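    # For instance, offsets coalescing to ranges (start=0, length=100) and
    # (start=200, length=100) produce {'Range': 'bytes=0-99,200-299'} under
    # the 'multi' range hint (see _range_header below).
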
    def _remote_path(self, relpath):
        """See ConnectedTransport._remote_path.

        user and password are not embedded in the path provided to the server.
        """
        url = self._parsed_url.clone(relpath)
        url.user = url.quoted_user = None
        url.password = url.quoted_password = None
        url.scheme = self._unqualified_scheme
        return str(url)

    def _create_auth(self):
        """Returns a dict containing the credentials provided at build time."""
        auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,
                    user=self._parsed_url.user,
                    password=self._parsed_url.password,
                    protocol=self._unqualified_scheme,
                    path=self._parsed_url.path)
        return auth

    def get_smart_medium(self):
        """See Transport.get_smart_medium."""
        if self._medium is None:
            # Since medium holds some state (smart server probing at least),
            # we need to keep it around. Note that this is needed because
            # medium has the same 'base' attribute as the transport so it
            # can't be shared between transports having different bases.
            self._medium = SmartClientHTTPMedium(self)
        return self._medium

    def _degrade_range_hint(self, relpath, ranges):
        if self._range_hint == 'multi':
            self._range_hint = 'single'
            mutter('Retry "%s" with single range request' % relpath)
        elif self._range_hint == 'single':
            self._range_hint = None
            mutter('Retry "%s" without ranges' % relpath)
        else:
            # We tried all the tricks, but nothing worked, caller must reraise.
            return False
        return True

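    # A worked illustration of the ladder above: a transport starts with
    # _range_hint == 'multi'; a first range-related failure drops it to
    # 'single', a second to None (whole-file downloads), after which
    # _degrade_range_hint returns False and the caller re-raises.
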
    # _coalesce_offsets is a helper for readv, it tries to combine ranges
    # without degrading readv performance. _bytes_to_read_before_seek is the
    # value used for the limit parameter and has been tuned for other
    # transports. For HTTP, the name is inappropriate but the parameter is
    # still useful and helps reduce the number of chunks in the response. The
    # overhead for a chunk (headers, length, footer around the data itself)
    # is variable but around 50 bytes. We use 128 to reduce the range
    # specifiers that appear in the header; some servers (notably Apache)
    # enforce a maximum length for a header and issue a '400: Bad request'
    # error when too many ranges are specified.
    _bytes_to_read_before_seek = 128
    # No limit on the number of offsets that get combined into one, we are
    # trying to avoid downloading the whole file.
    _max_readv_combine = 0
    # By default Apache has a limit of ~400 ranges before replying with a 400
    # Bad Request. So we go underneath that amount to be safe.
    _max_get_ranges = 200
    # We impose no limit on the range size. But see _pycurl.py for a different
    # use.
    _get_max_size = 0

    def _readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param offsets: A list of (offset, size) tuples.
        :returns: A list or generator of (offset, data) tuples
        """
        # offsets may be a generator, we will iterate it several times, so
        # build a list
        offsets = list(offsets)

        try_again = True
        retried_offset = None
        while try_again:
            try_again = False

            # Coalesce the offsets to minimize the GET requests issued
            sorted_offsets = sorted(offsets)
            coalesced = self._coalesce_offsets(
                sorted_offsets, limit=self._max_readv_combine,
                fudge_factor=self._bytes_to_read_before_seek,
                max_size=self._get_max_size)

            # Turn it into a list, we will iterate it several times
            coalesced = list(coalesced)
            if 'http' in debug.debug_flags:
                mutter('http readv of %s offsets => %s collapsed %s',
                       relpath, len(offsets), len(coalesced))

            # Cache the data read, but only until it's been used
            data_map = {}
            # We will iterate on the data received from the GET requests and
            # serve the corresponding offsets respecting the initial order. We
            # need an offset iterator for that.
            iter_offsets = iter(offsets)
            try:
                cur_offset_and_size = next(iter_offsets)
            except StopIteration:
                return

            try:
                for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
                    # Split the received chunk
                    for offset, size in cur_coal.ranges:
                        start = cur_coal.start + offset
                        rfile.seek(start, os.SEEK_SET)
                        data = rfile.read(size)
                        data_len = len(data)
                        if data_len != size:
                            raise errors.ShortReadvError(relpath, start, size,
                                                         actual=data_len)
                        if (start, size) == cur_offset_and_size:
                            # The offsets requested are sorted as the coalesced
                            # ones, no need to cache. Win !
                            yield cur_offset_and_size[0], data
                            try:
                                cur_offset_and_size = next(iter_offsets)
                            except StopIteration:
                                return
                        else:
                            # Different sorting. We need to cache.
                            data_map[(start, size)] = data

                    # Yield everything we can
                    while cur_offset_and_size in data_map:
                        # Clean the cached data since we use it
                        # XXX: will break if offsets contains duplicates --
                        # vila20071129
                        this_data = data_map.pop(cur_offset_and_size)
                        yield cur_offset_and_size[0], this_data
                        try:
                            cur_offset_and_size = next(iter_offsets)
                        except StopIteration:
                            return

            except (errors.ShortReadvError, errors.InvalidRange,
                    errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
                mutter('Exception %r: %s during http._readv', e, e)
                if (not isinstance(e, errors.ShortReadvError)
                        or retried_offset == cur_offset_and_size):
                    # We don't degrade the range hint for ShortReadvError since
                    # they do not indicate a problem with the server's ability
                    # to handle ranges. Except when we fail to get back a
                    # required offset twice in a row. In that case, falling
                    # back to single range or whole file should help.
                    if not self._degrade_range_hint(relpath, coalesced):
                        raise
                # Some offsets may have been already processed, so we retry
                # only the unsuccessful ones.
                offsets = [cur_offset_and_size] + [o for o in iter_offsets]
                retried_offset = cur_offset_and_size
                try_again = True

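    # A minimal usage sketch, assuming `t` is an HttpTransport instance:
    #   for offset, data in t._readv('foo', [(0, 10), (100, 10)]):
    #       ...  # data holds the 10 bytes read at each requested offset
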
    def _coalesce_readv(self, relpath, coalesced):
        """Issue several GET requests to satisfy the coalesced offsets"""

        def get_and_yield(relpath, coalesced):
            if coalesced:
                # Note that the _get below may raise
                # errors.InvalidHttpRange. It's the caller's responsibility to
                # decide how to retry since it may provide different coalesced
                # offsets.
                code, rfile = self._get(relpath, coalesced)
                for coal in coalesced:
                    yield coal, rfile

        if self._range_hint is None:
            # Download whole file
            for c, rfile in get_and_yield(relpath, coalesced):
                yield c, rfile
        else:
            total = len(coalesced)
            if self._range_hint == 'multi':
                max_ranges = self._max_get_ranges
            elif self._range_hint == 'single':
                max_ranges = total
            else:
                raise AssertionError("Unknown _range_hint %r"
                                     % (self._range_hint,))
            # TODO: Some web servers may ignore the range requests and return
            # the whole file, we may want to detect that and avoid further
            # requests.
            # Hint: test_readv_multiple_get_requests will fail once we do that
            cumul = 0
            ranges = []
            for coal in coalesced:
                if ((self._get_max_size > 0
                     and cumul + coal.length > self._get_max_size) or
                        len(ranges) >= max_ranges):
                    # Get that much and yield
                    for c, rfile in get_and_yield(relpath, ranges):
                        yield c, rfile
                    # Restart with the current offset
                    ranges = [coal]
                    cumul = coal.length
                else:
                    ranges.append(coal)
                    cumul += coal.length
            # Get the rest and yield
            for c, rfile in get_and_yield(relpath, ranges):
                yield c, rfile

    def recommended_page_size(self):
        """See Transport.recommended_page_size().

        For HTTP we suggest a large page size to reduce the overhead
        introduced by latency.
        """
        return 64 * 1024

    def _post(self, body_bytes):
        """POST body_bytes to .bzr/smart on this transport.

        :returns: (response code, response body file-like object).
        """
        # TODO: Requiring all the body_bytes to be available at the beginning
        # of the POST may require large client buffers. It would be nice to
        # have an interface that allows streaming via POST when possible (and
        # degrades to a local buffer when not).
        abspath = self._remote_path('.bzr/smart')
        response = self.request(
            'POST', abspath, body=body_bytes,
            headers={'Content-Type': 'application/octet-stream'})
        if response.status not in (200, 403):
            raise errors.UnexpectedHttpStatus(abspath, response.status)
        code = response.status
        data = handle_response(
            abspath, code, response.getheader, response)
        return code, data

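    # Illustrative exchange: the smart medium below POSTs an encoded request
    # and reads the reply from the returned file-like object, e.g.:
    #   code, body = t._post(request_bytes)  # `t`: an HttpTransport
    #   reply = body.read()
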
    def _head(self, relpath):
        """Request the HEAD of a file.

        Performs the request and leaves callers to handle the results.
        """
        abspath = self._remote_path(relpath)
        response = self.request('HEAD', abspath)
        if response.status not in (200, 404):
            raise errors.UnexpectedHttpStatus(abspath, response.status)

        return response

    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location.

        :param relpath: Location to put the contents, relative to base.
        :param f: File-like object.
        """
        raise errors.TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path."""
        raise errors.TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir."""
        raise errors.TransportNotPossible('http does not support rmdir()')

    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object to the final
        location.
        """
        raise errors.TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to"""
        raise errors.TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransport):
            raise errors.TransportNotPossible(
                'http cannot be the target of copy_to()')
        else:
            return super(HttpTransport, self).copy_to(
                relpaths, other, mode=mode, pb=pb)

    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to"""
        raise errors.TransportNotPossible('http does not support move()')

    def delete(self, relpath):
        """Delete the item at relpath"""
        raise errors.TransportNotPossible('http does not support delete()')

    def external_url(self):
        """See breezy.transport.Transport.external_url."""
        # HTTP URLs are externally usable as long as they don't mention their
        # implementation qualifier
        url = self._parsed_url.clone()
        url.scheme = self._unqualified_scheme
        return str(url)

    def is_readonly(self):
        """See Transport.is_readonly."""
        return True

    def listable(self):
        """See Transport.listable."""
        return False

    def stat(self, relpath):
        """Return the stat information for a file."""
        raise errors.TransportNotPossible('http does not support stat()')

    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignored locks for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            def __init__(self, path):
                self.path = path

            def unlock(self):
                pass

        return BogusLock(relpath)

    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid using it.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        raise errors.TransportNotPossible('http does not support lock_write()')

    def _attempted_range_header(self, offsets, tail_amount):
        """Prepare a HTTP Range header at a level the server should accept.

        :return: the range header representing offsets/tail_amount or None if
            no header can be built.
        """
        if self._range_hint == 'multi':
            # Generate the header describing all offsets
            return self._range_header(offsets, tail_amount)
        elif self._range_hint == 'single':
            # Combine all the requested ranges into a single
            # encompassing one
            if len(offsets) > 0:
                if tail_amount not in (0, None):
                    # Nothing we can do here to combine ranges with
                    # tail_amount into a single range, just return None. The
                    # whole file should be downloaded.
                    return None
                else:
                    start = offsets[0].start
                    last = offsets[-1]
                    end = last.start + last.length - 1
                    whole = self._coalesce_offsets([(start, end - start + 1)],
                                                   limit=0, fudge_factor=0)
                    return self._range_header(list(whole), 0)
            else:
                # Only tail_amount requested, let range_header
                # do its work
                return self._range_header(offsets, tail_amount)
        else:
            return None

    @staticmethod
    def _range_header(ranges, tail_amount):
        """Turn a list of byte ranges into a HTTP Range header value.

        :param ranges: A list of _CoalescedOffset
        :param tail_amount: The amount to get from the end of the file.

        :return: HTTP range header string.

        At least a non-empty ranges *or* a tail_amount must be
        provided.
        """
        strings = []
        for offset in ranges:
            strings.append('%d-%d' % (offset.start,
                                      offset.start + offset.length - 1))

        if tail_amount:
            strings.append('-%d' % tail_amount)

        return ','.join(strings)

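    # Worked example: coalesced offsets (start=0, length=10) and
    # (start=20, length=10) with tail_amount=50 give '0-9,20-29,-50',
    # sent on the wire as 'Range: bytes=0-9,20-29,-50'.
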
    def _redirected_to(self, source, target):
        """Returns a transport suitable to re-issue a redirected request.

        :param source: The source url as returned by the server.
        :param target: The target url as returned by the server.

        The redirection can be handled only if the relpath involved is not
        renamed by the redirection.

        :returns: A transport
        :raise UnusableRedirect: when the URL can not be reinterpreted
        """
        parsed_source = self._split_url(source)
        parsed_target = self._split_url(target)
        pl = len(self._parsed_url.path)
        # determine the excess tail - the relative path that was in
        # the original request but not part of this transport's URL.
        excess_tail = parsed_source.path[pl:].strip("/")
        if not parsed_target.path.endswith(excess_tail):
            # The final part of the url has been renamed, we can't handle the
            # redirection.
            raise UnusableRedirect(
                source, target, "final part of the url was renamed")

        target_path = parsed_target.path
        if excess_tail:
            # Drop the tail that was in the redirect but not part of
            # the path of this transport.
            target_path = target_path[:-len(excess_tail)]

        if parsed_target.scheme in ('http', 'https'):
            # Same protocol family (i.e. http[s]), we will preserve the same
            # http client implementation when a redirection occurs from one to
            # the other (otherwise users may be surprised that bzr switches
            # from one implementation to the other, and devs may suffer
            # debugging it).
            if (parsed_target.scheme == self._unqualified_scheme
                    and parsed_target.host == self._parsed_url.host
                    and parsed_target.port == self._parsed_url.port
                    and (parsed_target.user is None or
                         parsed_target.user == self._parsed_url.user)):
                # If a user is specified, it should match, we don't care about
                # passwords, wrong passwords will be rejected anyway.
                return self.clone(target_path)
            else:
                # Rebuild the url preserving the scheme qualification and the
                # credentials (if they don't apply, the redirected to server
                # will tell us, but if they do apply, we avoid prompting the
                # user)
                redir_scheme = parsed_target.scheme
                new_url = self._unsplit_url(redir_scheme,
                                            self._parsed_url.user,
                                            self._parsed_url.password,
                                            parsed_target.host,
                                            parsed_target.port,
                                            target_path)
                return transport.get_transport_from_url(new_url)
        else:
            # Redirected to a different protocol
            new_url = self._unsplit_url(parsed_target.scheme,
                                        parsed_target.user,
                                        parsed_target.password,
                                        parsed_target.host,
                                        parsed_target.port,
                                        target_path)
            return transport.get_transport_from_url(new_url)

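    # For example, with a transport based at http://host/path, a redirection
    # of http://host/path/foo to https://host/path/foo returns an https
    # transport for the same path, while a redirection to
    # http://host/other/bar raises UnusableRedirect because the trailing
    # 'foo' was renamed.
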
    def _options(self, relpath):
        abspath = self._remote_path(relpath)
        resp = self.request('OPTIONS', abspath)
        if resp.status == 404:
            raise errors.NoSuchFile(abspath)
        if resp.status in (403, 405):
            raise errors.InvalidHttpResponse(
                abspath,
                "OPTIONS not supported or forbidden for remote URL")
        return resp.getheaders()

# TODO: May be better located in smart/medium.py with the other
# SmartMedium classes
class SmartClientHTTPMedium(medium.SmartClientMedium):

    def __init__(self, http_transport):
        super(SmartClientHTTPMedium, self).__init__(http_transport.base)
        # We don't want to create a circular reference between the http
        # transport and its associated medium. Since the transport will live
        # longer than the medium, the medium keeps only a weak reference to
        # its transport.
        self._http_transport_ref = weakref.ref(http_transport)

    def get_request(self):
        return SmartClientHTTPMediumRequest(self)

    def should_probe(self):
        return True

    def remote_path_from_transport(self, transport):
        # Strip the optional 'bzr+' prefix from transport so it will have the
        # same scheme as self.
        transport_base = transport.base
        if transport_base.startswith('bzr+'):
            transport_base = transport_base[4:]
        rel_url = urlutils.relative_url(self.base, transport_base)
        return urlutils.unquote(rel_url)

    def send_http_smart_request(self, bytes):
        try:
            # Get back the http_transport held by the weak reference
            t = self._http_transport_ref()
            code, body_filelike = t._post(bytes)
            if code != 200:
                raise errors.UnexpectedHttpStatus(
                    t._remote_path('.bzr/smart'), code)
        except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
            raise errors.SmartProtocolError(str(e))
        return body_filelike

    def _report_activity(self, bytes, direction):
        """See SmartMedium._report_activity.

        Does nothing; the underlying plain HTTP transport will report the
        activity that this medium would report.
        """

    def disconnect(self):
        """See SmartClientMedium.disconnect()."""
        t = self._http_transport_ref()
        t.disconnect()

# TODO: May be better located in smart/medium.py with the other
# SmartMediumRequest classes
class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium."""

    def __init__(self, client_medium):
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        self._buffer = b''

    def _accept_bytes(self, bytes):
        self._buffer += bytes

    def _finished_writing(self):
        data = self._medium.send_http_smart_request(self._buffer)
        self._response_body = data

    def _read_bytes(self, count):
        """See SmartClientMediumRequest._read_bytes."""
        return self._response_body.read(count)

    def _read_line(self):
        line, excess = medium._get_line(self._response_body.read)
        if excess != b'':
            raise AssertionError(
                '_get_line returned excess bytes, but this mediumrequest '
                'cannot handle excess. (%r)' % (excess,))
        return line

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""

def unhtml_roughly(maybe_html, length_limit=1000):
    """Very approximate html->text translation, for presenting error bodies.

    :param length_limit: Truncate the result to this many characters.

    >>> unhtml_roughly("<b>bad</b> things happened\\n")
    ' bad  things happened '
    """
    return re.subn(r"(<[^>]*>|\n|&nbsp;)", " ", maybe_html)[0][:length_limit]

def get_test_permutations():
    """Return the permutations to be used in testing."""
    from breezy.tests import (
        features,
        http_server,
        )
    permutations = [(HttpTransport, http_server.HttpServer), ]
    if features.HTTPSServerFeature.available():
        from breezy.tests import (
            https_server,
            ssl_certs,
            )

        class HTTPS_transport(HttpTransport):

            def __init__(self, base, _from_transport=None):
                super(HTTPS_transport, self).__init__(
                    base, _from_transport=_from_transport,
                    ca_certs=ssl_certs.build_path('ca.crt'))

        permutations.append((HTTPS_transport,
                             https_server.HTTPSServer))
    return permutations