# Copyright (C) 2005-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Base implementation of Transport over http using urllib.

There are separate implementation modules for each http client implementation.
"""
from __future__ import absolute_import

import base64
import errno
import re
import socket
import sys
import time

try:
    import http.client as http_client
except ImportError:  # python < 3
    import httplib as http_client
try:
    import urllib.request as urllib_request
except ImportError:  # python < 3
    import urllib2 as urllib_request
try:
    from urllib.parse import urljoin, splitport, splittype, splithost, urlencode
except ImportError:  # python < 3
    from urlparse import urljoin
    from urllib import splitport, splittype, splithost, urlencode
# TODO: handle_response should be integrated into the http/__init__.py
from .response import handle_response

# FIXME: Oversimplifying, two kinds of exceptions should be
# raised, once a request is issued: URLError before we have been
# able to process the response, HTTPError after that. Processing the
# response means we are able to leave the socket clean, so if we
# are not able to do that, we should close the connection. The
# actual code more or less does that, tests should be written to
# ensure that.

from ... import __version__ as breezy_version
from ... import (
    config,
    debug,
    errors,
    osutils,
    trace,
    transport,
    ui,
    urlutils,
    )
from ...bzr.smart import medium
from ...trace import mutter
from ...transport import (
    ConnectedTransport,
    )

from . import default_user_agent, ssl

# Module debug level; the handlers below propagate it to httplib.
DEBUG = 0

kerberos = None
checked_kerberos = False


class addinfourl(urllib_request.addinfourl):
    '''Replacement addinfourl class compatible with python-2.7's xmlrpclib

    In python-2.7, xmlrpclib expects that the response object that it receives
    has a getheader method.  http_client.HTTPResponse provides this but
    urllib_request.addinfourl does not.  Add the necessary functions here, ported to
    use the internal data structures of addinfourl.
    '''

    def getheader(self, name, default=None):
        if self.headers is None:
            raise http_client.ResponseNotReady()
        return self.headers.getheader(name, default)

    def getheaders(self):
        if self.headers is None:
            raise http_client.ResponseNotReady()
        return list(self.headers.items())


class _ReportingFileSocket(object):

    def __init__(self, filesock, report_activity=None):
        self.filesock = filesock
        self._report_activity = report_activity

    def report_activity(self, size, direction):
        if self._report_activity:
            self._report_activity(size, direction)

    def read(self, size=1):
        s = self.filesock.read(size)
        self.report_activity(len(s), 'read')
        return s

    def readline(self, size=-1):
        s = self.filesock.readline(size)
        self.report_activity(len(s), 'read')
        return s

    def readinto(self, b):
        # readinto returns the number of bytes read, not the data itself
        s = self.filesock.readinto(b)
        self.report_activity(s, 'read')
        return s

    def __getattr__(self, name):
        return getattr(self.filesock, name)


class _ReportingSocket(object):

    def __init__(self, sock, report_activity=None):
        self.sock = sock
        self._report_activity = report_activity

    def report_activity(self, size, direction):
        if self._report_activity:
            self._report_activity(size, direction)

    def sendall(self, s, *args):
        self.sock.sendall(s, *args)
        self.report_activity(len(s), 'write')

    def recv(self, *args):
        s = self.sock.recv(*args)
        self.report_activity(len(s), 'read')
        return s

    def makefile(self, mode='r', bufsize=-1):
        # http_client creates a fileobject that doesn't do buffering, which
        # makes fp.readline() very expensive because it only reads one byte
        # at a time.  So we wrap the socket in an object that forces
        # sock.makefile to make a buffered file.
        fsock = self.sock.makefile(mode, 65536)
        # And wrap that into a reporting kind of fileobject
        return _ReportingFileSocket(fsock, self._report_activity)

    def __getattr__(self, name):
        return getattr(self.sock, name)
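

# Illustrative sketch, not part of the original module: how the reporting
# wrappers above are meant to be layered over a plain connected socket. The
# `report` callback and the byte counters are hypothetical.
def _example_reporting_socket(sock):
    counts = {'read': 0, 'write': 0}

    def report(size, direction):
        # Accumulate the number of bytes seen in each direction.
        counts[direction] += size

    wrapped = _ReportingSocket(sock, report)
    wrapped.sendall(b'HEAD / HTTP/1.1\r\nHost: example\r\n\r\n')
    wrapped.recv(4096)
    return counts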


# We define our own Response class to keep our http_client pipe clean
class Response(http_client.HTTPResponse):
    """Custom HTTPResponse, to avoid the need to decorate.

    http_client prefers to decorate the returned objects, rather
    than using a custom object.
    """

    # Some responses have bodies in which we have no interest
    _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]

    # In finish() below, we may have to discard several MB in the worst
    # case. To avoid buffering that much, we read and discard by chunks
    # instead. The underlying file is either a socket or a StringIO, so reading
    # 8k chunks should be fine.
    _discarded_buf_size = 8192

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        self.url = url
        super(Response, self).__init__(
            sock, debuglevel=debuglevel, method=method, url=url)

    def begin(self):
        """Begin to read the response from the server.

        http_client assumes that some responses get no content and does
        not even attempt to read the body in that case, leaving
        the body in the socket, blocking the next request. Let's
        try to work around that.
        """
        http_client.HTTPResponse.begin(self)
        if self.status in self._body_ignored_responses:
            if self.debuglevel >= 2:
                print("For status: [%s], will read body, length: %s" % (
                    self.status, self.length))
            if not (self.length is None or self.will_close):
                # In some cases, we just can't read the body, not
                # even try, or we may encounter a 104, 'Connection
                # reset by peer' error if there is indeed no body
                # and the server closed the connection just after
                # having issued the response headers (even if the
                # headers indicate a Content-Type...)
                body = self.read(self.length)
                if self.debuglevel >= 9:
                    # This one can be huge and is generally not interesting
                    print("Consumed body: [%s]" % body)
            self.close()
        elif self.status == 200:
            # Whatever the request is, it went ok, so we surely don't want to
            # close the connection. Some cases are not correctly detected by
            # http_client.HTTPConnection.getresponse (called by
            # http_client.HTTPResponse.begin). The CONNECT response for the https
            # through proxy case is one.  Note: the 'will_close' below refers
            # to the "true" socket between us and the server, whereas the
            # 'close()' above refers to the copy of that socket created by
            # http_client for the response itself. So, in the if above we close the
            # socket to indicate that we are done with the response whereas
            # below we keep the socket with the server opened.
            self.will_close = False

    def finish(self):
        """Finish reading the body.

        In some cases, the client may have left some bytes to read in the
        body. That will block the next request to succeed if we use a
        persistent connection. If we don't use a persistent connection, well,
        nothing will block the next request since a new connection will be
        issued anyway.

        :return: the number of bytes left on the socket (may be None)
        """
        pending = None
        if not self.isclosed():
            # Make sure nothing was left to be read on the socket
            pending = 0
            data = True
            while data and self.length:
                # read() will update self.length
                data = self.read(min(self.length, self._discarded_buf_size))
                pending += len(data)
            if pending:
                trace.mutter("%s bytes left on the HTTP socket", pending)
            self.close()
        return pending


# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
class AbstractHTTPConnection:
    """A custom HTTP(S) Connection, which can reset itself on a bad response"""

    response_class = Response

    # When we detect a server responding with the whole file to range requests,
    # we want to warn. But not below a given threshold.
    _range_warning_thresold = 1024 * 1024

    def __init__(self, report_activity=None):
        self._response = None
        self._report_activity = report_activity
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        netloc = '%s:%s' % (self.host, self.port)
        if self.proxied_host is not None:
            netloc += '(proxy for %s)' % self.proxied_host
        trace.mutter('* About to connect() to %s' % netloc)

    def getresponse(self):
        """Capture the response to be able to clean up"""
        self._response = http_client.HTTPConnection.getresponse(self)
        return self._response

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        if self._response is not None:
            try:
                pending = self._response.finish()
                # Warn the user (once)
                if (self._ranges_received_whole_file is None
                        and self._response.status == 200
                        and pending
                        and pending > self._range_warning_thresold):
                    self._ranges_received_whole_file = True
                    trace.warning(
                        'Got a 200 response when asking for multiple ranges,'
                        ' does your server at %s:%s support range requests?',
                        self.host, self.port)
            except socket.error as e:
                # It's conceivable that the socket is in a bad state here
                # (including some test cases) and in this case, it doesn't need
                # cleaning anymore, so no need to fail, we just get rid of the
                # socket and let callers reconnect
                if (len(e.args) == 0
                        or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)):
                    raise
            self._response = None
        # Preserve our preciousss
        sock = self.sock
        self.sock = None
        # Let http_client.HTTPConnection do its housekeeping
        http_client.HTTPConnection.close(self)
        # Restore our preciousss
        self.sock = sock

    def _wrap_socket_for_reporting(self, sock):
        """Wrap the socket before anybody uses it."""
        self.sock = _ReportingSocket(sock, self._report_activity)


class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        http_client.HTTPConnection.__init__(self, host, port)
        self.proxied_host = proxied_host
        # ca_certs is ignored, it's only relevant for https

    def connect(self):
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)


class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        http_client.HTTPSConnection.__init__(
            self, host, port, key_file, cert_file)
        self.proxied_host = proxied_host
        self.ca_certs = ca_certs

    def connect(self):
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        if self.proxied_host is not None:
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')

        if self.ca_certs is None:
            ca_certs = config_stack.get('ssl.ca_certs')
        else:
            ca_certs = self.ca_certs
        if ca_certs is None:
            trace.warning(
                "No valid trusted SSL CA certificates file set. See "
                "'brz help ssl.ca_certs' for more information on setting "
                "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=self.host)
        except ssl.SSLError:
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA"
                " certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody uses it
        self._wrap_socket_for_reporting(ssl_sock)


class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication has
      been made.
    """

    def __init__(self, method, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        urllib_request.Request.__init__(
            self, url, data, headers,
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        host, port = splitport(self.host)
        if port is None:
            # We need to set the default port ourselves way before it gets set
            # in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and a proxy,
        # it sets Host to the https proxy, not the host we want to talk to.
        # I'm fairly sure this is our fault, but what the cause is remains an
        # open question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)
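

# Illustrative sketch, not part of the original module: unlike urllib's
# heuristic GET/POST choice, a Request carries its method explicitly.
def _example_request_method():
    request = Request('HEAD', 'http://example.com/')
    assert request.get_method() == 'HEAD'
    return request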


class _ConnectRequest(Request):

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        """
        # We give a fake url and redefine selector or urllib_request will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        return self.proxied_host

    def get_selector(self):
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not apply
        here. In fact, the connection is already established with the proxy and
        we just want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)


class ConnectionHandler(urllib_request.BaseHandler):
    """Provides connection-sharing by pre-processing requests.

    urllib_request provides no way to access the HTTPConnection object
    internally used. But we need it in order to achieve
    connection sharing. So, we add it to the request just before
    it is processed, and then we override the do_open method for
    http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        self._report_activity = report_activity
        self.ca_certs = ca_certs

    def create_connection(self, request, http_connection_class):
        host = request.host
        if not host:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        # We create a connection (but it will not connect until the first
        # request is made)
        try:
            connection = http_connection_class(
                host, proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL as exception:
            # There is only one occurrence of InvalidURL in http_client
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

        return connection

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request has no connection: create a new one,

        - the request has a connection: this one has been used
          already, let's capture it, so that we can give it to
          another transport to be reused. We don't do that
          ourselves: the Transport object gets the connection from
          a first request and then propagates it, from request to
          request or to cloned transports.
        """
        connection = request.connection
        if connection is None:
            # First request, allocate a new connection
            connection = self.create_connection(request, http_connection_class)
            request.connection = connection

        # All connections will pass here, propagate debug level
        connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        return self.capture_connection(request, HTTPSConnection)


class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Common headers setting"""
        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exceptions of an application-level kind, we
        just have to translate them.

        http_client can raise exceptions of a transport level (badly
        formatted dialog, loss of connection or socket-level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).
        """
        # When an exception occurs, we give back the original
        # traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            origin_req_host = request.origin_req_host
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in an incorrect state, it's a bug in
            # our implementation.
            raise exc_val.with_traceback(exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Will retry, %s %r' % (method, url))
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicate that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) was
                    # experienced, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exceptions are considered connection related.

                    # socket errors generally occur for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    selector = request.selector
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Failed again, %s %r' % (method, url))
                    print('  Will raise: [%r]' % my_exception)
                raise my_exception.with_traceback(exc_tb)
        return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get canonical
        # header names, but only python2.5 urllib_request uses title() to fix them just
        # before sending the request. And not all versions of python 2.5 do
        # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
        # ourselves below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            url = request.selector
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         None,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         None,
                                         headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
            convert_to_addinfourl = True
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)
            convert_to_addinfourl = False

        response.msg = response.reason

        # FIXME: HTTPConnection does not fully support 100-continue (the
        # server responses are just ignored)

#        if code == 100:
#            mutter('Will send the body')
#            # We can send the body now
#            body = request.data
#            if body is None:
#                raise URLError("No data given")
#            connection.send(body)
#            response = connection.getresponse()

        if self._debuglevel >= 2:
            print('Receives response: %r' % response)
            print('  For: %r(%r)' % (request.get_method(),
                                     request.get_full_url()))

        if convert_to_addinfourl:
            # Shamelessly copied from urllib_request
            req = request
            r = response
            r.recv = r.read
            fp = socket._fileobject(r, bufsize=65536)
            resp = addinfourl(fp, r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason
            resp.version = r.version
            if self._debuglevel >= 2:
                print('Create addinfourl: %r' % resp)
                print('  For: %r(%r)' % (request.get_method(),
                                         request.get_full_url()))
            if 'http' in debug.debug_flags:
                version = 'HTTP/%d.%d'
                try:
                    version = version % (resp.version / 10,
                                         resp.version % 10)
                except Exception:
                    version = 'HTTP/%r' % resp.version
                trace.mutter('< %s %s %s' % (version, resp.code,
                                             resp.msg))
                # Use the raw header lines instead of treating resp.info() as a
                # dict since we may miss duplicated headers otherwise.
                hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
                trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
        else:
            resp = response

        return resp


class HTTPHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPConnection"""

    def http_open(self, request):
        return self.do_open(HTTPConnection, request)


class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT':  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                raise errors.ConnectionError("Can't connect to %s via proxy %s" % (
                    connect.proxied_host, self.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)


class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Handles redirect requests.

    We have to implement our own scheme because we use a specific
    Request object and because we want to implement a specific
    policy.
    """
    _debuglevel = DEBUG

    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But Breezy uses some
    # shortcuts to optimize against roundtrips which can lead to
    # write requests being issued before read requests of
    # containing dirs can be redirected. So we redirect write
    # requests in the same way which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request"""
        # We would have preferred to update the request instead
        # of creating a new one, but the urllib_request.Request object
        # has a too complicated creation process to provide a
        # simple enough equivalent update process. Instead, when
        # redirecting, we only update the following request in
        # the redirect chain with a reference to the parent
        # request.

        # Some codes make no sense in our context and are treated
        # as errors:

        # 300: Multiple choices for different representations of
        #      the URI. Using that mechanism with Breezy will violate the
        #      protocol neutrality of Transport.

        # 304: Not modified (SHOULD only occur with conditional
        #      GETs which are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in
        #      our context -- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may
        # think that it is a sufficient hint that the file exists
        # and that we MAY avoid following the redirections. But
        # if we want to be sure, we MUST follow them.

        origin_req_host = req.origin_req_host

        if code in (301, 302, 303, 307, 308):
            return Request(req.get_method(), newurl,
                           headers=req.headers,
                           origin_req_host=origin_req_host,
                           unverifiable=True,
                           # TODO: It will be nice to be able to
                           # detect virtual hosts sharing the same
                           # IP address, that will allow us to
                           # share the same connection...
                           connection=None,
                           parent=req,
                           )
        else:
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected to URI.

        Copied from urllib_request to be able to clean the pipe of the associated
        connection, *before* issuing the redirected request but *after* having
        eventually raised an error.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        if 'location' in headers:
            newurl = headers.get('location')
        elif 'uri' in headers:
            newurl = headers.get('uri')
        else:
            return

        newurl = urljoin(req.get_full_url(), newurl)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            req.redirected_to = newurl
            return fp

        # This call succeeds or raises an error. urllib_request returns
        # if redirect_request returns None, but our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = redirected_req.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        else:
            visited = redirected_req.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302


class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request during
    the request pre-processing instead of modifying it at _open time. As we
    capture (or create) the connection object during request processing, _open
    time was too late.

    The main task is to modify the request so that the connection is done to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may be
    against an https server proxied through an http proxy. So, https_request
    will be called, but later it's really http_open that will be called. This
    explains why we don't have to call self.parent.open as the urllib_request did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of the urllib_request implementation
        for type, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (type, proxy))
            delattr(self, '%s_open' % type)

        def bind_scheme_request(proxy, scheme):
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))
        # We are interested only in the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        bind_scheme_request(http_proxy, 'http')
        https_proxy = self.get_proxy_env_var('https')
        bind_scheme_request(https_proxy, 'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            if default_to is not None:
                # Try to get the alternate environment variable
                try:
                    return self.proxies[default_to]
                except KeyError:
                    pass
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib_request.proxy_bypass(host)
        else:
            return bypass

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if hport == dport or dport is None:
                # Protect glob chars
                dhost = dhost.replace(".", r"\.")
                dhost = dhost.replace("*", r".*")
                dhost = dhost.replace("?", r".")
                if re.match(dhost, hhost, re.IGNORECASE):
                    return True
        # Nothing explicitly avoids the host
        return None

    def set_proxy(self, request, type):
        host = request.host
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port

        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameters are available, we are handling the
            # first proxied request, initialize. scheme (the authentication
            # scheme) and realm will be set by the AuthHandler
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request
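

# Illustrative sketch, not part of the original module: expected no_proxy
# matching behaviour under the glob rules implemented in
# ProxyHandler.evaluate_proxy_bypass above.
def _example_no_proxy_matching():
    handler = ProxyHandler(proxies={})
    # An entry without a port matches any port on that host.
    assert handler.evaluate_proxy_bypass('example.com:80', 'example.com')
    # Glob chars are honoured: '*.example.com' matches subdomains.
    assert handler.evaluate_proxy_bypass('host.example.com:80', '*.example.com')
    # No match: leave the decision to urllib (None).
    assert handler.evaluate_proxy_bypass('other.org:80', 'example.com') is None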


class AbstractAuthHandler(urllib_request.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate and
    digest authentications.

    This provides a unified interface for all authentication handlers
    (urllib_request provides far too many with different policies).

    The interaction between this handler and the urllib_request
    framework is not obvious, it works as follows:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been updated.
    """

    scheme = None
    """The scheme as it appears in the server header (lower cased)"""

    _max_retry = 3
    """We don't want to retry authenticating endlessly"""

    requires_username = True
    """Whether the auth mechanism requires a username."""

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header: the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # We want to know when we enter into a try/fail cycle of
        # authentications so we initialize to None to indicate that we aren't
        # in such a cycle by default.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Parse the authentication header.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder) scheme being the first word in the
            given header (lower cased), remainder may be None.
        """
        try:
            scheme, remainder = server_header.split(None, 1)
        except ValueError:
            scheme = server_header
            remainder = None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        old_value = auth.get(key, None)
        if old_value != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # The retry being recursive calls, None identifies the first retry
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None

        server_headers = headers.get_all(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            trace.mutter('%s not found', self.auth_required_header)
            return None

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            self.update_auth(auth, 'protocol', parsed_url.scheme)
            self.update_auth(auth, 'host', parsed_url.host)
            self.update_auth(auth, 'port', parsed_url.port)
            self.update_auth(auth, 'path', parsed_url.path)
        # FIXME: the auth handler should be selected at a single place instead
        # of letting all handlers try to match all headers, but the current
        # design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match each
            # one in turn
            matching_handler = self.auth_match(server_header, auth)
            if matching_handler:
                # auth_match may have modified auth (by adding the
                # password or changing the realm, for example)
                if (request.get_header(self.auth_header, None) is not None
                        and not auth['modified']):
                    # We already tried that, give up
                    return None

                # Only the most secure scheme proposed by the server should be
                # used, since the handlers use 'handler_order' to describe that
                # property, the first handler tried takes precedence, the
                # others should not attempt to authenticate if the best one
                # failed.
                best_scheme = auth.get('best_scheme', None)
                if best_scheme is None:
                    # At that point, if the current handler doesn't succeed,
                    # the credentials are wrong (or incomplete), but we know
                    # that the associated scheme should be used.
                    best_scheme = auth['best_scheme'] = self.scheme
                if best_scheme != self.scheme:
                    continue

                if self.requires_username and auth.get('user', None) is None:
                    # Without a known user, we can't authenticate
                    return None

                # Housekeeping
                request.connection.cleanup_pipe()
                # Retry the request with an authentication header added
                response = self.parent.open(request)
                if response:
                    self.auth_successful(request, response)
                return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them changes the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
             updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional infos may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial url
            and then during dialog with the server).
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(auth['protocol'], auth['host'],
                                      port=port, path=auth['path'],
                                      realm=realm, ask=True,
                                      prompt=self.build_username_prompt(auth))
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_password_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' password'
        return prompt

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_username_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' username'
        return prompt

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            self.add_auth_header(
                request, self.build_auth_header(auth, request))
        return request

    https_request = http_request  # FIXME: Need test
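

# Illustrative sketch, not part of the original module: what
# _parse_auth_header yields for typical server challenges.
def _example_parse_auth_header():
    handler = AbstractAuthHandler()
    assert handler._parse_auth_header('Basic realm="Test"') == (
        'basic', 'realm="Test"')
    # A challenge with no parameters yields a None remainder.
    assert handler._parse_auth_header('Negotiate') == ('negotiate', None)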


class NegotiateAuthHandler(AbstractAuthHandler):
    """An authentication handler that handles WWW-Authenticate: Negotiate.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        resp = self._auth_match_kerberos(auth)
        if resp is None:
            return False
        # Optionally should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', resp)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host."""
        global kerberos, checked_kerberos
        if kerberos is None and not checked_kerberos:
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None
        ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if ret < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], ret)
            return None
        ret = kerberos.authGSSClientStep(vc, "")
        if ret < 0:
            trace.mutter('authGSSClientStep failed: %d', ret)
            return None
        return kerberos.authGSSClientResponse(vc)

    def build_auth_header(self, auth, request):
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return (auth.get('scheme', None) == 'negotiate' and
                auth.get('negotiate_response', None) is not None)


class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        raw = '%s:%s' % (auth['user'], auth['password'])
        auth_header = 'Basic ' + \
            base64.b64encode(raw.encode('utf-8')).decode('ascii')
        return auth_header

    def extract_realm(self, header_value):
        match = self.auth_regexp.search(header_value)
        realm = None
        if match:
            realm = match.group(1)
        return match, realm

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match:
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if (auth.get('user', None) is None
                    or auth.get('password', None) is None):
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'


def get_digest_algorithm_impls(algorithm):
    H = None
    KD = None
    if algorithm == 'MD5':
        def H(x): return osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        H = osutils.sha_string
    if H is not None:
        def KD(secret, data): return H(
            ("%s:%s" % (secret, data)).encode('utf-8'))
    return H, KD


def get_new_cnonce(nonce, nonce_count):
    raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(),
                           osutils.rand_chars(8))
    return osutils.sha_string(raw.encode('utf-8'))[:16]
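

# Illustrative sketch, not part of the original module: the RFC 2617 digest
# computation the helpers above support, shown for MD5 with made-up
# credentials and nonces.
def _example_digest_computation():
    H, KD = get_digest_algorithm_impls('MD5')
    A1 = b'user:realm:password'
    A2 = b'GET:/path'
    # response = KD(H(A1), nonce:nc:cnonce:qop:H(A2))
    nonce_data = '%s:%s:%s:%s:%s' % (
        'nonce', '00000001', get_new_cnonce('nonce', 1), 'auth', H(A2))
    return KD(H(A1), nonce_data)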


class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib_request.parse_keqv_list(
            urllib_request.parse_http_list(raw_auth))

        # Check that we can handle that authentication
        qop = req_auth.get('qop', None)
        if qop != 'auth':  # No auth-int so far
            return False

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if auth.get('user', None) is None or auth.get('password', None) is None:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        selector = request.selector
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header


class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        return self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        return self._build_username_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)


class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 407 error and keep the relevant info in
    the proxy_auth request attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insists on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        prompt = self._build_password_prompt(auth)
        prompt = u'Proxy ' + prompt
        return prompt

    def build_username_prompt(self, auth):
        prompt = self._build_username_prompt(auth)
        prompt = u'Proxy ' + prompt
        return prompt

    def http_error_407(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)


class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler"""


class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler"""


class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler"""


class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler"""


class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Custom http negotiate authentication handler"""


class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Custom proxy negotiate authentication handler"""


class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors; quite the contrary, we
    leave it to our Transport to handle them.
    """

    accepted_errors = [200,  # Ok
                       201,
                       202,
                       204,
                       206,  # Partial content
                       400,
                       403,
                       404,  # Not found
                       405,  # Method not allowed
                       406,  # Not Acceptable
                       409,  # Conflict
                       416,  # Range not satisfiable
                       422,  # Unprocessable entity
                       501,  # Not implemented
                       ]
    """The error codes the caller will handle.

    This can be specialized in the request on a case-by-case basis, but the
    common cases are covered here.
    """

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        if code not in self.accepted_errors:
            response = self.parent.error('http', request, response,
                                         code, msg, hdrs)
        return response

    https_response = http_response


class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        if code == 403:
            raise errors.TransportError(
                'Server refuses to fulfill the request (403 Forbidden)'
                ' for %s' % req.get_full_url())
        else:
            raise errors.UnexpectedHttpStatus(
                req.get_full_url(), code,
                'Unable to handle http code: %s' % msg)


class Opener(object):
    """A wrapper around urllib_request.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provide hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        self._opener = urllib_request.build_opener(
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect, error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            )

        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)
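

# Illustrative sketch, not part of the original module: how the pieces above
# are wired together. HttpTransport (below) does essentially this, adding
# connection sharing and credential tracking on top. The URL is hypothetical.
def _example_open_url(url='http://example.com/'):
    opener = Opener()
    request = Request('GET', url)
    response = opener.open(request)
    return response.code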


class HttpTransport(ConnectedTransport):
    """HTTP Client implementations.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.
    """

    # _unqualified_scheme: "http" or "https"
    # _scheme: may have "+pycurl", etc

    # In order to debug we have to issue our traces in sync with
    # httplib, which uses print :(
    _debuglevel = 0

    def __init__(self, base, _from_transport=None, ca_certs=None):
        """Set the base path where files will be stored."""
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._unqualified_scheme = proto_match.group(1)
        super(HttpTransport, self).__init__(
            base, _from_transport=_from_transport)
        self._medium = None
        # range hint is handled dynamically throughout the life
        # of the transport object. We start by trying multi-range
        # requests and if the server returns bogus results, we
        # retry with single range requests and, finally, we
        # forget about range if the server really can't
        # understand. Once acquired, this piece of info is
        # propagated to clones.
        if _from_transport is not None:
            self._range_hint = _from_transport._range_hint
            self._opener = _from_transport._opener
        else:
            self._range_hint = 'multi'
            self._opener = Opener(
                report_activity=self._report_activity, ca_certs=ca_certs)
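
    # Illustrative note (not in the original source): cloning, e.g.
    # t.clone('subdir'), passes _from_transport so the clone inherits both
    # the current _range_hint and the shared Opener instead of building new
    # ones.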

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        body = urlopen_kw.pop('body', None)
        if fields is not None:
            data = urlencode(fields).encode()
            if body is not None:
                raise ValueError(
                    'body and fields are mutually exclusive')
        else:
            data = body
        if headers is None:
            headers = {}
        request = Request(method, url, data, headers)
        request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
        if urlopen_kw:
            raise NotImplementedError(
                'unknown arguments: %r' % urlopen_kw.keys())
        connection = self._get_connection()
        if connection is not None:
            # Give back shared info
            request.connection = connection
            (auth, proxy_auth) = self._get_credentials()
            # Clean the httplib.HTTPConnection pipeline in case the previous
            # request couldn't do it
            connection.cleanup_pipe()
        else:
            # First request, initialize credentials.
            # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
            auth = self._create_auth()
            # Proxy initialization will be done by the first proxied request
            proxy_auth = dict()
        # Ensure authentication info is provided
        request.auth = auth
        request.proxy_auth = proxy_auth

        if self._debuglevel > 0:
            print('perform: %s base: %s, url: %s' % (request.method, self.base,
                                                     request.get_full_url()))
        response = self._opener.open(request)
        if self._get_connection() is not request.connection:
            # First connection or reconnection
            self._set_connection(request.connection,
                                 (request.auth, request.proxy_auth))
        else:
            # http may change the credentials while keeping the
            # connection opened
            self._update_credentials((request.auth, request.proxy_auth))

        code = response.code
        if (request.follow_redirections is False
                and code in (301, 302, 303, 307, 308)):
            raise errors.RedirectRequested(request.get_full_url(),
                                           request.redirected_to,
                                           is_permanent=(code in (301, 308)))

        if request.redirected_to is not None:
            trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                         request.redirected_to))

        class Urllib3LikeResponse(object):

            def __init__(self, actual):
                self._actual = actual
                self._data = None

            def getheader(self, name, default=None):
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                return self._actual.headers.get(name, default)

            def getheaders(self):
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                return list(self._actual.headers.items())

            @property
            def status(self):
                return self._actual.code

            @property
            def reason(self):
                return self._actual.reason

            @property
            def data(self):
                if self._data is None:
                    self._data = self._actual.read()
                return self._data

            @property
            def text(self):
                if self.status == 204:
                    return None
                charset = cgi.parse_header(
                    self._actual.headers['Content-Type'])[1].get('charset')
                if charset:
                    return self.data.decode(charset)
                else:
                    return self.data.decode()

            def read(self, amt=None):
                return self._actual.read(amt)

            def readlines(self):
                return self._actual.readlines()

            def readline(self, size=-1):
                return self._actual.readline(size)

        return Urllib3LikeResponse(response)
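
    # Illustrative usage (not in the original source): request() returns a
    # urllib3-style response, so callers use .status, .getheader() and
    # .read() rather than the raw urllib object; for an HttpTransport t:
    #
    #   resp = t.request('GET', t._remote_path('README'))
    #   if resp.status == 200:
    #       content = resp.read()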

    def disconnect(self):
        connection = self._get_connection()
        if connection is not None:
            connection.close()

    def has(self, relpath):
        """Does the target location exist?
        """
        response = self._head(relpath)

        code = response.status
        if code == 200:  # "ok",
            return True
        else:
            return False

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        """
        code, response_file = self._get(relpath, None)
        return response_file

    def _get(self, relpath, offsets, tail_amount=0):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param offsets: None to get the whole file;
            or a list of _CoalescedOffset to fetch parts of a file.
        :param tail_amount: The amount to get from the end of the file.

        :returns: (http_code, result_file)
        """
        abspath = self._remote_path(relpath)
        headers = {}
        if offsets or tail_amount:
            range_header = self._attempted_range_header(offsets, tail_amount)
            if range_header is not None:
                bytes = 'bytes=' + range_header
                headers = {'Range': bytes}
        else:
            range_header = None

        response = self.request('GET', abspath, headers=headers)

        if response.status == 404:  # not found
            raise errors.NoSuchFile(abspath)
        elif response.status == 416:
            # We don't know which, but one of the ranges we specified was
            # wrong.
            raise errors.InvalidHttpRange(abspath, range_header,
                                          'Server returned code %d'
                                          % response.status)
        elif response.status == 400:
            if range_header:
                # We don't know which, but one of the ranges we specified was
                # wrong.
                raise errors.InvalidHttpRange(
                    abspath, range_header,
                    'Server returned code %d' % response.status)
            else:
                raise errors.BadHttpRequest(abspath, response.reason)
        elif response.status not in (200, 206):
            raise errors.UnexpectedHttpStatus(abspath, response.status)

        data = handle_response(
            abspath, response.status, response.getheader, response)
        return response.status, data
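
    # Illustrative note (not in the original source): _get(relpath, None)
    # fetches the whole file, while a list of coalesced offsets produces a
    # partial request such as 'Range: bytes=0-127,512-1023', answered with
    # 206 Partial Content when the server honours it.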

    def _remote_path(self, relpath):
        """See ConnectedTransport._remote_path.

        User and password are not embedded in the path provided to the
        server.
        """
        url = self._parsed_url.clone(relpath)
        url.user = url.quoted_user = None
        url.password = url.quoted_password = None
        url.scheme = self._unqualified_scheme
        return str(url)

    def _create_auth(self):
        """Returns a dict containing the credentials provided at build time."""
        auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,
                    user=self._parsed_url.user,
                    password=self._parsed_url.password,
                    protocol=self._unqualified_scheme,
                    path=self._parsed_url.path)
        return auth

    def get_smart_medium(self):
        """See Transport.get_smart_medium."""
        if self._medium is None:
            # Since medium holds some state (smart server probing at least), we
            # need to keep it around. Note that this is needed because medium
            # has the same 'base' attribute as the transport so it can't be
            # shared between transports having different bases.
            self._medium = SmartClientHTTPMedium(self)
        return self._medium

    def _degrade_range_hint(self, relpath, ranges):
        if self._range_hint == 'multi':
            self._range_hint = 'single'
            mutter('Retry "%s" with single range request' % relpath)
        elif self._range_hint == 'single':
            self._range_hint = None
            mutter('Retry "%s" without ranges' % relpath)
        else:
            # We tried all the tricks, but nothing worked, caller must
            # reraise.
            return False
        return True
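
    # Illustrative note (not in the original source): the hint degrades
    # 'multi' -> 'single' -> None; once it is None this method returns False
    # and _readv() re-raises the original error instead of retrying.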

    # _coalesce_offsets is a helper for readv; it tries to combine ranges
    # without degrading readv performance. _bytes_to_read_before_seek is the
    # value used for the limit parameter and has been tuned for other
    # transports. For HTTP, the name is inappropriate but the parameter is
    # still useful and helps reduce the number of chunks in the response. The
    # overhead for a chunk (headers, length, footer) around the data itself
    # is variable but around 50 bytes. We use 128 to reduce the range
    # specifiers that appear in the header; some servers (notably Apache)
    # enforce a maximum length for a header and issue a '400: Bad request'
    # error when too many ranges are specified.
    _bytes_to_read_before_seek = 128
    # No limit on the number of offsets that get combined into one, we are
    # trying to avoid downloading the whole file.
    _max_readv_combine = 0
    # By default Apache has a limit of ~400 ranges before replying with a 400
    # Bad Request. So we go underneath that amount to be safe.
    _max_get_ranges = 200
    # We impose no limit on the range size. But see _pycurl.py for a different
    # use.
    _get_max_size = 0

    def _readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param offsets: A list of (offset, size) tuples.
        :returns: A list or generator of (offset, data) tuples
        """
        # offsets may be a generator, we will iterate it several times, so
        # build a list
        offsets = list(offsets)

        try_again = True
        retried_offset = None
        while try_again:
            try_again = False

            # Coalesce the offsets to minimize the GET requests issued
            sorted_offsets = sorted(offsets)
            coalesced = self._coalesce_offsets(
                sorted_offsets, limit=self._max_readv_combine,
                fudge_factor=self._bytes_to_read_before_seek,
                max_size=self._get_max_size)

            # Turn it into a list, we will iterate it several times
            coalesced = list(coalesced)
            if 'http' in debug.debug_flags:
                mutter('http readv of %s offsets => %s collapsed %s',
                       relpath, len(offsets), len(coalesced))

            # Cache the data read, but only until it's been used
            data_map = {}
            # We will iterate on the data received from the GET requests and
            # serve the corresponding offsets respecting the initial order. We
            # need an offset iterator for that.
            iter_offsets = iter(offsets)
            try:
                cur_offset_and_size = next(iter_offsets)
            except StopIteration:
                return

            try:
                for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
                    # Split the received chunk
                    for offset, size in cur_coal.ranges:
                        start = cur_coal.start + offset
                        rfile.seek(start, os.SEEK_SET)
                        data = rfile.read(size)
                        data_len = len(data)
                        if data_len != size:
                            raise errors.ShortReadvError(relpath, start, size,
                                                         actual=data_len)
                        if (start, size) == cur_offset_and_size:
                            # The offsets requested are sorted as the coalesced
                            # ones, no need to cache. Win !
                            yield cur_offset_and_size[0], data
                            try:
                                cur_offset_and_size = next(iter_offsets)
                            except StopIteration:
                                return
                        else:
                            # Different sorting. We need to cache.
                            data_map[(start, size)] = data

                    # Yield everything we can
                    while cur_offset_and_size in data_map:
                        # Clean the cached data since we use it
                        # XXX: will break if offsets contains duplicates --
                        # vila20071129
                        this_data = data_map.pop(cur_offset_and_size)
                        yield cur_offset_and_size[0], this_data
                        try:
                            cur_offset_and_size = next(iter_offsets)
                        except StopIteration:
                            return

            except (errors.ShortReadvError, errors.InvalidRange,
                    errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
                mutter('Exception %r: %s during http._readv', e, e)
                if (not isinstance(e, errors.ShortReadvError)
                        or retried_offset == cur_offset_and_size):
                    # We don't degrade the range hint for ShortReadvError since
                    # they do not indicate a problem with the server ability to
                    # handle ranges. Except when we fail to get back a required
                    # offset twice in a row. In that case, falling back to
                    # single range or whole file should help.
                    if not self._degrade_range_hint(relpath, coalesced):
                        raise
                # Some offsets may have been already processed, so we retry
                # only the unsuccessful ones.
                offsets = [cur_offset_and_size] + [o for o in iter_offsets]
                retried_offset = cur_offset_and_size
                try_again = True
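
    # Illustrative note (not in the original source): offsets such as
    # [(0, 10), (1000, 50)] are sorted and coalesced, fetched through one or
    # more ranged GETs, then yielded back in the caller's original order,
    # with out-of-order chunks parked in data_map until they are due.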

    def _coalesce_readv(self, relpath, coalesced):
        """Issue several GET requests to satisfy the coalesced offsets"""

        def get_and_yield(relpath, coalesced):
            if coalesced:
                # Note that the _get below may raise
                # errors.InvalidHttpRange. It's the caller's responsibility to
                # decide how to retry since it may provide different coalesced
                # offsets.
                code, rfile = self._get(relpath, coalesced)
                for coal in coalesced:
                    yield coal, rfile

        if self._range_hint is None:
            # Download whole file
            for c, rfile in get_and_yield(relpath, coalesced):
                yield c, rfile
        else:
            total = len(coalesced)
            if self._range_hint == 'multi':
                max_ranges = self._max_get_ranges
            elif self._range_hint == 'single':
                max_ranges = total
            else:
                raise AssertionError("Unknown _range_hint %r"
                                     % (self._range_hint,))
            # TODO: Some web servers may ignore the range requests and return
            # the whole file, we may want to detect that and avoid further
            # requests.
            # Hint: test_readv_multiple_get_requests will fail once we do that
            cumul = 0
            ranges = []
            for coal in coalesced:
                if ((self._get_max_size > 0
                     and cumul + coal.length > self._get_max_size) or
                        len(ranges) >= max_ranges):
                    # Get that much and yield
                    for c, rfile in get_and_yield(relpath, ranges):
                        yield c, rfile
                    # Restart with the current offset
                    cumul = coal.length
                    ranges = [coal]
                else:
                    ranges.append(coal)
                    cumul += coal.length
            # Get the rest and yield
            for c, rfile in get_and_yield(relpath, ranges):
                yield c, rfile
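
    # Illustrative note (not in the original source): with the default
    # _max_get_ranges of 200, 450 coalesced offsets under a 'multi' hint are
    # served by three GETs of at most 200 ranges each instead of one request
    # with an oversized Range header.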

    def recommended_page_size(self):
        """See Transport.recommended_page_size().

        For HTTP we suggest a large page size to reduce the overhead
        introduced by latency.
        """
        return 64 * 1024

    def _post(self, body_bytes):
        """POST body_bytes to .bzr/smart on this transport.

        :returns: (response code, response body file-like object).
        """
        # TODO: Requiring all the body_bytes to be available at the beginning
        # of the POST may require large client buffers. It would be nice to
        # have an interface that allows streaming via POST when possible (and
        # degrades to a local buffer when not).
        abspath = self._remote_path('.bzr/smart')
        response = self.request(
            'POST', abspath, body=body_bytes,
            headers={'Content-Type': 'application/octet-stream'})
        if response.status not in (200, 403):
            raise errors.UnexpectedHttpStatus(abspath, response.status)
        code = response.status
        data = handle_response(
            abspath, code, response.getheader, response)
        return code, data
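
    # Illustrative note (not in the original source): only 200 and 403 pass
    # through here; SmartClientHTTPMedium.send_http_smart_request() then
    # rejects anything but 200, so a 403 surfaces as UnexpectedHttpStatus at
    # that level.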

    def _head(self, relpath):
        """Request the HEAD of a file.

        Performs the request and leaves callers to handle the results.
        """
        abspath = self._remote_path(relpath)
        response = self.request('HEAD', abspath)
        if response.status not in (200, 404):
            raise errors.UnexpectedHttpStatus(abspath, response.status)

        return response

    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location.

        :param relpath: Location to put the contents, relative to base.
        :param f: File-like object.
        """
        raise errors.TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path."""
        raise errors.TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir."""
        raise errors.TransportNotPossible('http does not support rmdir()')

    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object into the final
        location.
        """
        raise errors.TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to"""
        raise errors.TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransport):
            raise errors.TransportNotPossible(
                'http cannot be the target of copy_to()')
        else:
            return super(HttpTransport, self).\
                copy_to(relpaths, other, mode=mode, pb=pb)

    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to"""
        raise errors.TransportNotPossible('http does not support move()')

    def delete(self, relpath):
        """Delete the item at relpath"""
        raise errors.TransportNotPossible('http does not support delete()')

    def external_url(self):
        """See breezy.transport.Transport.external_url."""
        # HTTP URLs are externally usable as long as they don't mention their
        # implementation qualifier
        url = self._parsed_url.clone()
        url.scheme = self._unqualified_scheme
        return str(url)

    def is_readonly(self):
        """See Transport.is_readonly."""
        return True

    def listable(self):
        """See Transport.listable."""
        return False

    def stat(self, relpath):
        """Return the stat information for a file.
        """
        raise errors.TransportNotPossible('http does not support stat()')

    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignored locks for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            def __init__(self, path):
                self.path = path

            def unlock(self):
                pass
        return BogusLock(relpath)

    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid using
        it.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        raise errors.TransportNotPossible('http does not support lock_write()')

    def _attempted_range_header(self, offsets, tail_amount):
        """Prepare a HTTP Range header at a level the server should accept.

        :return: the range header representing offsets/tail_amount or None if
            no header can be built.
        """

        if self._range_hint == 'multi':
            # Generate the header describing all offsets
            return self._range_header(offsets, tail_amount)
        elif self._range_hint == 'single':
            # Combine all the requested ranges into a single
            # encompassing one
            if len(offsets) > 0:
                if tail_amount not in (0, None):
                    # Nothing we can do here to combine ranges with
                    # tail_amount in a single range, just return None. The
                    # whole file should be downloaded.
                    return None
                else:
                    start = offsets[0].start
                    last = offsets[-1]
                    end = last.start + last.length - 1
                    whole = self._coalesce_offsets([(start, end - start + 1)],
                                                   limit=0, fudge_factor=0)
                    return self._range_header(list(whole), 0)
            else:
                # Only tail_amount requested, let range_header
                # do its work
                return self._range_header(offsets, tail_amount)
        else:
            return None

    @staticmethod
    def _range_header(ranges, tail_amount):
        """Turn a list of bytes ranges into a HTTP Range header value.

        :param ranges: A list of _CoalescedOffset
        :param tail_amount: The amount to get from the end of the file.

        :return: HTTP range header string.

        At least a non-empty ranges *or* a tail_amount must be
        provided.
        """
        strings = []
        for offset in ranges:
            strings.append('%d-%d' % (offset.start,
                                      offset.start + offset.length - 1))

        if tail_amount:
            strings.append('-%d' % tail_amount)

        return ','.join(strings)
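
    # Illustrative note (not in the original source): offsets covering bytes
    # 0-99 and 500-599 plus a 128-byte tail yield '0-99,500-599,-128', which
    # _get() sends as 'Range: bytes=0-99,500-599,-128'.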

    def _redirected_to(self, source, target):
        """Returns a transport suitable to re-issue a redirected request.

        :param source: The source url as returned by the server.
        :param target: The target url as returned by the server.

        The redirection can be handled only if the relpath involved is not
        renamed by the redirection.

        :returns: A transport
        :raise UnusableRedirect: when the URL can not be reinterpreted
        """
        parsed_source = self._split_url(source)
        parsed_target = self._split_url(target)
        pl = len(self._parsed_url.path)
        # determine the excess tail - the relative path that was in
        # the original request but not part of this transport's URL.
        excess_tail = parsed_source.path[pl:].strip("/")
        if not parsed_target.path.endswith(excess_tail):
            # The final part of the url has been renamed, we can't handle the
            # redirection.
            raise UnusableRedirect(
                source, target, "final part of the url was renamed")

        target_path = parsed_target.path
        if excess_tail:
            # Drop the tail that was in the redirect but not part of
            # the path of this transport.
            target_path = target_path[:-len(excess_tail)]

        if parsed_target.scheme in ('http', 'https'):
            # Same protocol family (i.e. http[s]), we will preserve the same
            # http client implementation when a redirection occurs from one to
            # the other (otherwise users may be surprised that bzr switches
            # from one implementation to the other, and devs may suffer
            # debugging it).
            if (parsed_target.scheme == self._unqualified_scheme
                    and parsed_target.host == self._parsed_url.host
                    and parsed_target.port == self._parsed_url.port
                    and (parsed_target.user is None or
                         parsed_target.user == self._parsed_url.user)):
                # If a user is specified, it should match, we don't care about
                # passwords, wrong passwords will be rejected anyway.
                return self.clone(target_path)
            else:
                # Rebuild the url preserving the scheme qualification and the
                # credentials (if they don't apply, the redirected to server
                # will tell us, but if they do apply, we avoid prompting the
                # user)
                redir_scheme = parsed_target.scheme
                new_url = self._unsplit_url(redir_scheme,
                                            self._parsed_url.user,
                                            self._parsed_url.password,
                                            parsed_target.host,
                                            parsed_target.port,
                                            target_path)
                return transport.get_transport_from_url(new_url)
        else:
            # Redirected to a different protocol
            new_url = self._unsplit_url(parsed_target.scheme,
                                        parsed_target.user,
                                        parsed_target.password,
                                        parsed_target.host,
                                        parsed_target.port,
                                        target_path)
            return transport.get_transport_from_url(new_url)
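
    # Illustrative note (not in the original source): a redirect from
    # http://host/repo/branch to http://host/new/branch keeps the 'branch'
    # tail, so the transport is re-rooted onto '/new/'; had the tail itself
    # been renamed, UnusableRedirect would be raised instead.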

    def _options(self, relpath):
        abspath = self._remote_path(relpath)
        resp = self.request('OPTIONS', abspath)
        if resp.status == 404:
            raise errors.NoSuchFile(abspath)
        if resp.status in (403, 405):
            raise errors.InvalidHttpResponse(
                abspath,
                "OPTIONS not supported or forbidden for remote URL")
        return resp.getheaders()


# TODO: May be better located in smart/medium.py with the other
# SmartMedium classes
class SmartClientHTTPMedium(medium.SmartClientMedium):

    def __init__(self, http_transport):
        super(SmartClientHTTPMedium, self).__init__(http_transport.base)
        # We don't want to create a circular reference between the http
        # transport and its associated medium. Since the transport will live
        # longer than the medium, the medium keeps only a weak reference to
        # its transport.
        self._http_transport_ref = weakref.ref(http_transport)

    def get_request(self):
        return SmartClientHTTPMediumRequest(self)

    def should_probe(self):
        return True

    def remote_path_from_transport(self, transport):
        # Strip the optional 'bzr+' prefix from transport so it will have the
        # same scheme as self.
        transport_base = transport.base
        if transport_base.startswith('bzr+'):
            transport_base = transport_base[4:]
        rel_url = urlutils.relative_url(self.base, transport_base)
        return urlutils.unquote(rel_url)

    def send_http_smart_request(self, bytes):
        try:
            # Get back the http_transport held by the weak reference
            t = self._http_transport_ref()
            code, body_filelike = t._post(bytes)
            if code != 200:
                raise errors.UnexpectedHttpStatus(
                    t._remote_path('.bzr/smart'), code)
        except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
            raise errors.SmartProtocolError(str(e))
        return body_filelike

    def _report_activity(self, bytes, direction):
        """See SmartMedium._report_activity.

        Does nothing; the underlying plain HTTP transport will report the
        activity that this medium would report.
        """
        pass

    def disconnect(self):
        """See SmartClientMedium.disconnect()."""
        t = self._http_transport_ref()
        t.disconnect()


# TODO: May be better located in smart/medium.py with the other
# SmartMediumRequest classes
class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium."""

    def __init__(self, client_medium):
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        self._buffer = b''

    def _accept_bytes(self, bytes):
        self._buffer += bytes

    def _finished_writing(self):
        data = self._medium.send_http_smart_request(self._buffer)
        self._response_body = data

    def _read_bytes(self, count):
        """See SmartClientMediumRequest._read_bytes."""
        return self._response_body.read(count)

    def _read_line(self):
        line, excess = medium._get_line(self._response_body.read)
        if excess != b'':
            raise AssertionError(
                '_get_line returned excess bytes, but this mediumrequest '
                'cannot handle excess. (%r)' % (excess,))
        return line

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""
        pass


def unhtml_roughly(maybe_html, length_limit=1000):
    """Very approximate html->text translation, for presenting error bodies.

    :param length_limit: Truncate the result to this many characters.

    >>> unhtml_roughly("<b>bad</b> things happened\\n")
    ' bad  things happened '
    """
    return re.subn(r"(<[^>]*>|\n|&nbsp;)", " ", maybe_html)[0][:length_limit]


def get_test_permutations():
    """Return the permutations to be used in testing."""
    from breezy.tests import (
        features,
        http_server,
        )
    permutations = [(HttpTransport, http_server.HttpServer), ]
    if features.HTTPSServerFeature.available():
        from breezy.tests import (
            https_server,
            ssl_certs,
            )

        class HTTPS_transport(HttpTransport):

            def __init__(self, base, _from_transport=None):
                super(HTTPS_transport, self).__init__(
                    base, _from_transport=_from_transport,
                    ca_certs=ssl_certs.build_path('ca.crt'))

        permutations.append((HTTPS_transport,
                             https_server.HTTPSServer))
    return permutations