# Copyright (C) 2005-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Base implementation of Transport over http.

There are separate implementation modules for each http client implementation.
"""

from __future__ import absolute_import

import base64
import errno
import os
import re
import socket
import ssl
import sys
import time
import urllib

try:
    import http.client as http_client
except ImportError:
    import httplib as http_client
try:
    import urllib.request as urllib_request
except ImportError:  # python < 3
    import urllib2 as urllib_request
try:
    from urllib.parse import urljoin, splitport, splittype, splithost, urlencode
except ImportError:
    from urlparse import urljoin
    from urllib import splitport, splittype, splithost, urlencode

# TODO: handle_response should be integrated into the http/__init__.py
from .response import handle_response

# FIXME: Oversimplifying, two kinds of exceptions should be
# raised, once a request is issued: URLError before we have been
# able to process the response, HTTPError after that. Processing the
# response means we are able to leave the socket clean, so if we
# are not able to do that, we should close the connection. The
# actual code more or less does that, tests should be written to
# ensure that.

from ... import __version__ as breezy_version
from ... import (
    config,
    debug,
    errors,
    osutils,
    trace,
    transport,
    ui,
    urlutils,
    )
from ...bzr.smart import medium
from ...sixish import (
    PY3,
    reraise,
    )
from ...trace import mutter
from ...transport import (
    ConnectedTransport,
    )


# Debug level shared by the urllib handlers below (passed as their default
# debuglevel and checked before the verbose print() diagnostics).
DEBUG = 0


def default_user_agent():
    return 'Breezy/%s' % breezy_version


try:
    _ = (ssl.match_hostname, ssl.CertificateError)
except AttributeError:
    # Provide fallbacks for python < 2.7.9
    def match_hostname(cert, host):
        trace.warning(
            '%s cannot be verified, https certificates verification is only'
            ' available for python versions >= 2.7.9' % (host,))

    ssl.match_hostname = match_hostname
    ssl.CertificateError = ValueError


# Note for packagers: if there is no package providing certs for your platform,
# the curl project produces http://curl.haxx.se/ca/cacert.pem weekly.
_ssl_ca_certs_known_locations = [
    u'/etc/ssl/certs/ca-certificates.crt',  # Ubuntu/debian/gentoo
    u'/etc/pki/tls/certs/ca-bundle.crt',  # Fedora/CentOS/RH
    u'/etc/ssl/ca-bundle.pem',  # OpenSuse
    u'/etc/ssl/cert.pem',  # OpenSuse
    u"/usr/local/share/certs/ca-root-nss.crt",  # FreeBSD
    # XXX: Needs checking, can't trust the interweb ;) -- vila 2012-01-25
    u'/etc/openssl/certs/ca-certificates.crt',  # Solaris
]


def default_ca_certs():
    if sys.platform == 'win32':
        return os.path.join(os.path.dirname(sys.executable), u"cacert.pem")
    elif sys.platform == 'darwin':
        # FIXME: Needs some default value for osx, waiting for osx installers
        # guys feedback -- vila 2012-01-25
        pass
    else:
        # Try known locations for friendly OSes providing the root certificates
        # without making them hard to use for any https client.
        for path in _ssl_ca_certs_known_locations:
            if os.path.exists(path):
                # First found wins
                return path
    # A default path that makes sense and will be mentioned in the error
    # presented to the user, even if not correct for all platforms
    return _ssl_ca_certs_known_locations[0]


def ca_certs_from_store(path):
    if not os.path.exists(path):
        raise ValueError("ca certs path %s does not exist" % path)
    return path


def cert_reqs_from_store(unicode_str):
    try:
        return {"required": ssl.CERT_REQUIRED,
                "none": ssl.CERT_NONE}[unicode_str]
    except KeyError:
        raise ValueError("invalid value %s" % unicode_str)


def default_ca_reqs():
    if sys.platform in ('win32', 'darwin'):
        # FIXME: Once we get a native access to root certificates there, this
        # won't be needed anymore. See http://pad.lv/920455 -- vila 2012-02-15
        return u'none'
    else:
        return u'required'


opt_ssl_ca_certs = config.Option('ssl.ca_certs',
                                 from_unicode=ca_certs_from_store,
                                 default=default_ca_certs,
                                 invalid='warning',
                                 help="""\
Path to certification authority certificates to trust.

This should be a valid path to a bundle containing all root Certificate
Authorities used to verify an https server certificate.

Use ssl.cert_reqs=none to disable certificate verification.
""")

opt_ssl_cert_reqs = config.Option('ssl.cert_reqs',
                                  default=default_ca_reqs,
                                  from_unicode=cert_reqs_from_store,
                                  invalid='error',
                                  help="""\
Whether to require a certificate from the remote side. (default:required)

Possible values:
 * none: Certificates ignored
 * required: Certificates required and validated
""")
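
# Note: these two option values are read back via
# config.GlobalStack().get('ssl.cert_reqs') and ...get('ssl.ca_certs') in
# HTTPSConnection.connect_to_origin() below; the from_unicode converters
# above turn the stored strings into a certificate bundle path and an
# ssl.CERT_* constant respectively.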

checked_kerberos = False
kerberos = None


class addinfourl(urllib_request.addinfourl):
    '''Replacement addinfourl class compatible with python-2.7's xmlrpclib

    In python-2.7, xmlrpclib expects that the response object that it receives
    has a getheader method.  http_client.HTTPResponse provides this but
    urllib_request.addinfourl does not.  Add the necessary functions here, ported to
    use the internal data structures of addinfourl.
    '''

    def getheader(self, name, default=None):
        if self.headers is None:
            raise http_client.ResponseNotReady()
        return self.headers.getheader(name, default)

    def getheaders(self):
        if self.headers is None:
            raise http_client.ResponseNotReady()
        return list(self.headers.items())


class _ReportingFileSocket(object):

    def __init__(self, filesock, report_activity=None):
        self.filesock = filesock
        self._report_activity = report_activity

    def report_activity(self, size, direction):
        if self._report_activity:
            self._report_activity(size, direction)

    def read(self, size=1):
        s = self.filesock.read(size)
        self.report_activity(len(s), 'read')
        return s

    def readline(self, size=-1):
        s = self.filesock.readline(size)
        self.report_activity(len(s), 'read')
        return s

    def readinto(self, b):
        s = self.filesock.readinto(b)
        self.report_activity(s, 'read')
        return s

    def __getattr__(self, name):
        return getattr(self.filesock, name)


class _ReportingSocket(object):

    def __init__(self, sock, report_activity=None):
        self.sock = sock
        self._report_activity = report_activity

    def report_activity(self, size, direction):
        if self._report_activity:
            self._report_activity(size, direction)

    def sendall(self, s, *args):
        self.sock.sendall(s, *args)
        self.report_activity(len(s), 'write')

    def recv(self, *args):
        s = self.sock.recv(*args)
        self.report_activity(len(s), 'read')
        return s

    def makefile(self, mode='r', bufsize=-1):
        # http_client creates a fileobject that doesn't do buffering, which
        # makes fp.readline() very expensive because it only reads one byte
        # at a time.  So we wrap the socket in an object that forces
        # sock.makefile to make a buffered file.
        fsock = self.sock.makefile(mode, 65536)
        # And wrap that into a reporting kind of fileobject
        return _ReportingFileSocket(fsock, self._report_activity)

    def __getattr__(self, name):
        return getattr(self.sock, name)
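
# Both reporting wrappers above delegate everything they don't explicitly
# override through __getattr__, so they can stand in for the real socket (or
# file) object anywhere http_client expects one; only the read/write paths
# are intercepted to feed the report_activity callback.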


# We define our own Response class to keep our http_client pipe clean
class Response(http_client.HTTPResponse):
    """Custom HTTPResponse, to avoid the need to decorate.

    http_client prefers to decorate the returned objects, rather
    than using a custom object.
    """

    # Some responses have bodies in which we have no interest
    _body_ignored_responses = [301, 302, 303, 307, 400, 401, 403, 404, 501]

    # in finish() below, we may have to discard several MB in the worst
    # case. To avoid buffering that much, we read and discard by chunks
    # instead. The underlying file is either a socket or a StringIO, so reading
    # 8k chunks should be fine.
    _discarded_buf_size = 8192

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        self.url = url
        super(Response, self).__init__(
            sock, debuglevel=debuglevel, method=method, url=url)

    def begin(self):
        """Begin to read the response from the server.

        http_client assumes that some responses get no content and do
        not even attempt to read the body in that case, leaving
        the body in the socket, blocking the next request. Let's
        try to work around that.
        """
        http_client.HTTPResponse.begin(self)
        if self.status in self._body_ignored_responses:
            if self.debuglevel >= 2:
                print("For status: [%s], will read body, length: %s" % (
                    self.status, self.length))
            if not (self.length is None or self.will_close):
                # In some cases, we just can't read the body, not
                # even try, or we may encounter a 104, 'Connection
                # reset by peer' error if there is indeed no body
                # and the server closed the connection just after
                # having issued the response headers (even if the
                # headers indicate a Content-Type...)
                body = self.read(self.length)
                if self.debuglevel >= 9:
                    # This one can be huge and is generally not interesting
                    print("Consumed body: [%s]" % body)
            self.close()
        elif self.status == 200:
            # Whatever the request is, it went ok, so we surely don't want to
            # close the connection. Some cases are not correctly detected by
            # http_client.HTTPConnection.getresponse (called by
            # http_client.HTTPResponse.begin). The CONNECT response for the https
            # through proxy case is one. Note: the 'will_close' below refers
            # to the "true" socket between us and the server, whereas the
            # 'close()' above refers to the copy of that socket created by
            # http_client for the response itself. So, in the if above we close the
            # socket to indicate that we are done with the response whereas
            # below we keep the socket with the server opened.
            self.will_close = False

    def finish(self):
        """Finish reading the body.

        In some cases, the client may have left some bytes to read in the
        body. That will block the next request to succeed if we use a
        persistent connection. If we don't use a persistent connection, well,
        nothing will block the next request since a new connection will be
        issued anyway.

        :return: the number of bytes left on the socket (may be None)
        """
        pending = None
        if not self.isclosed():
            # Make sure nothing was left to be read on the socket
            pending = 0
            data = True
            while data and self.length:
                # read() will update self.length
                data = self.read(min(self.length, self._discarded_buf_size))
                pending += len(data)
            if pending:
                trace.mutter("%s bytes left on the HTTP socket", pending)
            self.close()
        return pending
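
    # Note: the byte count returned by finish() is what
    # AbstractHTTPConnection.cleanup_pipe() below compares against
    # _range_warning_thresold to detect servers that answered a range
    # request with the whole file.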


# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
class AbstractHTTPConnection:
    """A custom HTTP(S) Connection, which can reset itself on a bad response"""

    response_class = Response

    # When we detect a server responding with the whole file to range requests,
    # we want to warn. But not below a given threshold.
    _range_warning_thresold = 1024 * 1024

    def __init__(self, report_activity=None):
        self._response = None
        self._report_activity = report_activity
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        netloc = '%s:%s' % (self.host, self.port)
        if self.proxied_host is not None:
            netloc += '(proxy for %s)' % self.proxied_host
        trace.mutter('* About to connect() to %s' % netloc)

    def getresponse(self):
        """Capture the response to be able to cleanup"""
        self._response = http_client.HTTPConnection.getresponse(self)
        return self._response

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        if self._response is not None:
            try:
                pending = self._response.finish()
                # Warn the user (once)
                if (self._ranges_received_whole_file is None
                        and self._response.status == 200
                        and pending
                        and pending > self._range_warning_thresold):
                    self._ranges_received_whole_file = True
                    trace.warning(
                        'Got a 200 response when asking for multiple ranges,'
                        ' does your server at %s:%s support range requests?',
                        self.host, self.port)
            except socket.error as e:
                # It's conceivable that the socket is in a bad state here
                # (including some test cases) and in this case, it doesn't need
                # cleaning anymore, so no need to fail, we just get rid of the
                # socket and let callers reconnect
                if (len(e.args) == 0
                        or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)):
                    raise
            self._response = None
        # Preserve our preciousss
        sock = self.sock
        self.sock = None
        # Let http_client.HTTPConnection do its housekeeping
        self.close()
        # Restore our preciousss
        self.sock = sock

    def _wrap_socket_for_reporting(self, sock):
        """Wrap the socket before anybody uses it."""
        self.sock = _ReportingSocket(sock, self._report_activity)


class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        if PY3:
            http_client.HTTPConnection.__init__(self, host, port)
        else:
            # Use strict=True since we don't support HTTP/0.9
            http_client.HTTPConnection.__init__(self, host, port, strict=True)
        self.proxied_host = proxied_host
        # ca_certs is ignored, it's only relevant for https

    def connect(self):
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)


class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        if PY3:
            http_client.HTTPSConnection.__init__(
                self, host, port, key_file, cert_file)
        else:
            # Use strict=True since we don't support HTTP/0.9
            http_client.HTTPSConnection.__init__(self, host, port,
                                                 key_file, cert_file,
                                                 strict=True)
        self.proxied_host = proxied_host
        self.ca_certs = ca_certs

    def connect(self):
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        if self.proxied_host is not None:
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
            ca_certs = None
        else:
            if self.ca_certs is None:
                ca_certs = config_stack.get('ssl.ca_certs')
            else:
                ca_certs = self.ca_certs
            if ca_certs is None:
                trace.warning(
                    "No valid trusted SSL CA certificates file set. See "
                    "'brz help ssl.ca_certs' for more information on setting "
                    "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=self.host)
        except ssl.SSLError:
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA"
                " certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody uses it
        self._wrap_socket_for_reporting(ssl_sock)
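
    # Note on the proxied case: connect() above deliberately skips
    # connect_to_origin() when proxied_host is set. HTTPSHandler.https_open()
    # below first sends a CONNECT request through the proxy and only then
    # calls connect_to_origin(), so the TLS handshake happens inside the
    # established tunnel.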


class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication has
      been made.
    """

    def __init__(self, method, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        urllib_request.Request.__init__(
            self, url, data, headers,
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        if PY3:
            host, port = splitport(self.host)
        else:
            host, port = splitport(self.get_host())
        if port is None:
            # We need to set the default port ourselves way before it gets set
            # in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and a proxy,
        # it sets Host to the https proxy, not the host we want to talk to.
        # I'm fairly sure this is our fault, but what is the cause is an open
        # question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)


class _ConnectRequest(Request):

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        """
        # We give a fake url and redefine selector or urllib_request will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        return self.proxied_host

    def get_selector(self):
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not apply
        here. In fact, the connection is already established with the proxy and
        we just want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)


class ConnectionHandler(urllib_request.BaseHandler):
    """Provides connection-sharing by pre-processing requests.

    urllib_request provides no way to access the HTTPConnection object
    internally used. But we need it in order to achieve
    connection sharing. So, we add it to the request just before
    it is processed, and then we override the do_open method for
    http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        self._report_activity = report_activity
        self.ca_certs = ca_certs

    def create_connection(self, request, http_connection_class):
        host = request.host
        if not host:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        # We create a connection (but it will not connect until the first
        # request is made)
        try:
            connection = http_connection_class(
                host, proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL as exception:
            # There is only one occurrence of InvalidURL in http_client
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

        return connection

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request has no connection: create a new one,

        - the request has a connection: this one has been used
          already, let's capture it, so that we can give it to
          another transport to be reused. We don't do that
          ourselves: the Transport object gets the connection from
          a first request and then propagates it, from request to
          request or to cloned transports.
        """
        connection = request.connection
        if connection is None:
            # Create a new one
            connection = self.create_connection(request, http_connection_class)
            request.connection = connection

        # All connections will pass here, propagate debug level
        connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        return self.capture_connection(request, HTTPSConnection)
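
    # Note: handler_order = 1000 guarantees this handler runs after all other
    # pre-processing handlers (ProxyHandler below uses 100), so
    # request.proxied_host is already set by the time the connection is
    # created here.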


class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Common headers setting"""

        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exceptions of an application level kind, we
        just have to translate them.

        http_client can raise exceptions of a transport level (badly
        formatted dialog, loss of connection or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            if PY3:
                origin_req_host = request.origin_req_host
            else:
                origin_req_host = request.get_origin_req_host()
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in an incorrect state, it's a bug in
            # our implementation.
            reraise(exc_type, exc_val, exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Will retry, %s %r' % (method, url))
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine,
                                http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicates that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) is
                    # experienced, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exceptions are considered connection related.

                    # socket errors generally occur for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    if PY3:
                        selector = request.selector
                    else:
                        selector = request.get_selector()
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Failed again, %s %r' % (method, url))
                    print('  Will raise: [%r]' % my_exception)
                reraise(type(my_exception), my_exception, exc_tb)
            return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get canonical
        # header names, but only python2.5 urllib_request uses title() to fix them just
        # before sending the request. And not all versions of python 2.5 do
        # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
        # ourself below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            if PY3:
                url = request.selector
            else:
                url = request.get_selector()
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data, headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
            convert_to_addinfourl = True
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine,
                http_client.UnknownProtocol, socket.error,
                http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)
            convert_to_addinfourl = False

        if PY3:
            response.msg = response.reason
            return response

        # FIXME: HTTPConnection does not fully support 100-continue (the
        # server responses are just ignored)

        # if code == 100:
        #     mutter('Will send the body')
        #     # We can send the body now
        #     body = request.data
        #     if body is None:
        #         raise URLError("No data given")
        #     connection.send(body)
        #     response = connection.getresponse()

        if self._debuglevel >= 2:
            print('Receives response: %r' % response)
            print('  For: %r(%r)' % (request.get_method(),
                                     request.get_full_url()))

        if convert_to_addinfourl:
            # Shamelessly copied from urllib_request
            req = request
            r = response
            r.recv = r.read
            fp = socket._fileobject(r, bufsize=65536)
            resp = addinfourl(fp, r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason
            resp.version = r.version
            if self._debuglevel >= 2:
                print('Create addinfourl: %r' % resp)
                print('  For: %r(%r)' % (request.get_method(),
                                         request.get_full_url()))
            if 'http' in debug.debug_flags:
                version = 'HTTP/%d.%d'
                try:
                    version = version % (resp.version / 10,
                                         resp.version % 10)
                except Exception:
                    version = 'HTTP/%r' % resp.version
                trace.mutter('< %s %s %s' % (version, resp.code,
                                             resp.msg))
                # Use the raw header lines instead of treating resp.info() as a
                # dict since we may miss duplicated headers otherwise.
                hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
                trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
        else:
            resp = response

        return resp


class HTTPHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPConnection"""

    def http_open(self, request):
        return self.do_open(HTTPConnection, request)


class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT':  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                raise errors.ConnectionError("Can't connect to %s via proxy %s" % (
                    connect.proxied_host, self.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)


class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Handles redirect requests.

    We have to implement our own scheme because we use a specific
    Request object and because we want to implement a specific
    policy.
    """
    _debuglevel = DEBUG
    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But Breezy uses some
    # shortcuts to optimize against roundtrips which can lead to
    # write requests being issued before read requests of
    # containing dirs can be redirected. So we redirect write
    # requests in the same way which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request"""
        # We would have preferred to update the request instead
        # of creating a new one, but the urllib_request.Request object
        # has a too complicated creation process to provide a
        # simple enough equivalent update process. Instead, when
        # redirecting, we only update the following request in
        # the redirect chain with a reference to the parent
        # request.

        # Some codes make no sense in our context and are treated
        # as errors:

        # 300: Multiple choices for different representations of
        #      the URI. Using that mechanism with Breezy will violate the
        #      protocol neutrality of Transport.

        # 304: Not modified (SHOULD only occurs with conditional
        #      GETs which are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in
        #      our context-- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may
        # think that it is a sufficient hint that the file exists
        # and that we MAY avoid following the redirections. But
        # if we want to be sure, we MUST follow them.

        if PY3:
            origin_req_host = req.origin_req_host
        else:
            origin_req_host = req.get_origin_req_host()

        if code in (301, 302, 303, 307):
            return Request(req.get_method(), newurl,
                           headers=req.headers,
                           origin_req_host=origin_req_host,
                           unverifiable=True,
                           # TODO: It will be nice to be able to
                           # detect virtual hosts sharing the same
                           # IP address, that will allow us to
                           # share the same connection...
                           connection=None,
                           parent=req,
                           )
        else:
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected-to URI.

        Copied from urllib_request to be able to clean the pipe of the associated
        connection, *before* issuing the redirected request but *after* having
        eventually raised an error.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        if 'location' in headers:
            newurl = headers.get('location')
        elif 'uri' in headers:
            newurl = headers.get('uri')
        else:
            return

        newurl = urljoin(req.get_full_url(), newurl)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            req.redirected_to = newurl
            return fp

        # This call succeeds or raise an error. urllib_request returns
        # if redirect_request returns None, but our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = redirected_req.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        else:
            visited = redirected_req.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
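
    # Note: when request.follow_redirections is False, http_error_302() above
    # only records the target in req.redirected_to and returns the original
    # response file, leaving the caller (the Transport) to decide whether a
    # new transport should be created for the new URL.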


class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request during
    the request pre-processing instead of modifying it at _open time. As we
    capture (or create) the connection object during request processing, _open
    time was too late.

    The main task is to modify the request so that the connection is done to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may be
    against an https server proxied through an http proxy. So, https_request
    will be called, but later it's really http_open that will be called. This
    explains why we don't have to call self.parent.open as the urllib_request did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of urllib_request implementation
        for type, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (type, proxy))
            delattr(self, '%s_open' % type)

        def bind_scheme_request(proxy, scheme):
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))
        # We are interested only by the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        bind_scheme_request(http_proxy, 'http')
        https_proxy = self.get_proxy_env_var('https')
        bind_scheme_request(https_proxy, 'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            if default_to is not None:
                # Try to get the alternate environment variable
                try:
                    return self.proxies[default_to]
                except KeyError:
                    pass
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib.proxy_bypass(host)
        else:
            return bypass

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if hport == dport or dport is None:
                # Protect glob chars
                dhost = dhost.replace(".", r"\.")
                dhost = dhost.replace("*", r".*")
                dhost = dhost.replace("?", r".")
                if re.match(dhost, hhost, re.IGNORECASE):
                    return True
        # Nothing explicitly avoids the host
        return None

    def set_proxy(self, request, type):
        if PY3:
            host = request.host
        else:
            host = request.get_host()
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port

        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameters are available, we are handling the first
            # proxied request, initialize. scheme (the authentication scheme)
            # and realm will be set by the AuthHandler
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request
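
    # Note: evaluate_proxy_bypass() above treats each no_proxy entry as a
    # glob: '*' and '?' are translated to the regexps '.*' and '.', so
    # no_proxy='*.example.com' bypasses the proxy for any host in that
    # domain (ports must match exactly when an entry specifies one).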


class AbstractAuthHandler(urllib_request.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate and
    digest authentications.

    This provides a unified interface for all authentication handlers
    (urllib_request provides far too many with different policies).

    The interaction between this handler and the urllib_request
    framework is not obvious, it works as follows:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been updated.
    """

    scheme = None
    """The scheme as it appears in the server header (lower cased)"""

    _max_retry = 3
    """We don't want to retry authenticating endlessly"""

    requires_username = True
    """Whether the auth mechanism requires a username."""

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header: the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # We want to know when we enter into an try/fail cycle of
        # authentications so we initialize to None to indicate that we aren't
        # in such a cycle by default.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Parse the authentication header.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder) scheme being the first word in the
            given header (lower cased), remainder may be None.
        """
        try:
            scheme, remainder = server_header.split(None, 1)
        except ValueError:
            scheme = server_header
            remainder = None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        old_value = auth.get(key, None)
        if old_value != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # The retry being recursive calls, None identifies the first retry
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None
        if PY3:
            server_headers = headers.get_all(self.auth_required_header)
        else:
            server_headers = headers.getheaders(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            raise KeyError('%s not found' % self.auth_required_header)

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            self.update_auth(auth, 'protocol', parsed_url.scheme)
            self.update_auth(auth, 'host', parsed_url.host)
            self.update_auth(auth, 'port', parsed_url.port)
            self.update_auth(auth, 'path', parsed_url.path)
        # FIXME: the auth handler should be selected at a single place instead
        # of letting all handlers try to match all headers, but the current
        # design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match each
            # one in turn
            matching_handler = self.auth_match(server_header, auth)
            if matching_handler:
                # auth_match may have modified auth (by adding the
                # password or changing the realm, for example)
                if (request.get_header(self.auth_header, None) is not None
                        and not auth['modified']):
                    # We already tried that, give up
                    return None

                # Only the most secure scheme proposed by the server should be
                # used, since the handlers use 'handler_order' to describe that
                # property, the first handler tried takes precedence, the
                # others should not attempt to authenticate if the best one
                # failed.
                best_scheme = auth.get('best_scheme', None)
                if best_scheme is None:
                    # At that point, if the current handler doesn't succeed
                    # the credentials are wrong (or incomplete), but we know
                    # that the associated scheme should be used.
                    best_scheme = auth['best_scheme'] = self.scheme
                if best_scheme != self.scheme:
                    continue

                if self.requires_username and auth.get('user', None) is None:
                    # Without a known user, we can't authenticate
                    return None

                # Housekeeping
                request.connection.cleanup_pipe()
                # Retry the request with an authentication header added
                response = self.parent.open(request)
                if response:
                    self.auth_successful(request, response)
                return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them changes the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
            updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional info may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial url
            and then during dialog with the server).
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(auth['protocol'], auth['host'],
                                      port=port, path=auth['path'],
                                      realm=realm, ask=True,
                                      prompt=self.build_username_prompt(auth))
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_password_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' password'
        return prompt

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_username_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' username'
        return prompt

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            self.add_auth_header(
                request, self.build_auth_header(auth, request))
        return request

    https_request = http_request  # FIXME: Need test


class NegotiateAuthHandler(AbstractAuthHandler):
    """An authentication handler that handles WWW-Authenticate: Negotiate.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        resp = self._auth_match_kerberos(auth)
        if resp is None:
            return False
        # Optionally should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', resp)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host."""
        global kerberos, checked_kerberos
        if kerberos is None and not checked_kerberos:
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None
        ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if ret < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], ret)
            return None
        ret = kerberos.authGSSClientStep(vc, "")
        if ret < 0:
            trace.mutter('authGSSClientStep failed: %d', ret)
            return None
        return kerberos.authGSSClientResponse(vc)

    def build_auth_header(self, auth, request):
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return (auth.get('scheme', None) == 'negotiate' and
                auth.get('negotiate_response', None) is not None)


class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        raw = '%s:%s' % (auth['user'], auth['password'])
        auth_header = 'Basic ' + \
            base64.b64encode(raw.encode('utf-8')).decode('ascii')
        return auth_header

    def extract_realm(self, header_value):
        match = self.auth_regexp.search(header_value)
        realm = None
        if match:
            realm = match.group(1)
        return match, realm

    def auth_match(self, header, auth):
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match:
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if (auth.get('user', None) is None
                    or auth.get('password', None) is None):
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'


def get_digest_algorithm_impls(algorithm):
    H = None
    KD = None
    if algorithm == 'MD5':
        def H(x): return osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        H = osutils.sha_string
    if H is not None:
        def KD(secret, data): return H(
            ("%s:%s" % (secret, data)).encode('utf-8'))
    return H, KD


def get_new_cnonce(nonce, nonce_count):
    raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(),
                           osutils.rand_chars(8))
    return osutils.sha_string(raw.encode('utf-8'))[:16]
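
# The two helpers above provide the H (hash) and KD (keyed digest) primitives
# of RFC 2617; DigestAuthHandler.build_auth_header() below combines them as
# response = KD(H(A1), 'nonce:nc:cnonce:qop:H(A2)'), where A1 is
# 'user:realm:password' and A2 is 'method:uri'.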
1630
class DigestAuthHandler(AbstractAuthHandler):
1631
"""A custom digest authentication handler."""
1634
# Before basic as digest is a bit more secure and should be preferred
1637
def auth_params_reusable(self, auth):
1638
# If the auth scheme is known, it means a previous
1639
# authentication was successful, all information is
1640
# available, no further checks are needed.
1641
return auth.get('scheme', None) == 'digest'
1643
def auth_match(self, header, auth):
1644
scheme, raw_auth = self._parse_auth_header(header)
1645
if scheme != self.scheme:
1648
# Put the requested authentication info into a dict
1649
req_auth = urllib_request.parse_keqv_list(
1650
urllib_request.parse_http_list(raw_auth))
1652
# Check that we can handle that authentication
1653
qop = req_auth.get('qop', None)
1654
if qop != 'auth': # No auth-int so far
1657
H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
1661
realm = req_auth.get('realm', None)
1662
# Put useful info into auth
1663
self.update_auth(auth, 'scheme', scheme)
1664
self.update_auth(auth, 'realm', realm)
1665
if auth.get('user', None) is None or auth.get('password', None) is None:
1666
user, password = self.get_user_password(auth)
1667
self.update_auth(auth, 'user', user)
1668
self.update_auth(auth, 'password', password)
1671
if req_auth.get('algorithm', None) is not None:
1672
self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
1673
nonce = req_auth['nonce']
1674
if auth.get('nonce', None) != nonce:
1675
# A new nonce, never used
1676
self.update_auth(auth, 'nonce_count', 0)
1677
self.update_auth(auth, 'nonce', nonce)
1678
self.update_auth(auth, 'qop', qop)
1679
auth['opaque'] = req_auth.get('opaque', None)
1681
# Some required field is not there

    def build_auth_header(self, auth, request):
        if PY3:
            selector = request.selector
        else:
            selector = request.get_selector()
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header
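

# Illustration (hypothetical values): the header built above looks like
#   Digest username="joe", realm="Restricted", nonce="f3a1...",
#       uri="/branch/file", cnonce="0a4f...", nc=00000001,
#       qop="auth", response="6629..."
# with opaque and algorithm appended only when the server supplied them.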


class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preemptively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        return self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        return self._build_username_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)


class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preemptively to avoid the roundtrip
    associated with the 407 error and keep the relevant info in
    the proxy_auth request attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insists on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        prompt = self._build_password_prompt(auth)
        prompt = u'Proxy ' + prompt
        return prompt

    def build_username_prompt(self, auth):
        prompt = self._build_username_prompt(auth)
        prompt = u'Proxy ' + prompt
        return prompt

    def http_error_407(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)


class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler"""


class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler"""


class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler"""


class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler"""


class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Custom http negotiate authentication handler"""


class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Custom proxy negotiate authentication handler"""


class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors; quite the contrary,
    we let our Transport handle them.
    """

    accepted_errors = [200,  # Ok
                       201,
                       202,
                       204,
                       206,  # Partial content
                       400,
                       403,
                       404,  # Not found
                       405,  # Method not allowed
                       406,
                       416,  # Range not satisfiable
                       501,  # Not implemented
                       ]
    """The error codes the caller will handle.

    This can be specialized in the request on a case-by-case basis, but the
    common cases are covered here.
    """

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        if code not in self.accepted_errors:
            response = self.parent.error('http', request, response,
                                         code, msg, hdrs)
        return response

    https_response = http_response


class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        if code == 403:
            raise errors.TransportError(
                'Server refuses to fulfill the request (403 Forbidden)'
                ' for %s' % req.get_full_url())
        else:
            raise errors.InvalidHttpResponse(req.get_full_url(),
                                             'Unable to handle http code %d: %s'
                                             % (code, msg))


class Opener(object):
    """A wrapper around urllib_request.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provide hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        self._opener = urllib_request.build_opener(
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect, error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            )

        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)
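

# Illustrative use (hypothetical request object): transports create a single
# Opener and then reuse its bound open() method, roughly:
#   opener = Opener(report_activity=None, ca_certs=None)
#   response = opener.open(request)  # request is a Request instance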


class HttpTransport(ConnectedTransport):
    """HTTP Client implementations.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.
    """

    # _unqualified_scheme: "http" or "https"
    # _scheme: may have "+pycurl", etc

    # In order to debug we have to issue our traces in sync with
    # httplib, which uses print :(
    _debuglevel = 0

    def __init__(self, base, _from_transport=None, ca_certs=None):
        """Set the base path where files will be stored."""
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._unqualified_scheme = proto_match.group(1)
        super(HttpTransport, self).__init__(
            base, _from_transport=_from_transport)
        self._medium = None
        # range hint is handled dynamically throughout the life
        # of the transport object. We start by trying multi-range
        # requests and if the server returns bogus results, we
        # retry with single range requests and, finally, we
        # forget about range if the server really can't
        # understand. Once acquired, this piece of info is
        # propagated to clones.
        if _from_transport is not None:
            self._range_hint = _from_transport._range_hint
            self._opener = _from_transport._opener
        else:
            self._range_hint = 'multi'
            self._opener = Opener(
                report_activity=self._report_activity, ca_certs=ca_certs)

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        body = urlopen_kw.pop('body', None)
        if fields is not None:
            data = urlencode(fields).encode()
            if body is not None:
                raise ValueError(
                    'body and fields are mutually exclusive')
        else:
            data = body
        if headers is None:
            headers = {}
        request = Request(method, url, data, headers)
        request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
        if urlopen_kw:
            raise NotImplementedError(
                'unknown arguments: %r' % urlopen_kw.keys())
        connection = self._get_connection()
        if connection is not None:
            # Give back shared info
            request.connection = connection
            (auth, proxy_auth) = self._get_credentials()
            # Clean the httplib.HTTPConnection pipeline in case the previous
            # request couldn't do it
            connection.cleanup_pipe()
        else:
            # First request, initialize credentials.
            # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
            auth = self._create_auth()
            # Proxy initialization will be done by the first proxied request
            proxy_auth = dict()
        # Ensure authentication info is provided
        request.auth = auth
        request.proxy_auth = proxy_auth

        if self._debuglevel > 0:
            print('perform: %s base: %s, url: %s' % (request.method, self.base,
                                                     request.get_full_url()))
        response = self._opener.open(request)
        if self._get_connection() is not request.connection:
            # First connection or reconnection
            self._set_connection(request.connection,
                                 (request.auth, request.proxy_auth))
        else:
            # http may change the credentials while keeping the
            # connection opened
            self._update_credentials((request.auth, request.proxy_auth))

        code = response.code
        if (request.follow_redirections is False
                and code in (301, 302, 303, 307)):
            raise errors.RedirectRequested(request.get_full_url(),
                                           request.redirected_to,
                                           is_permanent=(code == 301))

        if request.redirected_to is not None:
            trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                         request.redirected_to))

        class Urllib3LikeResponse(object):

            def __init__(self, actual):
                self._actual = actual
                self._data = None

            def getheader(self, name, default=None):
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                if PY3:
                    return self._actual.headers.get(name, default)
                else:
                    return self._actual.headers.getheader(name, default)

            def getheaders(self):
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                return list(self._actual.headers.items())

            @property
            def status(self):
                return self._actual.code

            @property
            def reason(self):
                return self._actual.reason

            @property
            def data(self):
                if self._data is None:
                    self._data = self._actual.read()
                return self._data

            @property
            def text(self):
                charset = cgi.parse_header(
                    self._actual.headers['Content-Type'])[1].get('charset')
                return self.data.decode(charset)

            def read(self, amt=None):
                return self._actual.read(amt)

            def readlines(self):
                return self._actual.readlines()

            def readline(self, size=-1):
                return self._actual.readline(size)

        return Urllib3LikeResponse(response)
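
    # Note: the nested Urllib3LikeResponse above adapts the urllib-style
    # response to the small urllib3-like surface (status, reason, data,
    # text, getheader(), read()) that the other methods of this class,
    # such as _get() and _head(), rely on.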

    def disconnect(self):
        connection = self._get_connection()
        if connection is not None:
            connection.close()

    def has(self, relpath):
        """Does the target location exist?"""
        response = self._head(relpath)

        code = response.status
        if code == 200:  # "ok",
            return True
        else:
            return False

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        """
        code, response_file = self._get(relpath, None)
        return response_file
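
    # Illustrative use (hypothetical URL): code built on this transport
    # typically does something like
    #   t = HttpTransport('http://example.com/branch/')
    #   content = t.get('README').read()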

    def _get(self, relpath, offsets, tail_amount=0):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param offsets: None to get the whole file;
            or a list of _CoalescedOffset to fetch parts of a file.
        :param tail_amount: The amount to get from the end of the file.

        :returns: (http_code, result_file)
        """
        abspath = self._remote_path(relpath)
        headers = {}
        if offsets or tail_amount:
            range_header = self._attempted_range_header(offsets, tail_amount)
            if range_header is not None:
                bytes = 'bytes=' + range_header
                headers = {'Range': bytes}
        else:
            range_header = None

        response = self.request('GET', abspath, headers=headers)

        if response.status == 404:  # not found
            raise errors.NoSuchFile(abspath)
        elif response.status == 416:
            # We don't know which, but one of the ranges we specified was
            # wrong.
            raise errors.InvalidHttpRange(abspath, range_header,
                                          'Server returned code %d' % response.status)
        elif response.status == 400:
            if range_header:
                # We don't know which, but one of the ranges we specified was
                # wrong.
                raise errors.InvalidHttpRange(
                    abspath, range_header,
                    'Server returned code %d' % response.status)
            else:
                raise errors.InvalidHttpResponse(
                    abspath, 'Unexpected status %d' % response.status)
        elif response.status not in (200, 206):
            raise errors.InvalidHttpResponse(
                abspath, 'Unexpected status %d' % response.status)

        data = handle_response(
            abspath, response.status, response.getheader, response)
        return response.status, data
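
    # Illustration (hypothetical offsets): for coalesced offsets covering
    # bytes 0-99 and 200-299 plus a 100-byte tail, _get() sends
    #   Range: bytes=0-99,200-299,-100
    # and a range-capable server answers with 206 Partial Content.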

    def _remote_path(self, relpath):
        """See ConnectedTransport._remote_path.

        user and passwords are not embedded in the path provided to the server.
        """
        url = self._parsed_url.clone(relpath)
        url.user = url.quoted_user = None
        url.password = url.quoted_password = None
        url.scheme = self._unqualified_scheme
        return str(url)

    def _create_auth(self):
        """Returns a dict containing the credentials provided at build time."""
        auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,
                    user=self._parsed_url.user, password=self._parsed_url.password,
                    protocol=self._unqualified_scheme,
                    path=self._parsed_url.path)
        return auth

    def get_smart_medium(self):
        """See Transport.get_smart_medium."""
        if self._medium is None:
            # Since medium holds some state (smart server probing at least), we
            # need to keep it around. Note that this is needed because medium
            # has the same 'base' attribute as the transport so it can't be
            # shared between transports having different bases.
            self._medium = SmartClientHTTPMedium(self)
        return self._medium

    def _degrade_range_hint(self, relpath, ranges):
        if self._range_hint == 'multi':
            self._range_hint = 'single'
            mutter('Retry "%s" with single range request' % relpath)
        elif self._range_hint == 'single':
            self._range_hint = None
            mutter('Retry "%s" without ranges' % relpath)
        else:
            # We tried all the tricks, but nothing worked, caller must reraise.
            return False
        return True

    # _coalesce_offsets is a helper for readv, it tries to combine ranges
    # without degrading readv performance. _bytes_to_read_before_seek is the
    # value used for the limit parameter and has been tuned for other
    # transports. For HTTP, the name is inappropriate but the parameter is
    # still useful and helps reduce the number of chunks in the response. The
    # overhead for a chunk (headers, length, footer around the data itself)
    # is variable but around 50 bytes. We use 128 to reduce the range
    # specifiers that appear in the header; some servers (notably Apache)
    # enforce a maximum length for a header and issue a '400: Bad request'
    # error when too many ranges are specified.
    _bytes_to_read_before_seek = 128
    # No limit on the number of offsets that get combined into one, we are
    # trying to avoid downloading the whole file.
    _max_readv_combine = 0
    # By default Apache has a limit of ~400 ranges before replying with a 400
    # Bad Request. So we go underneath that amount to be safe.
    _max_get_ranges = 200
    # We impose no limit on the range size. But see _pycurl.py for a different
    # use.
    _get_max_size = 0
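
    # Worked example (added for illustration; numbers are hypothetical):
    # with _bytes_to_read_before_seek = 128, offsets (0, 100) and (150, 100)
    # coalesce into the single range 0-249 because the 50-byte gap costs
    # less than the ~50 bytes of per-chunk overhead, while offsets separated
    # by more than 128 bytes stay in distinct ranges.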

    def _readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param offsets: A list of (offset, size) tuples.
        :param return: A list or generator of (offset, data) tuples
        """
        # offsets may be a generator, we will iterate it several times, so
        # build a list
        offsets = list(offsets)

        try_again = True
        retried_offset = None
        while try_again:
            try_again = False

            # Coalesce the offsets to minimize the GET requests issued
            sorted_offsets = sorted(offsets)
            coalesced = self._coalesce_offsets(
                sorted_offsets, limit=self._max_readv_combine,
                fudge_factor=self._bytes_to_read_before_seek,
                max_size=self._get_max_size)

            # Turn it into a list, we will iterate it several times
            coalesced = list(coalesced)
            if 'http' in debug.debug_flags:
                mutter('http readv of %s offsets => %s collapsed %s',
                       relpath, len(offsets), len(coalesced))

            # Cache the data read, but only until it's been used
            data_map = {}
            # We will iterate on the data received from the GET requests and
            # serve the corresponding offsets respecting the initial order. We
            # need an offset iterator for that.
            iter_offsets = iter(offsets)
            try:
                cur_offset_and_size = next(iter_offsets)
            except StopIteration:
                return

            try:
                for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
                    # Split the received chunk
                    for offset, size in cur_coal.ranges:
                        start = cur_coal.start + offset
                        rfile.seek(start, os.SEEK_SET)
                        data = rfile.read(size)
                        data_len = len(data)
                        if data_len != size:
                            raise errors.ShortReadvError(relpath, start, size,
                                                         actual=data_len)
                        if (start, size) == cur_offset_and_size:
                            # The offset requested are sorted as the coalesced
                            # ones, no need to cache. Win !
                            yield cur_offset_and_size[0], data
                            try:
                                cur_offset_and_size = next(iter_offsets)
                            except StopIteration:
                                return
                        else:
                            # Different sorting. We need to cache.
                            data_map[(start, size)] = data

                    # Yield everything we can
                    while cur_offset_and_size in data_map:
                        # Clean the cached data since we use it
                        # XXX: will break if offsets contains duplicates
                        this_data = data_map.pop(cur_offset_and_size)
                        yield cur_offset_and_size[0], this_data
                        try:
                            cur_offset_and_size = next(iter_offsets)
                        except StopIteration:
                            return

            except (errors.ShortReadvError, errors.InvalidRange,
                    errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
                mutter('Exception %r: %s during http._readv', e, e)
                if (not isinstance(e, errors.ShortReadvError)
                        or retried_offset == cur_offset_and_size):
                    # We don't degrade the range hint for ShortReadvError since
                    # they do not indicate a problem with the server ability to
                    # handle ranges. Except when we fail to get back a required
                    # offset twice in a row. In that case, falling back to
                    # single range or whole file should help.
                    if not self._degrade_range_hint(relpath, coalesced):
                        raise
                # Some offsets may have been already processed, so we retry
                # only the unsuccessful ones.
                offsets = [cur_offset_and_size] + [o for o in iter_offsets]
                retried_offset = cur_offset_and_size
                try_again = True
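
    # Illustrative call (hypothetical offsets): callers consume the generator
    # above in the order the offsets were requested, e.g.
    #   for offset, data in t._readv('inventory', [(0, 100), (1000, 200)]):
    #       ...  # yields (0, <100 bytes>) then (1000, <200 bytes>)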

    def _coalesce_readv(self, relpath, coalesced):
        """Issue several GET requests to satisfy the coalesced offsets"""

        def get_and_yield(relpath, coalesced):
            if coalesced:
                # Note that the _get below may raise
                # errors.InvalidHttpRange. It's the caller's responsibility to
                # decide how to retry since it may provide different coalesced
                # offsets.
                code, rfile = self._get(relpath, coalesced)
                for coal in coalesced:
                    yield coal, rfile

        if self._range_hint is None:
            # Download whole file
            for c, rfile in get_and_yield(relpath, coalesced):
                yield c, rfile
        else:
            total = len(coalesced)
            if self._range_hint == 'multi':
                max_ranges = self._max_get_ranges
            elif self._range_hint == 'single':
                max_ranges = total
            else:
                raise AssertionError("Unknown _range_hint %r"
                                     % (self._range_hint,))
            # TODO: Some web servers may ignore the range requests and return
            # the whole file, we may want to detect that and avoid further
            # requests.
            # Hint: test_readv_multiple_get_requests will fail once we do that
            cumul = 0
            ranges = []
            for coal in coalesced:
                if ((self._get_max_size > 0
                     and cumul + coal.length > self._get_max_size) or
                        len(ranges) >= max_ranges):
                    # Get that much and yield
                    for c, rfile in get_and_yield(relpath, ranges):
                        yield c, rfile
                    # Restart with the current offset
                    cumul = coal.length
                    ranges = [coal]
                else:
                    ranges.append(coal)
                    cumul += coal.length
            # Get the rest and yield
            for c, rfile in get_and_yield(relpath, ranges):
                yield c, rfile

    def recommended_page_size(self):
        """See Transport.recommended_page_size().

        For HTTP we suggest a large page size to reduce the overhead
        introduced by latency.
        """
        return 64 * 1024

    def _post(self, body_bytes):
        """POST body_bytes to .bzr/smart on this transport.

        :returns: (response code, response body file-like object).
        """
        # TODO: Requiring all the body_bytes to be available at the beginning
        # of the POST may require large client buffers. It would be nice to
        # have an interface that allows streaming via POST when possible (and
        # degrades to a local buffer when not).
        abspath = self._remote_path('.bzr/smart')
        response = self.request(
            'POST', abspath, body=body_bytes,
            headers={'Content-Type': 'application/octet-stream'})
        if response.status not in (200, 403):
            raise errors.InvalidHttpResponse(
                abspath, 'Unexpected status %d' % response.status)
        code = response.status
        data = handle_response(
            abspath, code, response.getheader, response)
        return code, data

    def _head(self, relpath):
        """Request the HEAD of a file.

        Performs the request and lets callers handle the results.
        """
        abspath = self._remote_path(relpath)
        response = self.request('HEAD', abspath)
        if response.status not in (200, 404):
            raise errors.InvalidHttpResponse(
                abspath, 'Unexpected status %d' % response.status)

        return response

    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location.

        :param relpath: Location to put the contents, relative to base.
        :param f: File-like object.
        """
        raise errors.TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path."""
        raise errors.TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir."""
        raise errors.TransportNotPossible('http does not support rmdir()')

    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object into the final
        location.
        """
        raise errors.TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to"""
        raise errors.TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.