1
# Copyright (C) 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""http/https transport using pycurl"""
19
# TODO: test reporting of http errors
21
# TODO: Transport option to control caching of particular requests; broadly we
22
# would want to offer "caching allowed" or "must revalidate", depending on
23
# whether we expect a particular file will be modified after it's committed.
24
# It's probably safer to just always revalidate. mbp 20060321
26
# TODO: Some refactoring could be done to avoid the strange idiom
27
# used to capture data and headers while setting up the request
28
# (and having to pass 'header' to _curl_perform to handle
29
# redirections). This could be achieved by creating a
30
# specialized Curl object and returning code, headers and data
31
# from _curl_perform. Not done because we may deprecate pycurl in the
32
# future -- vila 20070212
35
from cStringIO import StringIO
43
__version__ as bzrlib_version,
46
from bzrlib.trace import mutter
47
from bzrlib.transport.http import (
55
except ImportError, e:
56
mutter("failed to import pycurl: %s", e)
57
raise errors.DependencyNotPresent('pycurl', e)
60
# see if we can actually initialize PyCurl - sometimes it will load but
61
# fail to start up due to this bug:
63
# 32. (At least on Windows) If libcurl is built with c-ares and there's
64
# no DNS server configured in the system, the ares_init() call fails and
65
# thus curl_easy_init() fails as well. This causes weird effects for
66
# people who use numerical IP addresses only.
68
# reported by Alexander Belchenko, 2006-04-26
70
except pycurl.error, e:
71
mutter("failed to initialize pycurl: %s", e)
72
raise errors.DependencyNotPresent('pycurl', e)
77
def _get_pycurl_errcode(symbol, default):
    """Return the numerical error code for a symbol defined by pycurl.

    Different pycurl implementations define different symbols for error
    codes. Old versions never define some symbols (whether they can return
    the corresponding error code or not). This helper addresses the problem
    by letting this module define the symbols it cares about. Note: this
    allows defining symbols for errors that older versions will never
    return, which is fine.

    :param symbol: Name of the attribute to look up on the pycurl module.
    :param default: Value to use when pycurl does not define ``symbol``.
    :return: The value pycurl defines for ``symbol``, or ``default``.
    """
    return getattr(pycurl, symbol, default)
89
# Error codes this module tests for. Each one is taken from pycurl when the
# installed version defines the symbol, and otherwise falls back to the
# numeric value given here.
CURLE_SSL_CACERT_BADFILE = _get_pycurl_errcode('E_SSL_CACERT_BADFILE', 77)
CURLE_COULDNT_CONNECT = _get_pycurl_errcode('E_COULDNT_CONNECT', 7)
CURLE_COULDNT_RESOLVE_HOST = _get_pycurl_errcode('E_COULDNT_RESOLVE_HOST', 6)
CURLE_COULDNT_RESOLVE_PROXY = _get_pycurl_errcode('E_COULDNT_RESOLVE_PROXY', 5)
CURLE_GOT_NOTHING = _get_pycurl_errcode('E_GOT_NOTHING', 52)
CURLE_PARTIAL_FILE = _get_pycurl_errcode('E_PARTIAL_FILE', 18)
CURLE_SEND_ERROR = _get_pycurl_errcode('E_SEND_ERROR', 55)
98
class PyCurlTransport(HttpTransportBase):
99
"""http client transport using pycurl
101
PyCurl is a Python binding to the C "curl" multiprotocol client.
103
This transport can be significantly faster than the builtin
104
Python client. Advantages include: DNS caching.
107
def __init__(self, base, _from_transport=None):
# Initialise the transport for ``base`` through the parent class, reject
# https URLs when the installed pycurl reports no https support, and record
# the CA bundle path in ``self.cabundle``.
108
super(PyCurlTransport, self).__init__(base,
109
_from_transport=_from_transport)
110
if base.startswith('https'):
111
# Check availability of https into pycurl supported
113
# Element 8 of pycurl.version_info() is what gets searched for 'https'.
supported = pycurl.version_info()[8]
114
if 'https' not in supported:
115
raise errors.DependencyNotPresent('pycurl', 'no https support')
116
self.cabundle = ca_bundle.get_ca_path()
119
# NOTE(review): source lines are missing just above this point. The
# statements below fetch or create the shared Curl object, and other methods
# call self._get_curl(), which has no visible definition -- presumably these
# lines are the body of that helper rather than part of __init__. Confirm
# against the full source.
connection = self._get_connection()
120
if connection is None:
121
# First connection ever. There is no credentials for pycurl, either
122
# the password was embedded in the URL or it's not needed. The
123
# connection for pycurl is just the Curl object, it will not
124
# connect to the http server until the first request (which had
126
connection = pycurl.Curl()
127
# First request, initialize credentials.
128
auth = self._create_auth()
129
# Proxy handling is out of reach, so we punt
130
self._set_connection(connection, auth)
133
def has(self, relpath):
134
"""See Transport.has()"""
135
# We set NO BODY=0 in _get_full, so it should be safe
136
# to re-use the non-range curl object
# (The reset of NOBODY is done in _setup_get_request, which _get_full calls.)
137
curl = self._get_curl()
138
abspath = self._remote_path(relpath)
139
curl.setopt(pycurl.URL, abspath)
140
self._set_curl_options(curl)
141
curl.setopt(pycurl.HTTPGET, 1)
142
# don't want the body - ie just do a HEAD request
143
# This means "NO BODY" not 'nobody'
144
curl.setopt(pycurl.NOBODY, 1)
145
# But we need headers to handle redirections
147
# NOTE(review): ``header`` is used here but its assignment is not visible in
# this extract; it must be an object with a ``write`` method that
# _curl_perform can later hand to _parse_headers -- confirm.
curl.setopt(pycurl.HEADERFUNCTION, header.write)
148
# In some erroneous cases, pycurl will emit text on
149
# stdout if we don't catch it (see InvalidStatus tests
150
# for one such occurrence).
151
blackhole = StringIO()
152
curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
153
self._curl_perform(curl, header)
154
code = curl.getinfo(pycurl.HTTP_CODE)
155
# NOTE(review): the bodies of the two branches below, and whatever guards
# the final call, are missing from this extract. Presumably 404 answers
# "does not exist", 200 answers "exists", and any other status is reported
# through _raise_curl_http_error -- confirm against the full source.
if code == 404: # not found
157
elif code == 200: # "ok"
160
self._raise_curl_http_error(curl)
162
def _get(self, relpath, offsets, tail_amount=0):
163
# This just switches based on the type of request
164
if offsets is not None or tail_amount not in (0, None):
165
return self._get_ranged(relpath, offsets, tail_amount=tail_amount)
167
return self._get_full(relpath)
169
def _setup_get_request(self, curl, relpath):
    """Configure ``curl`` for a GET request, then do the common setup.

    :param curl: The curl object to place the request on.
    :param relpath: The relative path that we want to get.
    :return: Whatever ``_setup_request`` returns for ``curl`` and
        ``relpath``.
    """
    # Make sure we do a GET request. Versions > 7.14.1 also set the NO BODY
    # flag when HTTPGET is requested, but we do it ourselves in case an
    # older one is in use; has() switches NOBODY on for its own requests.
    curl.setopt(pycurl.NOBODY, 0)
    curl.setopt(pycurl.HTTPGET, 1)
    return self._setup_request(curl, relpath)
177
def _setup_request(self, curl, relpath):
# NOTE(review): this extract has lost the docstring's closing quotes and the
# statements that create ``data`` and ``header`` (both are used below through
# their ``write`` methods and returned) -- confirm against the full source.
178
"""Do the common setup stuff for making a request
180
:param curl: The curl object to place the request on
181
:param relpath: The relative path that we want to get
182
:return: (abspath, data, header)
184
data: file that will be filled with the body
185
header: file that will be filled with the headers
187
abspath = self._remote_path(relpath)
188
curl.setopt(pycurl.URL, abspath)
189
self._set_curl_options(curl)
193
curl.setopt(pycurl.WRITEFUNCTION, data.write)
194
curl.setopt(pycurl.HEADERFUNCTION, header.write)
196
return abspath, data, header
198
def _get_full(self, relpath):
199
"""Make a request for the entire file"""
200
curl = self._get_curl()
201
# Configure a plain GET; ``data`` and ``header`` collect body and headers.
abspath, data, header = self._setup_get_request(curl, relpath)
202
self._curl_perform(curl, header)
204
code = curl.getinfo(pycurl.HTTP_CODE)
208
# NOTE(review): the conditions guarding the two error paths below, and the
# final return, are missing from this extract. The message passed on
# suggests 404 maps to NoSuchFile and any status other than 200 is reported
# through _raise_curl_http_error -- confirm against the full source.
raise errors.NoSuchFile(abspath)
210
self._raise_curl_http_error(
211
curl, 'expected 200 or 404 for full response.')
215
# The parent class uses 0 to minimize the requests, but since we can't
216
# exploit the results as soon as they are received (pycurl limitation) we'd
217
# better issue more requests and provide a more responsive UI at the cost
218
# of more latency.
219
# If you modify this, think about modifying the comment in http/__init__.py
221
_get_max_size = 4 * 1024 * 1024
223
def _get_ranged(self, relpath, offsets, tail_amount):
224
"""Make a request for just part of the file."""
225
curl = self._get_curl()
226
abspath, data, header = self._setup_get_request(curl, relpath)
228
# A None range header means ranges cannot be used; fall back to a full GET.
range_header = self._attempted_range_header(offsets, tail_amount)
229
if range_header is None:
230
# Forget ranges, the server can't handle them
231
return self._get_full(relpath)
233
# Send the computed ranges as an extra 'Range' request header.
self._curl_perform(curl, header, ['Range: bytes=%s' % range_header])
236
# NOTE(review): source lines are missing between the perform call and the
# status check (for instance, nothing visible rewinds ``data`` before it is
# handed to handle_response) -- confirm against the full source.
code = curl.getinfo(pycurl.HTTP_CODE)
238
if code == 404: # not found
239
raise errors.NoSuchFile(abspath)
240
# Both 400 and 416 are reported as an invalid range request.
elif code in (400, 416):
241
# We don't know which, but one of the ranges we specified was
243
raise errors.InvalidHttpRange(abspath, range_header,
244
'Server return code %d'
245
% curl.getinfo(pycurl.HTTP_CODE))
246
msg = self._parse_headers(header)
247
return code, response.handle_response(abspath, code, msg, data)
249
def _parse_headers(self, status_and_headers):
250
"""Transform the headers provided by curl into an HTTPMessage"""
251
# Rewind the buffer that the header callback has been writing into.
status_and_headers.seek(0)
253
# Consume the first line so HTTPMessage only parses what follows it (going
# by the parameter name, that first line is the status line).
status_and_headers.readline()
254
msg = httplib.HTTPMessage(status_and_headers)
# NOTE(review): no return statement is visible in this extract although
# callers assign this method's result to ``msg``; presumably ``return msg``
# follows -- confirm against the full source.
257
def _post(self, body_bytes):
# POST ``body_bytes`` to '.bzr/smart' and return the status code together
# with the handled response.
258
curl = self._get_curl()
259
abspath, data, header = self._setup_request(curl, '.bzr/smart')
260
curl.setopt(pycurl.POST, 1)
261
# curl reads the request body through READFUNCTION, so wrap the bytes in a
# file-like object and announce their length up front.
fake_file = StringIO(body_bytes)
262
curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
263
curl.setopt(pycurl.READFUNCTION, fake_file.read)
264
# We override the Expect: header so that pycurl will send the POST
267
# NOTE(review): the ``try:`` matching the ``except`` below is not visible in
# this extract, nor is the rest of the comment above.
self._curl_perform(curl, header, ['Expect: '])
268
except pycurl.error, e:
269
if e[0] == CURLE_SEND_ERROR:
270
# When talking to an HTTP/1.0 server, getting a 400+ error code
271
# triggers a bug in some combinations of curl/kernel in rare
272
# occurrences. Basically, the server closes the connection
273
# after sending the error but the client (having received and
274
# parsed the response) still try to send the request body (see
275
# bug #225020 and its upstream associated bug). Since the
276
# error code and the headers are known to be available, we just
277
# swallow the exception, leaving the upper levels handle the
279
mutter('got pycurl error in POST: %s, %s, %s, url: %s ',
280
e[0], e[1], e, abspath)
285
# NOTE(review): source lines are missing just above (presumably the handling
# of other pycurl errors and any rewinding of ``data``) -- confirm.
code = curl.getinfo(pycurl.HTTP_CODE)
286
msg = self._parse_headers(header)
287
return code, response.handle_response(abspath, code, msg, data)
289
def _raise_curl_http_error(self, curl, info=None):
# Raise an exception describing the HTTP status of the last request made on
# ``curl``.
# NOTE(review): this extract has lost the conditions selecting between the
# two raises, the definition of ``msg`` used in the final message, and any
# use of ``info`` (presumably ``msg`` is built from it) -- confirm against
# the full source.
290
code = curl.getinfo(pycurl.HTTP_CODE)
291
url = curl.getinfo(pycurl.EFFECTIVE_URL)
292
# Some error codes can be handled the same way for all
295
raise errors.TransportError(
296
'Server refuses to fulfill the request (403 Forbidden)'
303
raise errors.InvalidHttpResponse(
304
url, 'Unable to handle http code %d%s' % (code,msg))
306
def _set_curl_options(self, curl):
307
"""Set options for all requests"""
308
# Turn on curl's verbose output when the 'http' debug flag is set.
if 'http' in debug.debug_flags:
309
curl.setopt(pycurl.VERBOSE, 1)
310
# pycurl doesn't implement the CURLOPT_STDERR option, so we can't
311
# do : curl.setopt(pycurl.STDERR, trace._trace_file)
313
# Identify ourselves with both the bzrlib and the pycurl versions.
ua_str = 'bzr/%s (pycurl: %s)' % (bzrlib.__version__, pycurl.version)
314
curl.setopt(pycurl.USERAGENT, ua_str)
316
# NOTE(review): a source line is missing just above; presumably it guards
# this call on ``self.cabundle`` being set -- confirm.
curl.setopt(pycurl.CAINFO, self.cabundle)
317
# Set accepted auth methods
318
curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY)
319
curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_ANY)
320
auth = self._get_credentials()
321
user = auth.get('user', None)
322
password = auth.get('password', None)
325
# NOTE(review): source lines are missing around here. As visible,
# ``user + ':'`` would fail for a None user and the password is never
# appended, so presumably a ``user is not None`` guard and a
# ``userpass += password`` line were lost -- confirm against the full source.
userpass = user + ':'
326
if password is not None: # '' is a valid password
328
curl.setopt(pycurl.USERPWD, userpass)
330
def _curl_perform(self, curl, header, more_headers=[]):
# ``more_headers`` has a mutable default, but the visible code only reads it
# (``headers + more_headers`` builds a new list), so it is never modified.
331
"""Perform curl operation and translate exceptions."""
333
# NOTE(review): the ``try:`` opening this section, the call that actually
# performs the transfer, and some lines of the comment and header list below
# are not visible in this extract -- confirm against the full source.
# There's no way in http/1.0 to say "must
334
# revalidate"; we don't want to force it to always
335
# retrieve. so just turn off the default Pragma
337
headers = ['Cache-control: max-age=0',
339
'Connection: Keep-Alive']
340
curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
342
except pycurl.error, e:
343
url = curl.getinfo(pycurl.EFFECTIVE_URL)
344
mutter('got pycurl error: %s, %s, %s, url: %s ',
346
# Failures to resolve or connect (and the bad CA file case) are all reported
# as a ConnectionError carrying curl's message and the URL.
if e[0] in (CURLE_SSL_CACERT_BADFILE,
347
CURLE_COULDNT_RESOLVE_HOST,
348
CURLE_COULDNT_CONNECT,
350
CURLE_COULDNT_RESOLVE_PROXY,):
351
raise errors.ConnectionError(
352
'curl connection error (%s)\non %s' % (e[1], url))
353
elif e[0] == CURLE_PARTIAL_FILE:
354
# Pycurl itself has detected a short read. We do not have all
355
# the information for the ShortReadvError, but that should be
357
raise errors.ShortReadvError(url,
358
offset='unknown', length='unknown',
360
extra='Server aborted the request')
362
# The statements from here on inspect the HTTP status; 301, 302, 303 and 307
# are turned into a RedirectRequested exception built from the 'location'
# response header, with only 301 flagged as permanent.
code = curl.getinfo(pycurl.HTTP_CODE)
363
if code in (301, 302, 303, 307):
364
url = curl.getinfo(pycurl.EFFECTIVE_URL)
365
msg = self._parse_headers(header)
366
redirected_to = msg.getheader('location')
367
raise errors.RedirectRequested(url,
369
is_permanent=(code == 301),
370
qual_proto=self._scheme)
373
def get_test_permutations():
374
"""Return the permutations to be used in testing."""
375
from bzrlib.tests.http_server import HttpServer_PyCurl
376
return [(PyCurlTransport, HttpServer_PyCurl),