65
100
PyCurl is a Python binding to the C "curl" multiprotocol client.
67
This transport can be significantly faster than the builtin Python client.
68
Advantages include: DNS caching, connection keepalive, and ability to
69
set headers to allow caching.
102
This transport can be significantly faster than the builtin
103
Python client. Advantages include: DNS caching.
72
def __init__(self, base):
73
super(PyCurlTransport, self).__init__(base)
74
mutter('using pycurl %s' % pycurl.version)
106
def __init__(self, base, _from_transport=None):
107
super(PyCurlTransport, self).__init__(base,
108
_from_transport=_from_transport)
109
if base.startswith('https'):
110
# Check availability of https into pycurl supported
112
supported = pycurl.version_info()[8]
113
if 'https' not in supported:
114
raise errors.DependencyNotPresent('pycurl', 'no https support')
115
self.cabundle = ca_bundle.get_ca_path()
76
def should_cache(self):
77
"""Return True if the data pulled across should be cached locally.
118
connection = self._get_connection()
119
if connection is None:
120
# First connection ever. There are no credentials for pycurl, either
121
# the password was embedded in the URL or it's not needed. The
122
# connection for pycurl is just the Curl object, it will not
123
# connect to the http server until the first request (which had
125
connection = pycurl.Curl()
126
# First request, initialize credentials.
127
auth = self._create_auth()
128
# Proxy handling is out of reach, so we punt
129
self._set_connection(connection, auth)
81
132
def has(self, relpath):
83
abspath = self._real_abspath(relpath)
133
"""See Transport.has()"""
134
# We set NO BODY=0 in _get_full, so it should be safe
135
# to re-use the non-range curl object
136
curl = self._get_curl()
137
abspath = self._remote_path(relpath)
84
138
curl.setopt(pycurl.URL, abspath)
85
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
86
139
self._set_curl_options(curl)
140
curl.setopt(pycurl.HTTPGET, 1)
87
141
# don't want the body - ie just do a HEAD request
142
# This means "NO BODY" not 'nobody'
88
143
curl.setopt(pycurl.NOBODY, 1)
89
self._curl_perform(curl)
144
# But we need headers to handle redirections
146
curl.setopt(pycurl.HEADERFUNCTION, header.write)
147
# In some erroneous cases, pycurl will emit text on
148
# stdout if we don't catch it (see InvalidStatus tests
149
# for one such occurrence).
150
blackhole = StringIO()
151
curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
152
self._curl_perform(curl, header)
90
153
code = curl.getinfo(pycurl.HTTP_CODE)
91
154
if code == 404: # not found
93
elif code in (200, 302): # "ok", "found"
156
elif code == 200: # "ok"
96
self._raise_curl_connection_error(curl)
98
159
self._raise_curl_http_error(curl)
100
def _get(self, relpath, ranges):
102
abspath = self._real_abspath(relpath)
104
curl.setopt(pycurl.URL, abspath)
105
self._set_curl_options(curl)
106
curl.setopt(pycurl.WRITEFUNCTION, sio.write)
161
def _get(self, relpath, offsets, tail_amount=0):
162
# This just switches based on the type of request
163
if offsets is not None or tail_amount not in (0, None):
164
return self._get_ranged(relpath, offsets, tail_amount=tail_amount)
166
return self._get_full(relpath)
168
def _setup_get_request(self, curl, relpath):
169
# Make sure we do a GET request. versions > 7.14.1 also set the
170
# NO BODY flag, but we'll do it ourselves in case it is an older
107
172
curl.setopt(pycurl.NOBODY, 0)
108
if ranges is not None:
109
assert len(ranges) == 1
110
# multiple ranges not supported yet because we can't decode the
112
curl.setopt(pycurl.RANGE, '%d-%d' % ranges[0])
113
self._curl_perform(curl)
173
curl.setopt(pycurl.HTTPGET, 1)
174
return self._setup_request(curl, relpath)
176
def _setup_request(self, curl, relpath):
177
"""Do the common setup stuff for making a request
179
:param curl: The curl object to place the request on
180
:param relpath: The relative path that we want to get
181
:return: (abspath, data, header)
183
data: file that will be filled with the body
184
header: file that will be filled with the headers
186
abspath = self._remote_path(relpath)
187
curl.setopt(pycurl.URL, abspath)
188
self._set_curl_options(curl)
192
curl.setopt(pycurl.WRITEFUNCTION, data.write)
193
curl.setopt(pycurl.HEADERFUNCTION, header.write)
195
return abspath, data, header
197
def _get_full(self, relpath):
198
"""Make a request for the entire file"""
199
curl = self._get_curl()
200
abspath, data, header = self._setup_get_request(curl, relpath)
201
self._curl_perform(curl, header)
114
203
code = curl.getinfo(pycurl.HTTP_CODE)
116
raise NoSuchFile(abspath)
120
elif code == 206 and (ranges is not None):
124
self._raise_curl_connection_error(curl)
207
raise errors.NoSuchFile(abspath)
209
self._raise_curl_http_error(
210
curl, 'expected 200 or 404 for full response.')
214
# The parent class uses 0 to minimize the requests, but since we can't
215
# exploit the results as soon as they are received (pycurl limitation) we'd
216
# better issue more requests and provide a more responsive UI at the cost
217
# of more latency.
218
# If you modify this, think about modifying the comment in http/__init__.py
220
_get_max_size = 4 * 1024 * 1024
222
def _get_ranged(self, relpath, offsets, tail_amount):
223
"""Make a request for just part of the file."""
224
curl = self._get_curl()
225
abspath, data, header = self._setup_get_request(curl, relpath)
227
range_header = self._attempted_range_header(offsets, tail_amount)
228
if range_header is None:
229
# Forget ranges, the server can't handle them
230
return self._get_full(relpath)
232
self._curl_perform(curl, header, ['Range: bytes=%s' % range_header])
235
code = curl.getinfo(pycurl.HTTP_CODE)
237
if code == 404: # not found
238
raise errors.NoSuchFile(abspath)
239
elif code in (400, 416):
240
# We don't know which, but one of the ranges we specified was
242
raise errors.InvalidHttpRange(abspath, range_header,
243
'Server return code %d'
244
% curl.getinfo(pycurl.HTTP_CODE))
245
msg = self._parse_headers(header)
246
return code, response.handle_response(abspath, code, msg, data)
248
def _parse_headers(self, status_and_headers):
249
"""Transform the headers provided by curl into an HTTPMessage"""
250
status_and_headers.seek(0)
252
status_and_headers.readline()
253
msg = httplib.HTTPMessage(status_and_headers)
256
def _post(self, body_bytes):
257
fake_file = StringIO(body_bytes)
258
curl = self._get_curl()
259
# Other places that use the Curl object (returned by _get_curl)
260
# for GET requests explicitly set HTTPGET, so it should be safe to
261
# re-use the same object for both GETs and POSTs.
262
curl.setopt(pycurl.POST, 1)
263
curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
264
curl.setopt(pycurl.READFUNCTION, fake_file.read)
265
abspath, data, header = self._setup_request(curl, '.bzr/smart')
266
# We override the Expect: header so that pycurl will send the POST
268
self._curl_perform(curl, header, ['Expect: '])
270
code = curl.getinfo(pycurl.HTTP_CODE)
271
msg = self._parse_headers(header)
272
return code, response.handle_response(abspath, code, msg, data)
274
def _raise_curl_http_error(self, curl, info=None):
275
code = curl.getinfo(pycurl.HTTP_CODE)
276
url = curl.getinfo(pycurl.EFFECTIVE_URL)
277
# Some error codes can be handled the same way for all
280
raise errors.TransportError(
281
'Server refuses to fulfill the request (403 Forbidden)'
126
self._raise_curl_http_error(curl)
128
def _raise_curl_connection_error(self, curl):
129
curl_errno = curl.getinfo(pycurl.OS_ERRNO)
130
url = curl.getinfo(pycurl.EFFECTIVE_URL)
131
raise ConnectionError('curl connection error (%s) on %s'
132
% (os.strerror(curl_errno), url))
134
def _raise_curl_http_error(self, curl):
135
code = curl.getinfo(pycurl.HTTP_CODE)
136
url = curl.getinfo(pycurl.EFFECTIVE_URL)
137
raise TransportError('http error %d probing for %s' %
288
raise errors.InvalidHttpResponse(
289
url, 'Unable to handle http code %d%s' % (code,msg))
140
291
def _set_curl_options(self, curl):
141
292
"""Set options for all requests"""
142
# There's no way in http/1.0 to say "must revalidate"; we don't want
143
# to force it to always retrieve. so just turn off the default Pragma
145
headers = ['Cache-control: max-age=0',
147
## curl.setopt(pycurl.VERBOSE, 1)
148
# TODO: maybe include a summary of the pycurl version
149
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__)
293
if 'http' in debug.debug_flags:
294
curl.setopt(pycurl.VERBOSE, 1)
295
# pycurl doesn't implement the CURLOPT_STDERR option, so we can't
296
# do : curl.setopt(pycurl.STDERR, trace._trace_file)
298
ua_str = 'bzr/%s (pycurl: %s)' % (bzrlib.__version__, pycurl.version)
150
299
curl.setopt(pycurl.USERAGENT, ua_str)
151
curl.setopt(pycurl.HTTPHEADER, headers)
152
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
301
curl.setopt(pycurl.CAINFO, self.cabundle)
302
# Set accepted auth methods
303
curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY)
304
curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_ANY)
305
auth = self._get_credentials()
306
user = auth.get('user', None)
307
password = auth.get('password', None)
310
userpass = user + ':'
311
if password is not None: # '' is a valid password
313
curl.setopt(pycurl.USERPWD, userpass)
154
def _curl_perform(self, curl):
315
def _curl_perform(self, curl, header, more_headers=[]):
155
316
"""Perform curl operation and translate exceptions."""
318
# There's no way in http/1.0 to say "must
319
# revalidate"; we don't want to force it to always
320
# retrieve. so just turn off the default Pragma
322
headers = ['Cache-control: max-age=0',
324
'Connection: Keep-Alive']
325
curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
158
327
except pycurl.error, e:
159
# XXX: There seem to be no symbolic constants for these values.
161
# couldn't resolve host
162
raise NoSuchFile(curl.getinfo(pycurl.EFFECTIVE_URL), e)
165
class HttpServer_PyCurl(HttpServer):
166
"""Subclass of HttpServer that gives http+pycurl urls.
168
This is for use in testing: connections to this server will always go
169
through pycurl where possible.
172
# urls returned by this server should require the pycurl client impl
173
_url_protocol = 'http+pycurl'
328
url = curl.getinfo(pycurl.EFFECTIVE_URL)
329
mutter('got pycurl error: %s, %s, %s, url: %s ',
331
if e[0] in (CURLE_SSL_CACERT_BADFILE,
332
CURLE_COULDNT_RESOLVE_HOST,
333
CURLE_COULDNT_CONNECT,
335
CURLE_COULDNT_RESOLVE_PROXY,):
336
raise errors.ConnectionError(
337
'curl connection error (%s)\non %s' % (e[1], url))
338
elif e[0] == CURLE_PARTIAL_FILE:
339
# Pycurl itself has detected a short read. We do not have all
340
# the information for the ShortReadvError, but that should be
342
raise errors.ShortReadvError(url,
343
offset='unknown', length='unknown',
345
extra='Server aborted the request')
347
code = curl.getinfo(pycurl.HTTP_CODE)
348
if code in (301, 302, 303, 307):
349
url = curl.getinfo(pycurl.EFFECTIVE_URL)
350
msg = self._parse_headers(header)
351
redirected_to = msg.getheader('location')
352
raise errors.RedirectRequested(url,
354
is_permanent=(code == 301),
355
qual_proto=self._scheme)
176
358
def get_test_permutations():
177
359
"""Return the permutations to be used in testing."""
360
from bzrlib.tests.http_server import HttpServer_PyCurl
178
361
return [(PyCurlTransport, HttpServer_PyCurl),