27
27
from cStringIO import StringIO
29
from bzrlib import errors
32
__version__ as bzrlib_version,
31
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
32
TransportError, ConnectionError,
35
from bzrlib.errors import (NoSuchFile,
33
37
DependencyNotPresent)
34
38
from bzrlib.trace import mutter
35
39
from bzrlib.transport import register_urlparse_netloc_protocol
36
from bzrlib.transport.http import (HttpTransportBase, HttpServer,
38
response, _pycurl_errors)
40
from bzrlib.transport.http import (
68
76
PyCurl is a Python binding to the C "curl" multiprotocol client.
70
This transport can be significantly faster than the builtin Python client.
71
Advantages include: DNS caching, connection keepalive, and ability to
72
set headers to allow caching.
78
This transport can be significantly faster than the builtin
79
Python client. Advantages include: DNS caching.
75
82
def __init__(self, base, from_transport=None):
76
83
super(PyCurlTransport, self).__init__(base)
84
if base.startswith('https'):
85
# Check availability of https into pycurl supported
87
supported = pycurl.version_info()[8]
88
if 'https' not in supported:
89
raise DependencyNotPresent('pycurl', 'no https support')
90
self.cabundle = ca_bundle.get_ca_path()
77
91
if from_transport is not None:
78
self._base_curl = from_transport._base_curl
79
self._range_curl = from_transport._range_curl
92
self._curl = from_transport._curl
81
94
mutter('using pycurl %s' % pycurl.version)
82
self._base_curl = pycurl.Curl()
83
self._range_curl = pycurl.Curl()
95
self._curl = pycurl.Curl()
85
97
def should_cache(self):
86
98
"""Return True if the data pulled across should be cached locally.
109
126
self._raise_curl_http_error(curl)
111
128
def _get(self, relpath, ranges, tail_amount=0):
112
129
# This just switches based on the type of request
113
130
if ranges is not None or tail_amount not in (0, None):
114
131
return self._get_ranged(relpath, ranges, tail_amount=tail_amount)
116
133
return self._get_full(relpath)
118
135
def _setup_get_request(self, curl, relpath):
119
136
# Make sure we do a GET request. versions > 7.14.1 also set the
120
137
# NO BODY flag, but we'll do it ourselves in case it is an older
157
174
raise NoSuchFile(abspath)
159
self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.')
176
self._raise_curl_http_error(
177
curl, 'expected 200 or 404 for full response.')
161
179
return code, data
163
181
def _get_ranged(self, relpath, ranges, tail_amount):
164
182
"""Make a request for just part of the file."""
165
# We would like to re-use the same curl object for
166
# full requests and partial requests
167
# Documentation says 'Pass in NULL to disable the use of ranges'
168
# None is the closest we have, but at least with pycurl 7.13.1
169
# It raises an 'invalid arguments' response
170
# curl.setopt(pycurl.RANGE, None)
171
# curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter
172
# So instead we hack around this by using a separate object
173
curl = self._range_curl
174
184
abspath, data, header = self._setup_get_request(curl, relpath)
176
curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount))
177
self._curl_perform(curl)
186
range_header = self.attempted_range_header(ranges, tail_amount)
187
if range_header is None:
188
# Forget ranges, the server can't handle them
189
return self._get_full(relpath)
191
self._curl_perform(curl, ['Range: bytes=%s'
192
% self.range_header(ranges, tail_amount)])
180
195
code = curl.getinfo(pycurl.HTTP_CODE)
202
219
def _raise_curl_http_error(self, curl, info=None):
203
220
code = curl.getinfo(pycurl.HTTP_CODE)
204
221
url = curl.getinfo(pycurl.EFFECTIVE_URL)
222
# Some error codes can be handled the same way for all
225
raise errors.TransportError(
226
'Server refuses to fullfil the request for: %s' % url)
209
raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s'
232
raise errors.InvalidHttpResponse(
233
url, 'Unable to handle http code %d%s' % (code,msg))
212
235
def _set_curl_options(self, curl):
213
236
"""Set options for all requests"""
214
# There's no way in http/1.0 to say "must revalidate"; we don't want
215
# to force it to always retrieve. so just turn off the default Pragma
217
# Also, we override the Expect: header so that pycurl will send the POST
219
headers = ['Cache-control: max-age=0',
221
'Connection: Keep-Alive',
223
237
## curl.setopt(pycurl.VERBOSE, 1)
224
238
# TODO: maybe include a summary of the pycurl version
225
239
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
226
240
curl.setopt(pycurl.USERAGENT, ua_str)
227
curl.setopt(pycurl.HTTPHEADER, headers)
228
241
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
243
curl.setopt(pycurl.CAINFO, self.cabundle)
230
def _curl_perform(self, curl):
245
def _curl_perform(self, curl, more_headers=[]):
231
246
"""Perform curl operation and translate exceptions."""
248
# There's no way in http/1.0 to say "must
249
# revalidate"; we don't want to force it to always
250
# retrieve. so just turn off the default Pragma
252
headers = ['Cache-control: max-age=0',
254
'Connection: Keep-Alive']
255
curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
234
257
except pycurl.error, e:
235
# XXX: There seem to be no symbolic constants for these values.
236
258
url = curl.getinfo(pycurl.EFFECTIVE_URL)
237
259
mutter('got pycurl error: %s, %s, %s, url: %s ',
238
260
e[0], _pycurl_errors.errorcode[e[0]], e, url)
239
261
if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST,
240
262
_pycurl_errors.CURLE_COULDNT_CONNECT,
263
_pycurl_errors.CURLE_GOT_NOTHING,
241
264
_pycurl_errors.CURLE_COULDNT_RESOLVE_PROXY):
242
265
raise ConnectionError('curl connection error (%s)\non %s'
244
# jam 20060713 The code didn't use to re-raise the exception here
267
elif e[0] == _pycurl_errors.CURLE_PARTIAL_FILE:
268
# Pycurl itself has detected a short read. We do
269
# not have all the information for the
270
# ShortReadvError, but that should be enough
271
raise errors.ShortReadvError(url,
272
offset='unknown', length='unknown',
274
extra='Server aborted the request')
275
# jam 20060713 The code didn't use to re-raise the exception here,
245
276
# but that seemed bogus
249
class HttpServer_PyCurl(HttpServer):
250
"""Subclass of HttpServer that gives http+pycurl urls.
252
This is for use in testing: connections to this server will always go
253
through pycurl where possible.
256
# urls returned by this server should require the pycurl client impl
257
_url_protocol = 'http+pycurl'
260
280
def get_test_permutations():
261
281
"""Return the permutations to be used in testing."""
282
from bzrlib.tests.HttpServer import HttpServer_PyCurl
262
283
return [(PyCurlTransport, HttpServer_PyCurl),