23
23
# whether we expect a particular file will be modified after it's committed.
24
24
# It's probably safer to just always revalidate. mbp 20060321
26
# TODO: Some refactoring could be done to avoid the strange idiom
27
# used to capture data and headers while setting up the request
28
# (and having to pass 'header' to _curl_perform to handle
29
# redirections). This could be achieved by creating a
30
# specialized Curl object and returning code, headers and data
31
# from _curl_perform. Not done because we may deprecate pycurl in the
32
# future -- vila 20070212
27
35
from cStringIO import StringIO
29
from bzrlib import errors
40
__version__ as bzrlib_version,
31
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
32
TransportError, ConnectionError,
43
from bzrlib.errors import (NoSuchFile,
33
45
DependencyNotPresent)
34
46
from bzrlib.trace import mutter
35
47
from bzrlib.transport import register_urlparse_netloc_protocol
36
from bzrlib.transport.http import (HttpTransportBase, HttpServer,
38
response, _pycurl_errors)
48
from bzrlib.transport.http import (
68
84
PyCurl is a Python binding to the C "curl" multiprotocol client.
70
This transport can be significantly faster than the builtin Python client.
71
Advantages include: DNS caching, connection keepalive, and ability to
72
set headers to allow caching.
86
This transport can be significantly faster than the builtin
87
Python client. Advantages include: DNS caching.
75
90
def __init__(self, base, from_transport=None):
76
91
super(PyCurlTransport, self).__init__(base)
92
if base.startswith('https'):
93
# Check availability of https into pycurl supported
95
supported = pycurl.version_info()[8]
96
if 'https' not in supported:
97
raise DependencyNotPresent('pycurl', 'no https support')
98
self.cabundle = ca_bundle.get_ca_path()
77
99
if from_transport is not None:
78
self._base_curl = from_transport._base_curl
79
self._range_curl = from_transport._range_curl
100
self._curl = from_transport._curl
81
102
mutter('using pycurl %s' % pycurl.version)
82
self._base_curl = pycurl.Curl()
83
self._range_curl = pycurl.Curl()
103
self._curl = pycurl.Curl()
85
105
def should_cache(self):
86
106
"""Return True if the data pulled across should be cached locally.
91
111
"""See Transport.has()"""
92
112
# We set NO BODY=0 in _get_full, so it should be safe
93
113
# to re-use the non-range curl object
94
curl = self._base_curl
95
115
abspath = self._real_abspath(relpath)
96
116
curl.setopt(pycurl.URL, abspath)
97
117
self._set_curl_options(curl)
118
curl.setopt(pycurl.HTTPGET, 1)
98
119
# don't want the body - ie just do a HEAD request
99
120
# This means "NO BODY" not 'nobody'
100
121
curl.setopt(pycurl.NOBODY, 1)
101
self._curl_perform(curl)
122
# But we need headers to handle redirections
124
curl.setopt(pycurl.HEADERFUNCTION, header.write)
125
# In some erroneous cases, pycurl will emit text on
126
# stdout if we don't catch it (see InvalidStatus tests
127
# for one such occurrence).
128
blackhole = StringIO()
129
curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
130
self._curl_perform(curl, header)
102
131
code = curl.getinfo(pycurl.HTTP_CODE)
103
132
if code == 404: # not found
105
elif code in (200, 302): # "ok", "found"
134
elif code == 200: # "ok"
108
137
self._raise_curl_http_error(curl)
110
139
def _get(self, relpath, ranges, tail_amount=0):
111
140
# This just switches based on the type of request
112
141
if ranges is not None or tail_amount not in (0, None):
113
142
return self._get_ranged(relpath, ranges, tail_amount=tail_amount)
115
144
return self._get_full(relpath)
117
146
def _setup_get_request(self, curl, relpath):
147
# Make sure we do a GET request. versions > 7.14.1 also set the
148
# NO BODY flag, but we'll do it ourselves in case it is an older
150
curl.setopt(pycurl.NOBODY, 0)
151
curl.setopt(pycurl.HTTPGET, 1)
152
return self._setup_request(curl, relpath)
154
def _setup_request(self, curl, relpath):
118
155
"""Do the common setup stuff for making a request
120
157
:param curl: The curl object to place the request on
153
185
raise NoSuchFile(abspath)
155
self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.')
187
self._raise_curl_http_error(
188
curl, 'expected 200 or 404 for full response.')
157
190
return code, data
159
192
def _get_ranged(self, relpath, ranges, tail_amount):
160
193
"""Make a request for just part of the file."""
161
# We would like to re-use the same curl object for
162
# full requests and partial requests
163
# Documentation says 'Pass in NULL to disable the use of ranges'
164
# None is the closest we have, but at least with pycurl 7.13.1
165
# It raises an 'invalid arguments' response
166
# curl.setopt(pycurl.RANGE, None)
167
# curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter
168
# So instead we hack around this by using a separate object
169
curl = self._range_curl
170
195
abspath, data, header = self._setup_get_request(curl, relpath)
172
curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount))
173
self._curl_perform(curl)
197
range_header = self.attempted_range_header(ranges, tail_amount)
198
if range_header is None:
199
# Forget ranges, the server can't handle them
200
return self._get_full(relpath)
202
self._curl_perform(curl, header, ['Range: bytes=%s' % range_header])
176
205
code = curl.getinfo(pycurl.HTTP_CODE)
179
208
# handle_response will raise NoSuchFile, etc based on the response code
180
209
return code, response.handle_response(abspath, code, headers, data)
182
def _raise_curl_connection_error(self, curl):
183
curl_errno = curl.getinfo(pycurl.OS_ERRNO)
184
url = curl.getinfo(pycurl.EFFECTIVE_URL)
185
raise ConnectionError('curl connection error (%s) on %s'
186
% (os.strerror(curl_errno), url))
211
def _post(self, body_bytes):
212
fake_file = StringIO(body_bytes)
214
# Other places that use _base_curl for GET requests explicitly set
215
# HTTPGET, so it should be safe to re-use the same object for both GETs
217
curl.setopt(pycurl.POST, 1)
218
curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
219
curl.setopt(pycurl.READFUNCTION, fake_file.read)
220
abspath, data, header = self._setup_request(curl, '.bzr/smart')
221
# We override the Expect: header so that pycurl will send the POST
223
self._curl_perform(curl, header, ['Expect: '])
225
code = curl.getinfo(pycurl.HTTP_CODE)
226
headers = _extract_headers(header.getvalue(), abspath)
227
return code, response.handle_response(abspath, code, headers, data)
188
229
def _raise_curl_http_error(self, curl, info=None):
189
230
code = curl.getinfo(pycurl.HTTP_CODE)
190
231
url = curl.getinfo(pycurl.EFFECTIVE_URL)
232
# Some error codes can be handled the same way for all
235
raise errors.TransportError(
236
'Server refuses to fullfil the request for: %s' % url)
195
raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s'
242
raise errors.InvalidHttpResponse(
243
url, 'Unable to handle http code %d%s' % (code,msg))
198
245
def _set_curl_options(self, curl):
199
246
"""Set options for all requests"""
200
# There's no way in http/1.0 to say "must revalidate"; we don't want
201
# to force it to always retrieve. so just turn off the default Pragma
203
headers = ['Cache-control: max-age=0',
205
'Connection: Keep-Alive']
206
247
## curl.setopt(pycurl.VERBOSE, 1)
207
248
# TODO: maybe include a summary of the pycurl version
208
249
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
209
250
curl.setopt(pycurl.USERAGENT, ua_str)
210
curl.setopt(pycurl.HTTPHEADER, headers)
211
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
252
curl.setopt(pycurl.CAINFO, self.cabundle)
213
def _curl_perform(self, curl):
254
def _curl_perform(self, curl, header, more_headers=[]):
214
255
"""Perform curl operation and translate exceptions."""
257
# There's no way in http/1.0 to say "must
258
# revalidate"; we don't want to force it to always
259
# retrieve. so just turn off the default Pragma
261
headers = ['Cache-control: max-age=0',
263
'Connection: Keep-Alive']
264
curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
217
266
except pycurl.error, e:
218
# XXX: There seem to be no symbolic constants for these values.
219
267
url = curl.getinfo(pycurl.EFFECTIVE_URL)
220
268
mutter('got pycurl error: %s, %s, %s, url: %s ',
221
269
e[0], _pycurl_errors.errorcode[e[0]], e, url)
222
270
if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST,
223
_pycurl_errors.CURLE_COULDNT_CONNECT):
224
self._raise_curl_connection_error(curl)
225
# jam 20060713 The code didn't use to re-raise the exception here
271
_pycurl_errors.CURLE_COULDNT_CONNECT,
272
_pycurl_errors.CURLE_GOT_NOTHING,
273
_pycurl_errors.CURLE_COULDNT_RESOLVE_PROXY):
274
raise ConnectionError('curl connection error (%s)\non %s'
276
elif e[0] == _pycurl_errors.CURLE_PARTIAL_FILE:
277
# Pycurl itself has detected a short read. We do
278
# not have all the information for the
279
# ShortReadvError, but that should be enough
280
raise errors.ShortReadvError(url,
281
offset='unknown', length='unknown',
283
extra='Server aborted the request')
284
# jam 20060713 The code didn't use to re-raise the exception here,
226
285
# but that seemed bogus
230
class HttpServer_PyCurl(HttpServer):
231
"""Subclass of HttpServer that gives http+pycurl urls.
233
This is for use in testing: connections to this server will always go
234
through pycurl where possible.
237
# urls returned by this server should require the pycurl client impl
238
_url_protocol = 'http+pycurl'
287
code = curl.getinfo(pycurl.HTTP_CODE)
288
if code in (301, 302, 303, 307):
289
url = curl.getinfo(pycurl.EFFECTIVE_URL)
290
headers = _extract_headers(header.getvalue(), url)
291
redirected_to = headers['Location']
292
raise errors.RedirectRequested(url,
294
is_permament=(code == 301),
295
qual_proto=self._qualified_proto)
241
298
def get_test_permutations():
242
299
"""Return the permutations to be used in testing."""
300
from bzrlib.tests.HttpServer import HttpServer_PyCurl
243
301
return [(PyCurlTransport, HttpServer_PyCurl),