/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/_pycurl.py

  • Committer: Aaron Bentley
  • Date: 2007-03-03 17:17:53 UTC
  • mfrom: (2309 +trunk)
  • mto: This revision was merged to the branch mainline in revision 2316.
  • Revision ID: aaron.bentley@utoronto.ca-20070303171753-o0s1yrxx5sn12p2k
Merge bzr.dev

Show diffs side-by-side

added

removed

Lines of Context:
25
25
 
26
26
import os
27
27
from cStringIO import StringIO
 
28
import sys
28
29
 
29
 
from bzrlib import errors
 
30
from bzrlib import (
 
31
    errors,
 
32
    __version__ as bzrlib_version,
 
33
    )
30
34
import bzrlib
31
 
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
32
 
                           TransportError, ConnectionError,
 
35
from bzrlib.errors import (NoSuchFile,
 
36
                           ConnectionError,
33
37
                           DependencyNotPresent)
34
38
from bzrlib.trace import mutter
35
39
from bzrlib.transport import register_urlparse_netloc_protocol
36
 
from bzrlib.transport.http import (HttpTransportBase, HttpServer,
37
 
                                   _extract_headers,
38
 
                                   response, _pycurl_errors)
 
40
from bzrlib.transport.http import (
 
41
    ca_bundle,
 
42
    _extract_headers,
 
43
    HttpTransportBase,
 
44
    _pycurl_errors,
 
45
    response,
 
46
    )
39
47
 
40
48
try:
41
49
    import pycurl
67
75
 
68
76
    PyCurl is a Python binding to the C "curl" multiprotocol client.
69
77
 
70
 
    This transport can be significantly faster than the builtin Python client. 
71
 
    Advantages include: DNS caching, connection keepalive, and ability to 
72
 
    set headers to allow caching.
 
78
    This transport can be significantly faster than the builtin
 
79
    Python client.  Advantages include: DNS caching.
73
80
    """
74
81
 
75
82
    def __init__(self, base, from_transport=None):
76
83
        super(PyCurlTransport, self).__init__(base)
 
84
        if base.startswith('https'):
 
85
            # Check availability of https into pycurl supported
 
86
            # protocols
 
87
            supported = pycurl.version_info()[8]
 
88
            if 'https' not in supported:
 
89
                raise DependencyNotPresent('pycurl', 'no https support')
 
90
        self.cabundle = ca_bundle.get_ca_path()
77
91
        if from_transport is not None:
78
 
            self._base_curl = from_transport._base_curl
79
 
            self._range_curl = from_transport._range_curl
 
92
            self._curl = from_transport._curl
80
93
        else:
81
94
            mutter('using pycurl %s' % pycurl.version)
82
 
            self._base_curl = pycurl.Curl()
83
 
            self._range_curl = pycurl.Curl()
 
95
            self._curl = pycurl.Curl()
84
96
 
85
97
    def should_cache(self):
86
98
        """Return True if the data pulled across should be cached locally.
91
103
        """See Transport.has()"""
92
104
        # We set NO BODY=0 in _get_full, so it should be safe
93
105
        # to re-use the non-range curl object
94
 
        curl = self._base_curl
 
106
        curl = self._curl
95
107
        abspath = self._real_abspath(relpath)
96
108
        curl.setopt(pycurl.URL, abspath)
97
109
        self._set_curl_options(curl)
99
111
        # don't want the body - ie just do a HEAD request
100
112
        # This means "NO BODY" not 'nobody'
101
113
        curl.setopt(pycurl.NOBODY, 1)
 
114
        # In some erroneous cases, pycurl will emit text on
 
115
        # stdout if we don't catch it (see InvalidStatus tests
 
116
        # for one such occurrence).
 
117
        blackhole = StringIO()
 
118
        curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
102
119
        self._curl_perform(curl)
103
120
        code = curl.getinfo(pycurl.HTTP_CODE)
104
121
        if code == 404: # not found
107
124
            return True
108
125
        else:
109
126
            self._raise_curl_http_error(curl)
110
 
        
 
127
 
111
128
    def _get(self, relpath, ranges, tail_amount=0):
112
129
        # This just switches based on the type of request
113
130
        if ranges is not None or tail_amount not in (0, None):
114
131
            return self._get_ranged(relpath, ranges, tail_amount=tail_amount)
115
132
        else:
116
133
            return self._get_full(relpath)
117
 
    
 
134
 
118
135
    def _setup_get_request(self, curl, relpath):
119
136
        # Make sure we do a GET request. versions > 7.14.1 also set the
120
137
        # NO BODY flag, but we'll do it ourselves in case it is an older
146
163
 
147
164
    def _get_full(self, relpath):
148
165
        """Make a request for the entire file"""
149
 
        curl = self._base_curl
 
166
        curl = self._curl
150
167
        abspath, data, header = self._setup_get_request(curl, relpath)
151
168
        self._curl_perform(curl)
152
169
 
156
173
        if code == 404:
157
174
            raise NoSuchFile(abspath)
158
175
        if code != 200:
159
 
            self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.')
 
176
            self._raise_curl_http_error(
 
177
                curl, 'expected 200 or 404 for full response.')
160
178
 
161
179
        return code, data
162
180
 
163
181
    def _get_ranged(self, relpath, ranges, tail_amount):
164
182
        """Make a request for just part of the file."""
165
 
        # We would like to re-use the same curl object for 
166
 
        # full requests and partial requests
167
 
        # Documentation says 'Pass in NULL to disable the use of ranges'
168
 
        # None is the closest we have, but at least with pycurl 7.13.1
169
 
        # It raises an 'invalid arguments' response
170
 
        # curl.setopt(pycurl.RANGE, None)
171
 
        # curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter
172
 
        # So instead we hack around this by using a separate objects
173
 
        curl = self._range_curl
 
183
        curl = self._curl
174
184
        abspath, data, header = self._setup_get_request(curl, relpath)
175
185
 
176
 
        curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount))
177
 
        self._curl_perform(curl)
 
186
        range_header = self.attempted_range_header(ranges, tail_amount)
 
187
        if range_header is None:
 
188
            # Forget ranges, the server can't handle them
 
189
            return self._get_full(relpath)
 
190
 
 
191
        self._curl_perform(curl, ['Range: bytes=%s'
 
192
                                  % self.range_header(ranges, tail_amount)])
178
193
        data.seek(0)
179
194
 
180
195
        code = curl.getinfo(pycurl.HTTP_CODE)
185
200
 
186
201
    def _post(self, body_bytes):
187
202
        fake_file = StringIO(body_bytes)
188
 
        curl = self._base_curl
 
203
        curl = self._curl
189
204
        # Other places that use _base_curl for GET requests explicitly set
190
205
        # HTTPGET, so it should be safe to re-use the same object for both GETs
191
206
        # and POSTs.
193
208
        curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
194
209
        curl.setopt(pycurl.READFUNCTION, fake_file.read)
195
210
        abspath, data, header = self._setup_request(curl, '.bzr/smart')
196
 
        self._curl_perform(curl)
 
211
        # We override the Expect: header so that pycurl will send the POST
 
212
        # body immediately.
 
213
        self._curl_perform(curl,['Expect: '])
197
214
        data.seek(0)
198
215
        code = curl.getinfo(pycurl.HTTP_CODE)
199
216
        headers = _extract_headers(header.getvalue(), abspath)
202
219
    def _raise_curl_http_error(self, curl, info=None):
203
220
        code = curl.getinfo(pycurl.HTTP_CODE)
204
221
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
205
 
        if info is None:
206
 
            msg = ''
 
222
        # Some error codes can be handled the same way for all
 
223
        # requests
 
224
        if code == 403:
 
225
            raise errors.TransportError(
 
226
                'Server refuses to fullfil the request for: %s' % url)
207
227
        else:
208
 
            msg = ': ' + info
209
 
        raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s'
210
 
                                              % (code,msg))
 
228
            if info is None:
 
229
                msg = ''
 
230
            else:
 
231
                msg = ': ' + info
 
232
            raise errors.InvalidHttpResponse(
 
233
                url, 'Unable to handle http code %d%s' % (code,msg))
211
234
 
212
235
    def _set_curl_options(self, curl):
213
236
        """Set options for all requests"""
214
 
        # There's no way in http/1.0 to say "must revalidate"; we don't want
215
 
        # to force it to always retrieve.  so just turn off the default Pragma
216
 
        # provided by Curl.
217
 
        # Also, we override the Expect: header so that pycurl will send the POST
218
 
        # body immediately.
219
 
        headers = ['Cache-control: max-age=0',
220
 
                   'Pragma: no-cache',
221
 
                   'Connection: Keep-Alive',
222
 
                   'Expect: ',]
223
237
        ## curl.setopt(pycurl.VERBOSE, 1)
224
238
        # TODO: maybe include a summary of the pycurl version
225
239
        ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
226
240
        curl.setopt(pycurl.USERAGENT, ua_str)
227
 
        curl.setopt(pycurl.HTTPHEADER, headers)
228
241
        curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
 
242
        if self.cabundle:
 
243
            curl.setopt(pycurl.CAINFO, self.cabundle)
229
244
 
230
 
    def _curl_perform(self, curl):
 
245
    def _curl_perform(self, curl, more_headers=[]):
231
246
        """Perform curl operation and translate exceptions."""
232
247
        try:
 
248
            # There's no way in http/1.0 to say "must
 
249
            # revalidate"; we don't want to force it to always
 
250
            # retrieve.  so just turn off the default Pragma
 
251
            # provided by Curl.
 
252
            headers = ['Cache-control: max-age=0',
 
253
                       'Pragma: no-cache',
 
254
                       'Connection: Keep-Alive']
 
255
            curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
233
256
            curl.perform()
234
257
        except pycurl.error, e:
235
 
            # XXX: There seem to be no symbolic constants for these values.
236
258
            url = curl.getinfo(pycurl.EFFECTIVE_URL)
237
259
            mutter('got pycurl error: %s, %s, %s, url: %s ',
238
260
                    e[0], _pycurl_errors.errorcode[e[0]], e, url)
239
261
            if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST,
240
262
                        _pycurl_errors.CURLE_COULDNT_CONNECT,
 
263
                        _pycurl_errors.CURLE_GOT_NOTHING,
241
264
                        _pycurl_errors.CURLE_COULDNT_RESOLVE_PROXY):
242
265
                raise ConnectionError('curl connection error (%s)\non %s'
243
266
                              % (e[1], url))
244
 
            # jam 20060713 The code didn't use to re-raise the exception here
 
267
            elif e[0] == _pycurl_errors.CURLE_PARTIAL_FILE:
 
268
                # Pycurl itself has detected a short read.  We do
 
269
                # not have all the information for the
 
270
                # ShortReadvError, but that should be enough
 
271
                raise errors.ShortReadvError(url,
 
272
                                             offset='unknown', length='unknown',
 
273
                                             actual='unknown',
 
274
                                             extra='Server aborted the request')
 
275
            # jam 20060713 The code didn't use to re-raise the exception here,
245
276
            # but that seemed bogus
246
277
            raise
247
278
 
248
279
 
249
 
class HttpServer_PyCurl(HttpServer):
250
 
    """Subclass of HttpServer that gives http+pycurl urls.
251
 
 
252
 
    This is for use in testing: connections to this server will always go
253
 
    through pycurl where possible.
254
 
    """
255
 
 
256
 
    # urls returned by this server should require the pycurl client impl
257
 
    _url_protocol = 'http+pycurl'
258
 
 
259
 
 
260
280
def get_test_permutations():
261
281
    """Return the permutations to be used in testing."""
 
282
    from bzrlib.tests.HttpServer import HttpServer_PyCurl
262
283
    return [(PyCurlTransport, HttpServer_PyCurl),
263
284
            ]