/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
1
# Copyright (C) 2006 Canonical Ltd
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
2
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
7
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
12
#
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""http/https transport using pycurl"""
18
19
# TODO: test reporting of http errors
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
20
#
1616.1.9 by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache
21
# TODO: Transport option to control caching of particular requests; broadly we
22
# would want to offer "caching allowed" or "must revalidate", depending on
23
# whether we expect a particular file will be modified after it's committed.
24
# It's probably safer to just always revalidate.  mbp 20060321
25
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
26
import os
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
27
from cStringIO import StringIO
1540.3.5 by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes
28
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
29
from bzrlib import errors
1540.3.15 by Martin Pool
[merge] large merge to sync with bzr.dev
30
import bzrlib
1540.3.5 by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes
31
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
32
                           TransportError, ConnectionError,
33
                           DependencyNotPresent)
1540.3.18 by Martin Pool
Style review fixes (thanks robertc)
34
from bzrlib.trace import mutter
1636.1.2 by Robert Collins
More review fixen to the relpath at '/' fixes.
35
from bzrlib.transport import register_urlparse_netloc_protocol
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
36
from bzrlib.transport.http import (HttpTransportBase, HttpServer,
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
37
                                   _extract_headers,
38
                                   response, _pycurl_errors)
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
39
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
40
try:
41
    import pycurl
42
except ImportError, e:
43
    mutter("failed to import pycurl: %s", e)
44
    raise DependencyNotPresent('pycurl', e)
45
1684.1.5 by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander)
46
try:
47
    # see if we can actually initialize PyCurl - sometimes it will load but
48
    # fail to start up due to this bug:
49
    #  
50
    #   32. (At least on Windows) If libcurl is built with c-ares and there's
51
    #   no DNS server configured in the system, the ares_init() call fails and
52
    #   thus curl_easy_init() fails as well. This causes weird effects for
53
    #   people who use numerical IP addresses only.
54
    #
55
    # reported by Alexander Belchenko, 2006-04-26
56
    pycurl.Curl()
57
except pycurl.error, e:
58
    mutter("failed to initialize pycurl: %s", e)
59
    raise DependencyNotPresent('pycurl', e)
60
1540.3.7 by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied.
61
1636.1.2 by Robert Collins
More review fixen to the relpath at '/' fixes.
62
register_urlparse_netloc_protocol('http+pycurl')
1636.1.1 by Robert Collins
Fix calling relpath() and abspath() on transports at their root.
63
64
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
65
class PyCurlTransport(HttpTransportBase):
1540.3.3 by Martin Pool
Review updates of pycurl transport
66
    """http client transport using pycurl
67
68
    PyCurl is a Python binding to the C "curl" multiprotocol client.
69
70
    This transport can be significantly faster than the builtin Python client. 
71
    Advantages include: DNS caching, connection keepalive, and ability to 
72
    set headers to allow caching.
73
    """
74
1786.1.32 by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports.
75
    def __init__(self, base, from_transport=None):
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
76
        super(PyCurlTransport, self).__init__(base)
1786.1.32 by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports.
77
        if from_transport is not None:
78
            self._base_curl = from_transport._base_curl
79
            self._range_curl = from_transport._range_curl
80
        else:
81
            mutter('using pycurl %s' % pycurl.version)
82
            self._base_curl = pycurl.Curl()
83
            self._range_curl = pycurl.Curl()
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
84
1540.3.10 by Martin Pool
[broken] keep hooking pycurl into test framework
85
    def should_cache(self):
86
        """Return True if the data pulled across should be cached locally.
87
        """
88
        return True
89
1540.3.3 by Martin Pool
Review updates of pycurl transport
90
    def has(self, relpath):
1786.1.32 by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports.
91
        """See Transport.has()"""
92
        # We set NO BODY=0 in _get_full, so it should be safe
93
        # to re-use the non-range curl object
94
        curl = self._base_curl
1540.3.24 by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl.
95
        abspath = self._real_abspath(relpath)
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
96
        curl.setopt(pycurl.URL, abspath)
97
        self._set_curl_options(curl)
1540.3.3 by Martin Pool
Review updates of pycurl transport
98
        # don't want the body - ie just do a HEAD request
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
99
        # This means "NO BODY" not 'nobody'
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
100
        curl.setopt(pycurl.NOBODY, 1)
101
        self._curl_perform(curl)
102
        code = curl.getinfo(pycurl.HTTP_CODE)
103
        if code == 404: # not found
104
            return False
105
        elif code in (200, 302): # "ok", "found"
106
            return True
107
        else:
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
108
            self._raise_curl_http_error(curl)
1540.3.3 by Martin Pool
Review updates of pycurl transport
109
        
1786.1.8 by John Arbash Meinel
[merge] Johan Rydberg test updates
110
    def _get(self, relpath, ranges, tail_amount=0):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
111
        # This just switches based on the type of request
112
        if ranges is not None or tail_amount not in (0, None):
113
            return self._get_ranged(relpath, ranges, tail_amount=tail_amount)
114
        else:
115
            return self._get_full(relpath)
116
    
117
    def _setup_get_request(self, curl, relpath):
118
        """Do the common setup stuff for making a request
119
120
        :param curl: The curl object to place the request on
121
        :param relpath: The relative path that we want to get
122
        :return: (abspath, data, header) 
123
                 abspath: full url
124
                 data: file that will be filled with the body
125
                 header: file that will be filled with the headers
126
        """
127
        abspath = self._real_abspath(relpath)
128
        curl.setopt(pycurl.URL, abspath)
129
        self._set_curl_options(curl)
1786.1.35 by John Arbash Meinel
For pycurl inverse of (NOBODY,1) is (HTTPGET,1) not (NOBODY,0)
130
        # Make sure we do a GET request. versions > 7.14.1 also set the
131
        # NO BODY flag, but we'll do it ourselves in case it is an older
132
        # pycurl version
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
133
        curl.setopt(pycurl.NOBODY, 0)
1786.1.35 by John Arbash Meinel
For pycurl inverse of (NOBODY,1) is (HTTPGET,1) not (NOBODY,0)
134
        curl.setopt(pycurl.HTTPGET, 1)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
135
136
        data = StringIO()
137
        header = StringIO()
138
        curl.setopt(pycurl.WRITEFUNCTION, data.write)
139
        curl.setopt(pycurl.HEADERFUNCTION, header.write)
140
141
        return abspath, data, header
142
143
    def _get_full(self, relpath):
144
        """Make a request for the entire file"""
145
        curl = self._base_curl
146
        abspath, data, header = self._setup_get_request(curl, relpath)
147
        self._curl_perform(curl)
148
149
        code = curl.getinfo(pycurl.HTTP_CODE)
150
        data.seek(0)
151
152
        if code == 404:
153
            raise NoSuchFile(abspath)
154
        if code != 200:
1786.1.40 by John Arbash Meinel
code cleanups from Martin Pool.
155
            self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.')
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
156
157
        return code, data
158
159
    def _get_ranged(self, relpath, ranges, tail_amount):
160
        """Make a request for just part of the file."""
161
        # We would like to re-use the same curl object for 
162
        # full requests and partial requests
1786.1.2 by John Arbash Meinel
Use 2 pycurl objects because of RANGE peculiarities.
163
        # Documentation says 'Pass in NULL to disable the use of ranges'
164
        # None is the closest we have, but at least with pycurl 7.13.1
165
        # It raises an 'invalid arguments' response
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
166
        # curl.setopt(pycurl.RANGE, None)
167
        # curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter
168
        # So instead we hack around this by using a separate objects
169
        curl = self._range_curl
170
        abspath, data, header = self._setup_get_request(curl, relpath)
171
1786.1.33 by John Arbash Meinel
Cleanup pass #2
172
        curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount))
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
173
        self._curl_perform(curl)
1786.1.33 by John Arbash Meinel
Cleanup pass #2
174
        data.seek(0)
175
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
176
        code = curl.getinfo(pycurl.HTTP_CODE)
1979.1.1 by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it
177
        # mutter('header:\n%r', header.getvalue())
1786.1.42 by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive.
178
        headers = _extract_headers(header.getvalue(), abspath)
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
179
        # handle_response will raise NoSuchFile, etc based on the response code
180
        return code, response.handle_response(abspath, code, headers, data)
1786.1.4 by John Arbash Meinel
Adding HEADERFUNCTION which lets us get any response codes we want.
181
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
182
    def _raise_curl_connection_error(self, curl):
183
        curl_errno = curl.getinfo(pycurl.OS_ERRNO)
184
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
185
        raise ConnectionError('curl connection error (%s) on %s'
186
                              % (os.strerror(curl_errno), url))
187
1786.1.40 by John Arbash Meinel
code cleanups from Martin Pool.
188
    def _raise_curl_http_error(self, curl, info=None):
1612.1.1 by Martin Pool
Raise errors correctly on pycurl connection failure
189
        code = curl.getinfo(pycurl.HTTP_CODE)
190
        url = curl.getinfo(pycurl.EFFECTIVE_URL)
1786.1.40 by John Arbash Meinel
code cleanups from Martin Pool.
191
        if info is None:
192
            msg = ''
193
        else:
194
            msg = ': ' + info
195
        raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s'
196
                                              % (code,msg))
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
197
1540.3.13 by Martin Pool
Curl should follow http redirects, the same as urllib
198
    def _set_curl_options(self, curl):
199
        """Set options for all requests"""
1540.3.11 by Martin Pool
doc
200
        # There's no way in http/1.0 to say "must revalidate"; we don't want
201
        # to force it to always retrieve.  so just turn off the default Pragma
202
        # provided by Curl.
1616.1.9 by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache
203
        headers = ['Cache-control: max-age=0',
1786.1.22 by John Arbash Meinel
Add Keep-Alive for 1.0 servers
204
                   'Pragma: no-cache',
205
                   'Connection: Keep-Alive']
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
206
        ## curl.setopt(pycurl.VERBOSE, 1)
1616.1.9 by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache
207
        # TODO: maybe include a summary of the pycurl version
1786.1.33 by John Arbash Meinel
Cleanup pass #2
208
        ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
1540.3.15 by Martin Pool
[merge] large merge to sync with bzr.dev
209
        curl.setopt(pycurl.USERAGENT, ua_str)
1540.3.13 by Martin Pool
Curl should follow http redirects, the same as urllib
210
        curl.setopt(pycurl.HTTPHEADER, headers)
211
        curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
1540.3.3 by Martin Pool
Review updates of pycurl transport
212
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
213
    def _curl_perform(self, curl):
1540.3.3 by Martin Pool
Review updates of pycurl transport
214
        """Perform curl operation and translate exceptions."""
215
        try:
1540.3.14 by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object.
216
            curl.perform()
1540.3.3 by Martin Pool
Review updates of pycurl transport
217
        except pycurl.error, e:
218
            # XXX: There seem to be no symbolic constants for these values.
1786.1.35 by John Arbash Meinel
For pycurl inverse of (NOBODY,1) is (HTTPGET,1) not (NOBODY,0)
219
            url = curl.getinfo(pycurl.EFFECTIVE_URL)
220
            mutter('got pycurl error: %s, %s, %s, url: %s ',
221
                    e[0], _pycurl_errors.errorcode[e[0]], e, url)
222
            if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST,
223
                        _pycurl_errors.CURLE_COULDNT_CONNECT):
1786.1.27 by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration.
224
                self._raise_curl_connection_error(curl)
225
            # jam 20060713 The code didn't use to re-raise the exception here
226
            # but that seemed bogus
227
            raise
1540.3.1 by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib.
228
1540.3.10 by Martin Pool
[broken] keep hooking pycurl into test framework
229
1540.3.24 by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl.
230
class HttpServer_PyCurl(HttpServer):
    """HttpServer variant that hands out http+pycurl urls.

    Meant for the test suite, so that connections made to this server
    go through pycurl where possible.
    """

    # The url scheme is what forces clients onto the pycurl implementation.
    _url_protocol = 'http+pycurl'
239
240
1540.3.10 by Martin Pool
[broken] keep hooking pycurl into test framework
241
def get_test_permutations():
    """Return the (transport, server) permutations to be used in testing."""
    pycurl_pair = (PyCurlTransport, HttpServer_PyCurl)
    return [pycurl_pair]