bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
1 |
# Copyright (C) 2006 Canonical Ltd
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
2 |
#
|
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
3 |
# This program is free software; you can redistribute it and/or modify
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
7 |
#
|
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
8 |
# This program is distributed in the hope that it will be useful,
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
12 |
#
|
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
13 |
# You should have received a copy of the GNU General Public License
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""http/https transport using pycurl"""
|
|
18 |
||
19 |
# TODO: test reporting of http errors
|
|
|
1887.1.1
by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines, |
20 |
#
|
|
1616.1.9
by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache |
21 |
# TODO: Transport option to control caching of particular requests; broadly we
|
22 |
# would want to offer "caching allowed" or "must revalidate", depending on
|
|
23 |
# whether we expect a particular file will be modified after it's committed.
|
|
24 |
# It's probably safer to just always revalidate. mbp 20060321
|
|
25 |
||
|
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
26 |
import os |
|
1786.1.42
by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive. |
27 |
from cStringIO import StringIO |
|
1540.3.5
by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes |
28 |
|
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
29 |
from bzrlib import errors |
|
1540.3.15
by Martin Pool
[merge] large merge to sync with bzr.dev |
30 |
import bzrlib |
|
1540.3.5
by Martin Pool
Raise exception if unicode is passed to transport; formatting fixes |
31 |
from bzrlib.errors import (TransportNotPossible, NoSuchFile, |
|
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
32 |
TransportError, ConnectionError, |
33 |
DependencyNotPresent) |
|
|
1540.3.18
by Martin Pool
Style review fixes (thanks robertc) |
34 |
from bzrlib.trace import mutter |
|
1636.1.2
by Robert Collins
More review fixen to the relpath at '/' fixes. |
35 |
from bzrlib.transport import register_urlparse_netloc_protocol |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
36 |
from bzrlib.transport.http import (HttpTransportBase, HttpServer, |
|
1786.1.42
by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive. |
37 |
_extract_headers, |
38 |
response, _pycurl_errors) |
|
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
39 |
|
|
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
40 |
try: |
41 |
import pycurl |
|
42 |
except ImportError, e: |
|
43 |
mutter("failed to import pycurl: %s", e) |
|
44 |
raise DependencyNotPresent('pycurl', e) |
|
45 |
||
|
1684.1.5
by Martin Pool
(patch) check that pycurl will actuall initialize as well as load (Alexander) |
46 |
try: |
47 |
# see if we can actually initialize PyCurl - sometimes it will load but
|
|
48 |
# fail to start up due to this bug:
|
|
49 |
#
|
|
50 |
# 32. (At least on Windows) If libcurl is built with c-ares and there's
|
|
51 |
# no DNS server configured in the system, the ares_init() call fails and
|
|
52 |
# thus curl_easy_init() fails as well. This causes weird effects for
|
|
53 |
# people who use numerical IP addresses only.
|
|
54 |
#
|
|
55 |
# reported by Alexander Belchenko, 2006-04-26
|
|
56 |
pycurl.Curl() |
|
57 |
except pycurl.error, e: |
|
58 |
mutter("failed to initialize pycurl: %s", e) |
|
59 |
raise DependencyNotPresent('pycurl', e) |
|
60 |
||
|
1540.3.7
by Martin Pool
Prepare to select a transport depending on what dependencies can be satisfied. |
61 |
|
|
1636.1.2
by Robert Collins
More review fixen to the relpath at '/' fixes. |
62 |
register_urlparse_netloc_protocol('http+pycurl') |
|
1636.1.1
by Robert Collins
Fix calling relpath() and abspath() on transports at their root. |
63 |
|
64 |
||
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
65 |
class PyCurlTransport(HttpTransportBase): |
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
66 |
"""http client transport using pycurl |
67 |
||
68 |
PyCurl is a Python binding to the C "curl" multiprotocol client.
|
|
69 |
||
70 |
This transport can be significantly faster than the builtin Python client.
|
|
71 |
Advantages include: DNS caching, connection keepalive, and ability to
|
|
72 |
set headers to allow caching.
|
|
73 |
"""
|
|
74 |
||
|
1786.1.32
by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports. |
75 |
def __init__(self, base, from_transport=None): |
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
76 |
super(PyCurlTransport, self).__init__(base) |
|
1786.1.32
by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports. |
77 |
if from_transport is not None: |
78 |
self._base_curl = from_transport._base_curl |
|
79 |
self._range_curl = from_transport._range_curl |
|
80 |
else: |
|
81 |
mutter('using pycurl %s' % pycurl.version) |
|
82 |
self._base_curl = pycurl.Curl() |
|
83 |
self._range_curl = pycurl.Curl() |
|
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
84 |
|
|
1540.3.10
by Martin Pool
[broken] keep hooking pycurl into test framework |
85 |
def should_cache(self): |
86 |
"""Return True if the data pulled across should be cached locally. |
|
87 |
"""
|
|
88 |
return True |
|
89 |
||
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
90 |
def has(self, relpath): |
|
1786.1.32
by John Arbash Meinel
cleanup pass, allow pycurl connections to be shared between transports. |
91 |
"""See Transport.has()""" |
92 |
# We set NO BODY=0 in _get_full, so it should be safe
|
|
93 |
# to re-use the non-range curl object
|
|
94 |
curl = self._base_curl |
|
|
1540.3.24
by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl. |
95 |
abspath = self._real_abspath(relpath) |
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
96 |
curl.setopt(pycurl.URL, abspath) |
97 |
self._set_curl_options(curl) |
|
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
98 |
# don't want the body - ie just do a HEAD request
|
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
99 |
# This means "NO BODY" not 'nobody'
|
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
100 |
curl.setopt(pycurl.NOBODY, 1) |
101 |
self._curl_perform(curl) |
|
102 |
code = curl.getinfo(pycurl.HTTP_CODE) |
|
103 |
if code == 404: # not found |
|
104 |
return False |
|
105 |
elif code in (200, 302): # "ok", "found" |
|
106 |
return True |
|
107 |
else: |
|
|
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
108 |
self._raise_curl_http_error(curl) |
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
109 |
|
|
1786.1.8
by John Arbash Meinel
[merge] Johan Rydberg test updates |
110 |
def _get(self, relpath, ranges, tail_amount=0): |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
111 |
# This just switches based on the type of request
|
112 |
if ranges is not None or tail_amount not in (0, None): |
|
113 |
return self._get_ranged(relpath, ranges, tail_amount=tail_amount) |
|
114 |
else: |
|
115 |
return self._get_full(relpath) |
|
116 |
||
117 |
def _setup_get_request(self, curl, relpath): |
|
118 |
"""Do the common setup stuff for making a request |
|
119 |
||
120 |
:param curl: The curl object to place the request on
|
|
121 |
:param relpath: The relative path that we want to get
|
|
122 |
:return: (abspath, data, header)
|
|
123 |
abspath: full url
|
|
124 |
data: file that will be filled with the body
|
|
125 |
header: file that will be filled with the headers
|
|
126 |
"""
|
|
127 |
abspath = self._real_abspath(relpath) |
|
128 |
curl.setopt(pycurl.URL, abspath) |
|
129 |
self._set_curl_options(curl) |
|
|
1786.1.35
by John Arbash Meinel
For pycurl inverse of (NOBODY,1) is (HTTPGET,1) not (NOBODY,0) |
130 |
# Make sure we do a GET request. versions > 7.14.1 also set the
|
131 |
# NO BODY flag, but we'll do it ourselves in case it is an older
|
|
132 |
# pycurl version
|
|
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
133 |
curl.setopt(pycurl.NOBODY, 0) |
|
1786.1.35
by John Arbash Meinel
For pycurl inverse of (NOBODY,1) is (HTTPGET,1) not (NOBODY,0) |
134 |
curl.setopt(pycurl.HTTPGET, 1) |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
135 |
|
136 |
data = StringIO() |
|
137 |
header = StringIO() |
|
138 |
curl.setopt(pycurl.WRITEFUNCTION, data.write) |
|
139 |
curl.setopt(pycurl.HEADERFUNCTION, header.write) |
|
140 |
||
141 |
return abspath, data, header |
|
142 |
||
143 |
def _get_full(self, relpath): |
|
144 |
"""Make a request for the entire file""" |
|
145 |
curl = self._base_curl |
|
146 |
abspath, data, header = self._setup_get_request(curl, relpath) |
|
147 |
self._curl_perform(curl) |
|
148 |
||
149 |
code = curl.getinfo(pycurl.HTTP_CODE) |
|
150 |
data.seek(0) |
|
151 |
||
152 |
if code == 404: |
|
153 |
raise NoSuchFile(abspath) |
|
154 |
if code != 200: |
|
|
1786.1.40
by John Arbash Meinel
code cleanups from Martin Pool. |
155 |
self._raise_curl_http_error(curl, 'expected 200 or 404 for full response.') |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
156 |
|
157 |
return code, data |
|
158 |
||
159 |
def _get_ranged(self, relpath, ranges, tail_amount): |
|
160 |
"""Make a request for just part of the file.""" |
|
161 |
# We would like to re-use the same curl object for
|
|
162 |
# full requests and partial requests
|
|
|
1786.1.2
by John Arbash Meinel
Use 2 pycurl objects because of RANGE peculiarities. |
163 |
# Documentation says 'Pass in NULL to disable the use of ranges'
|
164 |
# None is the closest we have, but at least with pycurl 7.13.1
|
|
165 |
# It raises an 'invalid arguments' response
|
|
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
166 |
# curl.setopt(pycurl.RANGE, None)
|
167 |
# curl.unsetopt(pycurl.RANGE) doesn't support the RANGE parameter
|
|
168 |
# So instead we hack around this by using a separate objects
|
|
169 |
curl = self._range_curl |
|
170 |
abspath, data, header = self._setup_get_request(curl, relpath) |
|
171 |
||
|
1786.1.33
by John Arbash Meinel
Cleanup pass #2 |
172 |
curl.setopt(pycurl.RANGE, self.range_header(ranges, tail_amount)) |
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
173 |
self._curl_perform(curl) |
|
1786.1.33
by John Arbash Meinel
Cleanup pass #2 |
174 |
data.seek(0) |
175 |
||
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
176 |
code = curl.getinfo(pycurl.HTTP_CODE) |
|
1979.1.1
by John Arbash Meinel
Fix bug #57723, parse boundary="" correctly, since Squid uses it |
177 |
# mutter('header:\n%r', header.getvalue())
|
|
1786.1.42
by John Arbash Meinel
Update _extract_headers, make it less generic, and non recursive. |
178 |
headers = _extract_headers(header.getvalue(), abspath) |
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
179 |
# handle_response will raise NoSuchFile, etc based on the response code
|
180 |
return code, response.handle_response(abspath, code, headers, data) |
|
|
1786.1.4
by John Arbash Meinel
Adding HEADERFUNCTION which lets us get any response codes we want. |
181 |
|
|
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
182 |
def _raise_curl_connection_error(self, curl): |
183 |
curl_errno = curl.getinfo(pycurl.OS_ERRNO) |
|
184 |
url = curl.getinfo(pycurl.EFFECTIVE_URL) |
|
185 |
raise ConnectionError('curl connection error (%s) on %s' |
|
186 |
% (os.strerror(curl_errno), url)) |
|
187 |
||
|
1786.1.40
by John Arbash Meinel
code cleanups from Martin Pool. |
188 |
def _raise_curl_http_error(self, curl, info=None): |
|
1612.1.1
by Martin Pool
Raise errors correctly on pycurl connection failure |
189 |
code = curl.getinfo(pycurl.HTTP_CODE) |
190 |
url = curl.getinfo(pycurl.EFFECTIVE_URL) |
|
|
1786.1.40
by John Arbash Meinel
code cleanups from Martin Pool. |
191 |
if info is None: |
192 |
msg = '' |
|
193 |
else: |
|
194 |
msg = ': ' + info |
|
195 |
raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s' |
|
196 |
% (code,msg)) |
|
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
197 |
|
|
1540.3.13
by Martin Pool
Curl should follow http redirects, the same as urllib |
198 |
def _set_curl_options(self, curl): |
199 |
"""Set options for all requests""" |
|
|
1540.3.11
by Martin Pool
doc |
200 |
# There's no way in http/1.0 to say "must revalidate"; we don't want
|
201 |
# to force it to always retrieve. so just turn off the default Pragma
|
|
202 |
# provided by Curl.
|
|
|
1616.1.9
by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache |
203 |
headers = ['Cache-control: max-age=0', |
|
1786.1.22
by John Arbash Meinel
Add Keep-Alive for 1.0 servers |
204 |
'Pragma: no-cache', |
205 |
'Connection: Keep-Alive'] |
|
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
206 |
## curl.setopt(pycurl.VERBOSE, 1)
|
|
1616.1.9
by Martin Pool
Set Cache-control: max-age=0 and Pragma: no-cache |
207 |
# TODO: maybe include a summary of the pycurl version
|
|
1786.1.33
by John Arbash Meinel
Cleanup pass #2 |
208 |
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,) |
|
1540.3.15
by Martin Pool
[merge] large merge to sync with bzr.dev |
209 |
curl.setopt(pycurl.USERAGENT, ua_str) |
|
1540.3.13
by Martin Pool
Curl should follow http redirects, the same as urllib |
210 |
curl.setopt(pycurl.HTTPHEADER, headers) |
211 |
curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses |
|
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
212 |
|
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
213 |
def _curl_perform(self, curl): |
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
214 |
"""Perform curl operation and translate exceptions.""" |
215 |
try: |
|
|
1540.3.14
by Martin Pool
[pycurl] Make Curl instance a local variable not a long-lived object. |
216 |
curl.perform() |
|
1540.3.3
by Martin Pool
Review updates of pycurl transport |
217 |
except pycurl.error, e: |
218 |
# XXX: There seem to be no symbolic constants for these values.
|
|
|
1786.1.35
by John Arbash Meinel
For pycurl inverse of (NOBODY,1) is (HTTPGET,1) not (NOBODY,0) |
219 |
url = curl.getinfo(pycurl.EFFECTIVE_URL) |
220 |
mutter('got pycurl error: %s, %s, %s, url: %s ', |
|
221 |
e[0], _pycurl_errors.errorcode[e[0]], e, url) |
|
222 |
if e[0] in (_pycurl_errors.CURLE_COULDNT_RESOLVE_HOST, |
|
223 |
_pycurl_errors.CURLE_COULDNT_CONNECT): |
|
|
1786.1.27
by John Arbash Meinel
Fix up the http transports so that tests pass with the new configuration. |
224 |
self._raise_curl_connection_error(curl) |
225 |
# jam 20060713 The code didn't use to re-raise the exception here
|
|
226 |
# but that seemed bogus
|
|
227 |
raise
|
|
|
1540.3.1
by Martin Pool
First-cut implementation of pycurl. Substantially faster than using urllib. |
228 |
|
|
1540.3.10
by Martin Pool
[broken] keep hooking pycurl into test framework |
229 |
|
|
1540.3.24
by Martin Pool
Add new protocol 'http+pycurl' that always uses PyCurl. |
230 |
class HttpServer_PyCurl(HttpServer):
    """HttpServer variant that hands out http+pycurl urls.

    Meant for the test suite: clients connecting to the urls of this server
    will go through pycurl where possible.
    """

    # urls returned by this server should require the pycurl client impl
    _url_protocol = 'http+pycurl'
|
239 |
||
240 |
||
|
1540.3.10
by Martin Pool
[broken] keep hooking pycurl into test framework |
241 |
def get_test_permutations():
    """Return the permutations to be used in testing.

    :return: A one-element list holding the (transport class, server class)
        pair for the pycurl implementation.
    """
    pycurl_permutation = (PyCurlTransport, HttpServer_PyCurl)
    return [pycurl_permutation]
|