# $Id: http_client.py 271 2004-10-09 10:50:59Z fredrik $
# a simple asynchronous http client (based on SimpleAsyncHTTP.py from
# "Python Standard Library" by Fredrik Lundh, O'Reilly 2001)
#
# HTTP/1.1 and GZIP support added in January 2003 by Fredrik Lundh.
#
# changes:
# 2004-08-26 fl unified http callback
# 2004-10-09 fl factored out gzip_consumer support
#
# Copyright (c) 2001-2004 by Fredrik Lundh. All rights reserved.
#

from __future__ import absolute_import

import asyncore
import socket, string, time, sys
import StringIO
import mimetools, urlparse, urllib

try:
    from gzip_consumer import GzipConsumer
except ImportError:
    # gzip support is optional; leave a sentinel so the check in
    # handle_connect does not raise NameError when it is missing
    GzipConsumer = None

##
# Close connection. Request handlers can raise this exception to
# indicate that the connection should be closed.

class CloseConnection(Exception):
    pass

##
# Redirect connection. Request handlers can raise this exception to
# indicate that a new request should be issued.

class Redirect(CloseConnection):
    def __init__(self, location):
        self.location = location
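
##
# Example consumer (an illustrative sketch, not part of the original
# module): the http callback below raises Redirect to follow 3xx
# responses, and CloseConnection to give up after too many hops.  The
# class name and redirect limit are invented for this example.

class _example_redirect_consumer:

    max_redirects = 5

    def __init__(self):
        self.redirects = 0

    def http(self, ok, connection, *args):
        # called with ok=1 once the response headers have been parsed,
        # or ok=0 (plus exception info) if the connection failed
        if not ok:
            return
        if connection.status[1] in ("301", "302", "303", "307"):
            self.redirects = self.redirects + 1
            if self.redirects > self.max_redirects:
                raise CloseConnection
            raise Redirect(connection.header.get("location"))

    def feed(self, data):
        pass # response body data would be processed here

    def close(self):
        pass # end of response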

##
# Asynchronous HTTP/1.1 client.
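#
# The consumer passed to the constructor is expected to provide three
# methods: http(ok, connection, ...), called once the response headers
# have been parsed (ok=1) or when the connection fails (ok=0, with
# exception info); feed(data), called with chunks of the response body;
# and close(), called when the response has been read completely.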

class async_http(asyncore.dispatcher_with_send):
    # asynchronous http client

    user_agent = "http_client.py 1.2 (http://effbot.org/zone)"
    http_version = "1.1"

    proxies = urllib.getproxies()

    def __init__(self, uri, consumer, extra_headers=None):
        asyncore.dispatcher_with_send.__init__(self)

        # turn the uri into a valid request
        scheme, host, path, params, query, fragment = urlparse.urlparse(uri)

        # use origin host
        self.host = host

        # get proxy settings, if any
        proxy = self.proxies.get(scheme)
        if proxy:
            scheme, host, x, x, x, x = urlparse.urlparse(proxy)

        assert scheme == "http", "only supports HTTP requests (%s)" % scheme

        if not path:
            path = "/"
        if params:
            path = path + ";" + params
        if query:
            path = path + "?" + query
        if proxy:
            path = scheme + "://" + self.host + path

        self.path = path

        # get port number
        try:
            host, port = host.split(":", 1)
            port = int(port)
        except (TypeError, ValueError):
            port = 80 # default port

        self.consumer = consumer

        self.status = None
        self.header = None

        self.bytes_in = 0
        self.bytes_out = 0

        self.content_type = None
        self.content_length = None
        self.content_encoding = None
        self.transfer_encoding = None

        self.data = ""

        self.chunk_size = None

        self.timestamp = time.time()

        self.extra_headers = extra_headers

        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.connect((host, port))
        except socket.error:
            self.consumer.http(0, self, sys.exc_info())

    def handle_connect(self):
        # connection succeeded

        request = [
            "GET %s HTTP/%s" % (self.path, self.http_version),
            "Host: %s" % self.host,
            ]

        if GzipConsumer:
            request.append("Accept-Encoding: gzip")

        if self.extra_headers:
            request.extend(self.extra_headers)

        # make sure to include a user agent
        for header in request:
            if string.lower(header).startswith("user-agent:"):
                break
        else:
            request.append("User-Agent: %s" % self.user_agent)

        request = string.join(request, "\r\n") + "\r\n\r\n"

        self.send(request)

        self.bytes_out = self.bytes_out + len(request)

    def handle_expt(self):
        # connection failed (windows); notify consumer

        if sys.platform == "win32":
            self.close()
            self.consumer.http(0, self)

    def handle_read(self):
        # handle incoming data

        data = self.recv(2048)

        self.data = self.data + data
        self.bytes_in = self.bytes_in + len(data)

        while self.data:

            if not self.header:
                # check if we've seen a full header

                header = self.data.split("\r\n\r\n", 1)
                if len(header) <= 1:
                    return
                header, self.data = header

                # parse header
                fp = StringIO.StringIO(header)
                self.status = fp.readline().split(" ", 2)
                self.header = mimetools.Message(fp)

                # get http headers
                self.content_type = self.header.get("content-type")
                try:
                    self.content_length = int(
                        self.header.get("content-length")
                        )
                except (ValueError, TypeError):
                    self.content_length = None
                self.transfer_encoding = self.header.get("transfer-encoding")
                self.content_encoding = self.header.get("content-encoding")

                if self.content_encoding == "gzip":
                    # FIXME: report error if GzipConsumer is not available
                    self.consumer = GzipConsumer(self.consumer)

                try:
                    self.consumer.http(1, self)
                except Redirect, v:
                    # redirect
                    if v.location:
                        do_request(
                            v.location, self.consumer, self.extra_headers
                            )
                    self.close()
                    return
                except CloseConnection:
                    self.close()
                    return
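
            # Each chunk in a chunked response is framed as a hexadecimal
            # size, CRLF, the chunk data, CRLF; a chunk of size zero marks
            # the end of the body (handled via handle_close below).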
            if self.transfer_encoding == "chunked" and self.chunk_size is None:

                # strip off leading whitespace
                if self.data.startswith("\r\n"):
                    self.data = self.data[2:]

                chunk_size = self.data.split("\r\n", 1)
                if len(chunk_size) <= 1:
                    return
                chunk_size, self.data = chunk_size

                try:
                    self.chunk_size = int(chunk_size, 16)
                    if self.chunk_size <= 0:
                        raise ValueError
                except ValueError:
                    return self.handle_close()

            if not self.data:
                return

            data = self.data
            self.data = ""

            chunk_size = self.chunk_size or len(data)

            if chunk_size < len(data):
                self.data = data[chunk_size:]
                data = data[:chunk_size]
                self.chunk_size = None
            else:
                self.chunk_size = chunk_size - len(data)
                if self.chunk_size <= 0:
                    self.chunk_size = None

            if data:
                self.consumer.feed(data)

            if self.content_length:
                self.content_length -= chunk_size
                if self.content_length <= 0:
                    return self.handle_close()

    def handle_close(self):
        self.consumer.close()
        self.close()

    def handle_error(self):
        self.consumer.http(0, self, sys.exc_info())
        self.close()

def do_request(uri, consumer, extra_headers=None):

    return async_http(uri, consumer, extra_headers)
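
##
# Usage sketch (illustrative, not part of the original module): issue a
# request with extra headers and run the asyncore event loop.  The header
# values and the my_consumer name are invented for this example.
#
#     do_request(
#         "http://example.org/index.html",
#         my_consumer,  # any object with feed/close/http methods
#         extra_headers=["Accept: text/html", "Connection: close"],
#         )
#     asyncore.loop()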

if __name__ == "__main__":
    class dummy_consumer:
        def feed(self, data):
            # print "feed", repr(data)
            print "feed", repr(data[:20]), repr(data[-20:]), len(data)
        def close(self):
            print "close"
        def http(self, ok, connection, *args):
            # *args captures the exception info passed on failed connections
            print ok, connection, args
            print "status", connection.status
            print "header", connection.header
    try:
        url = sys.argv[1]
    except IndexError:
        url = "http://www.cnn.com/"
    do_request(url, dummy_consumer())
    asyncore.loop()