/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
1393.2.1 by John Arbash Meinel
Merged in split-storage-2 branch. Need to cleanup a little bit more still.
1
# $Id: http_client.py 271 2004-10-09 10:50:59Z fredrik $
2
# a simple asynchronous http client (based on SimpleAsyncHTTP.py from
3
# "Python Standard Library" by Fredrik Lundh, O'Reilly 2001)
4
#
5
# HTTP/1.1 and GZIP support added in January 2003 by Fredrik Lundh.
6
#
7
# changes:
8
# 2004-08-26 fl   unified http callback
9
# 2004-10-09 fl   factored out gzip_consumer support
10
#
11
# Copyright (c) 2001-2004 by Fredrik Lundh.  All rights reserved.
12
#
13
6379.6.3 by Jelmer Vernooij
Use absolute_import.
14
from __future__ import absolute_import
15
1393.2.1 by John Arbash Meinel
Merged in split-storage-2 branch. Need to cleanup a little bit more still.
16
import asyncore
17
import socket, string, time, sys
18
import StringIO
19
import mimetools, urlparse, urllib
20
21
# GzipConsumer is optional.  Bind the name to None when the module is not
# available, so that code testing "if GzipConsumer:" sees a false value
# instead of failing with a NameError.
try:
    from gzip_consumer import GzipConsumer
except ImportError:
    GzipConsumer = None
25
26
##
27
# Close connection.   Request handlers can raise this exception to
28
# indicate that the connection should be closed.
29
30
class CloseConnection(Exception):
    """Raised by a request handler to ask that the connection be closed."""
32
33
##
34
# Redirect connection.  Request handlers can raise this exception to
35
# indicate that a new request should be issued.
36
37
class Redirect(CloseConnection):
    """Raised by a request handler to ask for a new request.

    ``location`` is the URI to request instead.  async_http closes the
    current connection and, if ``location`` is non-empty, issues a new
    request for it with the same consumer and extra headers.
    """

    def __init__(self, location):
        # initialise the exception base class explicitly
        CloseConnection.__init__(self, location)
        self.location = location
40
41
##
42
# Asynchronous HTTP/1.1 client.
43
44
class async_http(asyncore.dispatcher_with_send):
    """Asynchronous HTTP/1.1 client that performs a single GET request.

    The request is sent as soon as the socket connects.  Progress is
    reported to the ``consumer`` object:

    * ``consumer.http(1, self)`` once the response header has been parsed
      (``self.status`` and ``self.header`` are set by then).  The callback
      may raise ``CloseConnection`` or ``Redirect``.
    * ``consumer.feed(data)`` for each piece of body data.
    * ``consumer.close()`` when the body is complete or the peer closes.
    * ``consumer.http(0, self)`` or ``consumer.http(0, self, exc_info)``
      when something fails.
    """

    user_agent = "http_client.py 1.2 (http://effbot.org/zone)"
    http_version = "1.1"

    # proxy map; evaluated once, when the class body is executed
    proxies = urllib.getproxies()

    def __init__(self, uri, consumer, extra_headers=None):
        """Parse ``uri`` and start connecting.

        uri -- the "http" URI to fetch.
        consumer -- object providing http(), feed() and close() callbacks.
        extra_headers -- optional list of complete header lines
            ("Name: value") that are added to the request.

        A socket.error raised while starting the connection is reported
        through ``consumer.http(0, self, exc_info)`` rather than raised.
        """
        asyncore.dispatcher_with_send.__init__(self)

        # turn the uri into a valid request
        scheme, host, path, params, query, fragment = urlparse.urlparse(uri)

        # use origin host (also when the connection goes through a proxy)
        self.host = host

        # get proxy settings, if any
        proxy = self.proxies.get(scheme)
        if proxy:
            # from here on, scheme/host describe the proxy we connect to
            scheme, host = urlparse.urlparse(proxy)[:2]

        # NOTE: kept as an assert so callers still see AssertionError; be
        # aware that the check is skipped when Python runs with -O.
        assert scheme == "http", "only supports HTTP requests (%s)" % scheme

        if not path:
            path = "/"
        if params:
            path = path + ";" + params
        if query:
            path = path + "?" + query
        if proxy:
            # a proxy needs the absolute URI in the request line
            path = scheme + "://" + self.host + path

        self.path = path

        # get port number
        try:
            host, port = host.split(":", 1)
            port = int(port)
        except (TypeError, ValueError):
            port = 80 # default port

        self.consumer = consumer

        self.status = None
        self.header = None

        self.bytes_in = 0
        self.bytes_out = 0

        self.content_type = None
        self.content_length = None
        self.content_encoding = None
        self.transfer_encoding = None

        # received data that has not been processed yet
        self.data = ""

        # bytes left in the current chunk (chunked transfer encoding only)
        self.chunk_size = None

        self.timestamp = time.time()

        self.extra_headers = extra_headers

        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.connect((host, port))
        except socket.error:
            self.consumer.http(0, self, sys.exc_info())

    def _gzip_consumer_class(self):
        """Return the GzipConsumer class, or None if it is not available."""
        try:
            return GzipConsumer
        except NameError:
            # the optional gzip_consumer import failed and left no binding
            return None

    def handle_connect(self):
        """Connection succeeded; build and send the GET request."""
        request = [
            "GET %s HTTP/%s" % (self.path, self.http_version),
            "Host: %s" % self.host,
            ]

        # only advertise gzip support if we can actually decode it
        if self._gzip_consumer_class() is not None:
            request.append("Accept-Encoding: gzip")

        if self.extra_headers:
            request.extend(self.extra_headers)

        # make sure to include a user agent
        if not any(line.lower().startswith("user-agent:") for line in request):
            request.append("User-Agent: %s" % self.user_agent)

        request = "\r\n".join(request) + "\r\n\r\n"

        self.send(request)

        self.bytes_out = self.bytes_out + len(request)

    def handle_expt(self):
        """Connection failed (windows); notify consumer."""
        if sys.platform == "win32":
            self.close()
            self.consumer.http(0, self)

    def handle_read(self):
        """Handle incoming data: parse the header, then feed the body."""
        data = self.recv(2048)

        self.data = self.data + data
        self.bytes_in = self.bytes_in + len(data)

        while self.data:

            # Compare against None: a parsed message without any header
            # fields is false in a boolean context, which would make us
            # try to parse the body as a header.
            if self.header is None:
                # check if we've seen a full header
                header = self.data.split("\r\n\r\n", 1)
                if len(header) <= 1:
                    return
                header, self.data = header

                # parse header; the first line is the status line
                fp = StringIO.StringIO(header)
                self.status = fp.readline().split(" ", 2)
                self.header = mimetools.Message(fp)

                # get http headers
                self.content_type = self.header.get("content-type")
                try:
                    self.content_length = int(
                        self.header.get("content-length")
                        )
                except (ValueError, TypeError):
                    # header missing or not a number
                    self.content_length = None
                self.transfer_encoding = self.header.get("transfer-encoding")
                self.content_encoding = self.header.get("content-encoding")

                if self.content_encoding == "gzip":
                    gzip_consumer = self._gzip_consumer_class()
                    if gzip_consumer is None:
                        # asyncore hands uncaught exceptions to
                        # handle_error(), which notifies the consumer
                        raise IOError(
                            "gzip-encoded response, but GzipConsumer "
                            "is not available"
                            )
                    self.consumer = gzip_consumer(self.consumer)

                try:
                    self.consumer.http(1, self)
                except Redirect as v:
                    # redirect
                    if v.location:
                        do_request(
                            v.location, self.consumer, self.extra_headers
                            )
                    self.close()
                    return
                except CloseConnection:
                    self.close()
                    return

            if self.transfer_encoding == "chunked" and self.chunk_size is None:

                # strip off leading whitespace (the CRLF that follows the
                # data of the previous chunk)
                if self.data.startswith("\r\n"):
                    self.data = self.data[2:]

                chunk_size = self.data.split("\r\n", 1)
                if len(chunk_size) <= 1:
                    # the chunk size line is not complete yet
                    return
                chunk_size, self.data = chunk_size

                try:
                    self.chunk_size = int(chunk_size, 16)
                    if self.chunk_size <= 0:
                        raise ValueError
                except ValueError:
                    # a zero-sized or unparsable chunk ends the response
                    return self.handle_close()

            if not self.data:
                return

            data = self.data
            self.data = ""

            # when not inside a chunk, everything we have is body data
            chunk_size = self.chunk_size or len(data)

            if chunk_size < len(data):
                # the chunk ends inside the buffer; keep the rest for the
                # next pass through the loop
                self.data = data[chunk_size:]
                data = data[:chunk_size]
                self.chunk_size = None
            else:
                # the whole buffer belongs to the current chunk
                self.chunk_size = chunk_size - len(data)
                if self.chunk_size <= 0:
                    self.chunk_size = None

            if data:
                self.consumer.feed(data)

            if self.content_length:
                self.content_length -= chunk_size
                if self.content_length <= 0:
                    return self.handle_close()

    def handle_close(self):
        """Tell the consumer that the body is complete, then close."""
        self.consumer.close()
        self.close()

    def handle_error(self):
        """Report the current exception to the consumer, then close."""
        self.consumer.http(0, self, sys.exc_info())
        self.close()
249
250
def do_request(uri, consumer, extra_headers=None):
    """Start an asynchronous GET request for ``uri``.

    uri -- the "http" URI to fetch.
    consumer -- object providing http(), feed() and close() callbacks.
    extra_headers -- optional list of additional header lines.

    Returns the new async_http instance.  The instance is an asyncore
    dispatcher, so the caller has to run asyncore.loop() for the request
    to make progress.
    """
    return async_http(uri, consumer, extra_headers)
253
254
if __name__ == "__main__":
    # Manual smoke test: fetch the URL given on the command line (or a
    # default one) and print what the consumer callbacks receive.

    class dummy_consumer:
        """Consumer that just prints what it is told."""

        def feed(self, data):
            # show both ends of the data and its total length
            print("feed %s %s %s" % (
                repr(data[:20]), repr(data[-20:]), len(data)
                ))

        def close(self):
            print("close")

        def http(self, ok, connection, *args):
            # async_http passes sys.exc_info() as an extra positional
            # argument on errors, so accept *args rather than **args
            print("%s %s %s" % (ok, connection, args))
            print("status %s" % (connection.status,))
            print("header %s" % (connection.header,))

    try:
        url = sys.argv[1]
    except IndexError:
        url = "http://www.cnn.com/"
    do_request(url, dummy_consumer())
    asyncore.loop()