# Copyright (C) 2005 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""Implementation of Transport over http."""
import errno
import os
import sys
import threading
import urllib, urllib2
import urlparse
from cStringIO import StringIO

from bzrlib.transport import Transport, Server
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
                           TransportError, ConnectionError)
from bzrlib.errors import BzrError, BzrCheckError
from bzrlib.branch import Branch
from bzrlib.trace import mutter
def extract_auth(url, password_manager):
    """Strip embedded credentials from an HTTP/HTTPS url.

    A ``user[:password]@`` prefix on the host part is removed from the url.
    When a password is present, the unquoted username and password are
    registered with password_manager for the bare host name (without any
    ``:port`` suffix).

    :param url: A url starting with 'http://' or 'https://'.
    :param password_manager: Object providing
        add_password(realm, uri, user, passwd).  It is only called when the
        url carries both a username and a password.
    :return: The url minus the auth parameters.
    """
    assert url.startswith('http://') or url.startswith('https://')
    scheme, host = url.split('//', 1)

    # Separate the network location from the (optional) path.
    if '/' in host:
        host, path = host.split('/', 1)
        path = '/' + path
    else:
        path = ''

    port = ''
    if '@' in host:
        auth, host = host.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # The port is split off so that the password is registered against
        # the host name alone; it is re-attached to the returned url below.
        if ':' in host:
            host, port = host.split(':', 1)
            port = ':' + port
        # FIXME: if password isn't given, should we ask for it?
        if password is not None:
            username = urllib.unquote(username)
            password = urllib.unquote(password)
            password_manager.add_password(None, host, username, password)

    url = scheme + '//' + host + port + path
    return url
def get_url(url):
    """Open url through urllib2 with HTTP basic-auth support.

    Credentials embedded in the url are moved into a urllib2 password
    manager by extract_auth() before the request is made.

    :param url: An http:// or https:// url, optionally containing
        ``user:password@`` credentials.
    :return: The object returned by the urllib2 opener's open() call.
    """
    manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    url = extract_auth(url, manager)
    # Trace the url only after the credentials have been stripped so that
    # passwords do not end up in the log.
    mutter("get_url %s" % url)
    auth_handler = urllib2.HTTPBasicAuthHandler(manager)
    opener = urllib2.build_opener(auth_handler)
    url_f = opener.open(url)
    return url_f
73
class HttpTransport(Transport):
74
"""This is the transport agent for http:// access.
76
TODO: Implement pipelined versions of all of the *_multi() functions.
79
def __init__(self, base):
80
"""Set the base path where files will be stored."""
81
assert base.startswith('http://') or base.startswith('https://')
84
super(HttpTransport, self).__init__(base)
85
# In the future we might actually connect to the remote host
86
# rather than using get_url
87
# self._connection = None
88
(self._proto, self._host,
89
self._path, self._parameters,
90
self._query, self._fragment) = urlparse.urlparse(self.base)
92
def should_cache(self):
93
"""Return True if the data pulled across should be cached locally.
97
def clone(self, offset=None):
98
"""Return a new HttpTransport with root at self.base + offset
99
For now HttpTransport does not actually connect, so just return
100
a new HttpTransport object.
103
return HttpTransport(self.base)
105
return HttpTransport(self.abspath(offset))
107
def abspath(self, relpath):
108
"""Return the full url to the given relative path.
109
This can be supplied with a string or a list
111
assert isinstance(relpath, basestring)
112
if isinstance(relpath, basestring):
113
relpath_parts = relpath.split('/')
115
# TODO: Don't call this with an array - no magic interfaces
116
relpath_parts = relpath[:]
117
if len(relpath_parts) > 1:
118
if relpath_parts[0] == '':
119
raise ValueError("path %r within branch %r seems to be absolute"
120
% (relpath, self._path))
121
if relpath_parts[-1] == '':
122
raise ValueError("path %r within branch %r seems to be a directory"
123
% (relpath, self._path))
124
basepath = self._path.split('/')
125
if len(basepath) > 0 and basepath[-1] == '':
126
basepath = basepath[:-1]
127
for p in relpath_parts:
129
if len(basepath) == 0:
130
# In most filesystems, a request for the parent
131
# of root, just returns root.
134
elif p == '.' or p == '':
138
# Possibly, we could use urlparse.urljoin() here, but
139
# I'm concerned about when it chooses to strip the last
140
# portion of the path, and when it doesn't.
141
path = '/'.join(basepath)
142
return urlparse.urlunparse((self._proto,
143
self._host, path, '', '', ''))
145
def has(self, relpath):
146
"""Does the target location exist?
148
TODO: HttpTransport.has() should use a HEAD request,
149
not a full GET request.
151
TODO: This should be changed so that we don't use
152
urllib2 and get an exception, the code path would be
153
cleaner if we just do an http HEAD request, and parse
158
path = self.abspath(relpath)
160
# Without the read and then close()
161
# we tend to have busy sockets.
165
except urllib2.URLError, e:
166
mutter('url error code: %s for has url: %r', e.code, path)
171
mutter('io error: %s %s for has url: %r',
172
e.errno, errno.errorcode.get(e.errno), path)
173
if e.errno == errno.ENOENT:
175
raise TransportError(orig_error=e)
177
def get(self, relpath, decode=False):
178
"""Get the file at the given relative path.
180
:param relpath: The relative path to the file
184
path = self.abspath(relpath)
186
except urllib2.HTTPError, e:
187
mutter('url error code: %s for has url: %r', e.code, path)
189
raise NoSuchFile(path, extra=e)
191
except (BzrError, IOError), e:
192
if hasattr(e, 'errno'):
193
mutter('io error: %s %s for has url: %r',
194
e.errno, errno.errorcode.get(e.errno), path)
195
if e.errno == errno.ENOENT:
196
raise NoSuchFile(path, extra=e)
197
raise ConnectionError(msg = "Error retrieving %s: %s"
198
% (self.abspath(relpath), str(e)),
201
def put(self, relpath, f, mode=None):
202
"""Copy the file-like or string object into the location.
204
:param relpath: Location to put the contents, relative to base.
205
:param f: File-like or string object.
207
raise TransportNotPossible('http PUT not supported')
209
def mkdir(self, relpath, mode=None):
210
"""Create a directory at the given path."""
211
raise TransportNotPossible('http does not support mkdir()')
213
def append(self, relpath, f):
214
"""Append the text in the file-like object into the final
217
raise TransportNotPossible('http does not support append()')
219
def copy(self, rel_from, rel_to):
220
"""Copy the item at rel_from to the location at rel_to"""
221
raise TransportNotPossible('http does not support copy()')
223
def copy_to(self, relpaths, other, mode=None, pb=None):
224
"""Copy a set of entries from self into another Transport.
226
:param relpaths: A list/generator of entries to be copied.
228
TODO: if other is LocalTransport, is it possible to
229
do better than put(get())?
231
# At this point HttpTransport might be able to check and see if
232
# the remote location is the same, and rather than download, and
233
# then upload, it could just issue a remote copy_this command.
234
if isinstance(other, HttpTransport):
235
raise TransportNotPossible('http cannot be the target of copy_to()')
237
return super(HttpTransport, self).copy_to(relpaths, other, mode=mode, pb=pb)
239
def move(self, rel_from, rel_to):
240
"""Move the item at rel_from to the location at rel_to"""
241
raise TransportNotPossible('http does not support move()')
243
def delete(self, relpath):
244
"""Delete the item at relpath"""
245
raise TransportNotPossible('http does not support delete()')
247
def is_readonly(self):
248
"""See Transport.is_readonly."""
252
"""See Transport.listable."""
255
def stat(self, relpath):
256
"""Return the stat information for a file.
258
raise TransportNotPossible('http does not support stat()')
260
def lock_read(self, relpath):
261
"""Lock the given file for shared (read) access.
262
:return: A lock object, which should be passed to Transport.unlock()
264
# The old RemoteBranch ignore lock for reading, so we will
265
# continue that tradition and return a bogus lock object.
266
class BogusLock(object):
267
def __init__(self, path):
271
return BogusLock(relpath)
273
def lock_write(self, relpath):
274
"""Lock the given file for exclusive (write) access.
275
WARNING: many transports do not support this, so trying avoid using it
277
:return: A lock object, which should be passed to Transport.unlock()
279
raise TransportNotPossible('http does not support lock_write()')
282
#---------------- test server facilities ----------------
283
import BaseHTTPServer, SimpleHTTPServer, socket, time
class WebserverNotAvailable(Exception):
    """Raised when the test web server cannot be started."""
    pass
class BadWebserverPath(ValueError):
    """A path that does not lie below the web server's root directory.

    Instantiate with two arguments: the offending path and the directory
    it was expected to be in.
    """

    def __str__(self):
        return 'path %s is not in %s' % self.args
296
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
298
def log_message(self, format, *args):
299
self.server.test_case.log("webserver - %s - - [%s] %s",
300
self.address_string(),
301
self.log_date_time_string(),
304
def handle_one_request(self):
305
"""Handle a single HTTP request.
307
You normally don't need to override this method; see the class
308
__doc__ string for information on how to handle specific HTTP
309
commands such as GET and POST.
312
for i in xrange(1,11): # Don't try more than 10 times
314
self.raw_requestline = self.rfile.readline()
315
except socket.error, e:
316
if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
317
# omitted for now because some tests look at the log of
318
# the server and expect to see no errors. see recent
319
# email thread. -- mbp 20051021.
320
## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
326
if not self.raw_requestline:
327
self.close_connection = 1
329
if not self.parse_request(): # An error code has been sent, just exit
331
mname = 'do_' + self.command
332
if not hasattr(self, mname):
333
self.send_error(501, "Unsupported method (%r)" % self.command)
335
method = getattr(self, mname)
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that remembers the test case it is serving.

    Request handlers reach the test case through self.server.test_case.
    """

    def __init__(self, server_address, RequestHandlerClass, test_case):
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                           RequestHandlerClass)
        self.test_case = test_case
345
class HttpServer(Server):
346
"""A test server for http transports."""
348
_HTTP_PORTS = range(13000, 0x8000)
350
def _http_start(self):
352
for port in self._HTTP_PORTS:
354
httpd = TestingHTTPServer(('localhost', port),
355
TestingHTTPRequestHandler,
357
except socket.error, e:
358
if e.args[0] == errno.EADDRINUSE:
360
print >>sys.stderr, "Cannot run webserver :-("
366
raise WebserverNotAvailable("Cannot run webserver :-( "
367
"no free ports in range %s..%s" %
368
(_HTTP_PORTS[0], _HTTP_PORTS[-1]))
370
self._http_base_url = 'http://localhost:%s/' % port
371
self._http_starting.release()
372
httpd.socket.settimeout(0.1)
374
while self._http_running:
376
httpd.handle_request()
377
except socket.timeout:
380
def _get_remote_url(self, path):
381
path_parts = path.split(os.path.sep)
382
if os.path.isabs(path):
383
if path_parts[:len(self._local_path_parts)] != \
384
self._local_path_parts:
385
raise BadWebserverPath(path, self.test_dir)
386
remote_path = '/'.join(path_parts[len(self._local_path_parts):])
388
remote_path = '/'.join(path_parts)
390
self._http_starting.acquire()
391
self._http_starting.release()
392
return self._http_base_url + remote_path
394
def log(self, *args, **kwargs):
395
"""Capture Server log output."""
398
"""See bzrlib.transport.Server.setUp."""
399
self._home_dir = os.getcwdu()
400
self._local_path_parts = self._home_dir.split(os.path.sep)
401
self._http_starting = threading.Lock()
402
self._http_starting.acquire()
403
self._http_running = True
404
self._http_base_url = None
405
self._http_thread = threading.Thread(target=self._http_start)
406
self._http_thread.setDaemon(True)
407
self._http_thread.start()
408
self._http_proxy = os.environ.get("http_proxy")
409
if self._http_proxy is not None:
410
del os.environ["http_proxy"]
413
"""See bzrlib.transport.Server.tearDown."""
414
self._http_running = False
415
self._http_thread.join()
416
if self._http_proxy is not None:
418
os.environ["http_proxy"] = self._http_proxy
421
"""See bzrlib.transport.Server.get_url."""
422
return self._get_remote_url(self._home_dir)