1
# Copyright (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Implementation of Transport over http."""
from cStringIO import StringIO
import errno
import os
import threading
import urllib, urllib2
import urlparse
from warnings import warn

import bzrlib
from bzrlib.transport import Transport, Server
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
                           TransportError, ConnectionError)
from bzrlib.errors import BzrError, BzrCheckError
from bzrlib.branch import Branch
from bzrlib.trace import mutter
from bzrlib.ui import ui_factory
35
def extract_auth(url, password_manager):
37
Extract auth parameters from am HTTP/HTTPS url and add them to the given
38
password manager. Return the url, minus those auth parameters (which
41
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
42
assert (scheme == 'http') or (scheme == 'https')
45
auth, netloc = netloc.split('@', 1)
47
username, password = auth.split(':', 1)
49
username, password = auth, None
51
host = netloc.split(':', 1)[0]
54
username = urllib.unquote(username)
55
if password is not None:
56
password = urllib.unquote(password)
58
password = ui_factory.get_password(prompt='HTTP %(user)@%(host) password',
59
user=username, host=host)
60
password_manager.add_password(None, host, username, password)
61
url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
65
class Request(urllib2.Request):
66
"""Request object for urllib2 that allows the method to be overridden."""
71
if self.method is not None:
74
return urllib2.Request.get_method(self)
77
def get_url(url, method=None, ranges=None):
    """Open ``url`` with urllib2 and return the response object.

    Credentials embedded in the url are moved into a basic-auth handler
    (see extract_auth) before the request is made.

    :param url: The http:// or https:// url to fetch.
    :param method: HTTP method to use instead of urllib2's default, or None.
    :param ranges: Value to send verbatim as the ``Range`` request header,
        or None/empty to send no such header.
    :return: The object returned by the urllib2 opener's ``open()``.
    """
    mutter("get_url %s", url)
    manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    url = extract_auth(url, manager)
    auth_handler = urllib2.HTTPBasicAuthHandler(manager)
    opener = urllib2.build_opener(auth_handler)

    request = Request(url)
    request.method = method
    request.add_header('User-Agent', 'bzr/%s' % bzrlib.__version__)
    # Only ask for a partial response when the caller supplied ranges;
    # adding the header unconditionally would send "Range: None".
    if ranges:
        request.add_header('Range', ranges)
    response = opener.open(request)
    return response
94
class HttpTransport(Transport):
95
"""This is the transport agent for http:// access.
97
TODO: Implement pipelined versions of all of the *_multi() functions.
100
def __init__(self, base):
101
"""Set the base path where files will be stored."""
102
assert base.startswith('http://') or base.startswith('https://')
105
super(HttpTransport, self).__init__(base)
106
# In the future we might actually connect to the remote host
107
# rather than using get_url
108
# self._connection = None
109
(self._proto, self._host,
110
self._path, self._parameters,
111
self._query, self._fragment) = urlparse.urlparse(self.base)
113
def should_cache(self):
114
"""Return True if the data pulled across should be cached locally.
118
def clone(self, offset=None):
119
"""Return a new HttpTransport with root at self.base + offset
120
For now HttpTransport does not actually connect, so just return
121
a new HttpTransport object.
124
return HttpTransport(self.base)
126
return HttpTransport(self.abspath(offset))
128
def abspath(self, relpath):
129
"""Return the full url to the given relative path.
130
This can be supplied with a string or a list
132
assert isinstance(relpath, basestring)
133
if isinstance(relpath, basestring):
134
relpath_parts = relpath.split('/')
136
# TODO: Don't call this with an array - no magic interfaces
137
relpath_parts = relpath[:]
138
if len(relpath_parts) > 1:
139
if relpath_parts[0] == '':
140
raise ValueError("path %r within branch %r seems to be absolute"
141
% (relpath, self._path))
142
if relpath_parts[-1] == '':
143
raise ValueError("path %r within branch %r seems to be a directory"
144
% (relpath, self._path))
145
basepath = self._path.split('/')
146
if len(basepath) > 0 and basepath[-1] == '':
147
basepath = basepath[:-1]
148
for p in relpath_parts:
150
if len(basepath) == 0:
151
# In most filesystems, a request for the parent
152
# of root, just returns root.
155
elif p == '.' or p == '':
159
# Possibly, we could use urlparse.urljoin() here, but
160
# I'm concerned about when it chooses to strip the last
161
# portion of the path, and when it doesn't.
162
path = '/'.join(basepath)
163
return urlparse.urlunparse((self._proto,
164
self._host, path, '', '', ''))
166
def has(self, relpath):
167
"""Does the target location exist?
169
TODO: This should be changed so that we don't use
170
urllib2 and get an exception, the code path would be
171
cleaner if we just do an http HEAD request, and parse
176
path = self.abspath(relpath)
177
f = get_url(path, method='HEAD')
178
# Without the read and then close()
179
# we tend to have busy sockets.
183
except urllib2.HTTPError, e:
184
mutter('url error code: %s for has url: %r', e.code, path)
189
mutter('io error: %s %s for has url: %r',
190
e.errno, errno.errorcode.get(e.errno), path)
191
if e.errno == errno.ENOENT:
193
raise TransportError(orig_error=e)
195
def _get(self, relpath, decode=False, ranges=None):
198
path = self.abspath(relpath)
199
return get_url(path, ranges=ranges)
200
except urllib2.HTTPError, e:
201
mutter('url error code: %s for has url: %r', e.code, path)
203
raise NoSuchFile(path, extra=e)
205
except (BzrError, IOError), e:
206
if hasattr(e, 'errno'):
207
mutter('io error: %s %s for has url: %r',
208
e.errno, errno.errorcode.get(e.errno), path)
209
if e.errno == errno.ENOENT:
210
raise NoSuchFile(path, extra=e)
211
raise ConnectionError(msg = "Error retrieving %s: %s"
212
% (self.abspath(relpath), str(e)),
215
def get(self, relpath, decode=False):
216
"""Get the file at the given relative path.
218
:param relpath: The relative path to the file
220
return self._get(relpath, decode=decode)
222
def readv(self, relpath, offsets):
223
"""Get parts of the file at the given relative path.
225
:offsets: A list of (offset, size) tuples.
226
:return: A list or generator of (offset, data) tuples
228
response = self._get(relpath,
229
ranges=','.join(['%d-%d' % (off, off + size - 1)
230
for off, size in offsets]))
231
if response.code == 206:
232
for off, size in offsets:
233
yield off, response.read(size)
234
elif response.code == 200:
235
fp = StringIO(response.read())
236
for off, size in offsets:
238
yield off, fp.read(size)
240
def put(self, relpath, f, mode=None):
241
"""Copy the file-like or string object into the location.
243
:param relpath: Location to put the contents, relative to base.
244
:param f: File-like or string object.
246
raise TransportNotPossible('http PUT not supported')
248
def mkdir(self, relpath, mode=None):
249
"""Create a directory at the given path."""
250
raise TransportNotPossible('http does not support mkdir()')
252
def rmdir(self, relpath):
253
"""See Transport.rmdir."""
254
raise TransportNotPossible('http does not support rmdir()')
256
def append(self, relpath, f):
257
"""Append the text in the file-like object into the final
260
raise TransportNotPossible('http does not support append()')
262
def copy(self, rel_from, rel_to):
263
"""Copy the item at rel_from to the location at rel_to"""
264
raise TransportNotPossible('http does not support copy()')
266
def copy_to(self, relpaths, other, mode=None, pb=None):
267
"""Copy a set of entries from self into another Transport.
269
:param relpaths: A list/generator of entries to be copied.
271
TODO: if other is LocalTransport, is it possible to
272
do better than put(get())?
274
# At this point HttpTransport might be able to check and see if
275
# the remote location is the same, and rather than download, and
276
# then upload, it could just issue a remote copy_this command.
277
if isinstance(other, HttpTransport):
278
raise TransportNotPossible('http cannot be the target of copy_to()')
280
return super(HttpTransport, self).copy_to(relpaths, other, mode=mode, pb=pb)
282
def move(self, rel_from, rel_to):
283
"""Move the item at rel_from to the location at rel_to"""
284
raise TransportNotPossible('http does not support move()')
286
def delete(self, relpath):
287
"""Delete the item at relpath"""
288
raise TransportNotPossible('http does not support delete()')
290
def is_readonly(self):
291
"""See Transport.is_readonly."""
295
"""See Transport.listable."""
298
def stat(self, relpath):
299
"""Return the stat information for a file.
301
raise TransportNotPossible('http does not support stat()')
303
def lock_read(self, relpath):
304
"""Lock the given file for shared (read) access.
305
:return: A lock object, which should be passed to Transport.unlock()
307
# The old RemoteBranch ignore lock for reading, so we will
308
# continue that tradition and return a bogus lock object.
309
class BogusLock(object):
310
def __init__(self, path):
314
return BogusLock(relpath)
316
def lock_write(self, relpath):
317
"""Lock the given file for exclusive (write) access.
318
WARNING: many transports do not support this, so trying avoid using it
320
:return: A lock object, which should be passed to Transport.unlock()
322
raise TransportNotPossible('http does not support lock_write()')
325
#---------------- test server facilities ----------------
326
import BaseHTTPServer, SimpleHTTPServer, socket, time
330
class WebserverNotAvailable(Exception):
    # Plain marker exception with no behaviour of its own.
    # NOTE(review): presumably raised when no test web server can be
    # started; no raiser is visible in this file -- confirm.
    pass
334
class BadWebserverPath(ValueError):
    """A local path that does not lie below the served directory.

    Must be constructed with exactly two arguments, the offending path and
    the directory it was expected to be in; both appear in the message.
    """

    def __str__(self):
        return 'path %s is not in %s' % self.args
339
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
341
def log_message(self, format, *args):
342
self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
343
self.address_string(),
344
self.log_date_time_string(),
346
self.headers.get('referer', '-'),
347
self.headers.get('user-agent', '-'))
349
def handle_one_request(self):
350
"""Handle a single HTTP request.
352
You normally don't need to override this method; see the class
353
__doc__ string for information on how to handle specific HTTP
354
commands such as GET and POST.
357
for i in xrange(1,11): # Don't try more than 10 times
359
self.raw_requestline = self.rfile.readline()
360
except socket.error, e:
361
if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
362
# omitted for now because some tests look at the log of
363
# the server and expect to see no errors. see recent
364
# email thread. -- mbp 20051021.
365
## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
371
if not self.raw_requestline:
372
self.close_connection = 1
374
if not self.parse_request(): # An error code has been sent, just exit
376
mname = 'do_' + self.command
377
if not hasattr(self, mname):
378
self.send_error(501, "Unsupported method (%r)" % self.command)
380
method = getattr(self, mname)
384
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
385
def __init__(self, server_address, RequestHandlerClass, test_case):
386
BaseHTTPServer.HTTPServer.__init__(self, server_address,
388
self.test_case = test_case
391
class HttpServer(Server):
    """A test server for http transports."""

    def _http_start(self):
        """Thread body: serve requests until ``_http_running`` is cleared."""
        httpd = TestingHTTPServer(('localhost', 0),
                                  TestingHTTPRequestHandler,
                                  self)
        host, port = httpd.socket.getsockname()
        self._http_base_url = 'http://localhost:%s/' % port
        # The base url is now known; unblock _get_remote_url().
        self._http_starting.release()
        # Time out regularly so the loop notices _http_running going False.
        httpd.socket.settimeout(0.1)

        try:
            while self._http_running:
                try:
                    httpd.handle_request()
                except socket.timeout:
                    pass
        finally:
            # Release the listening socket once the server is stopped.
            httpd.server_close()

    def _get_remote_url(self, path):
        """Return the http url that serves the local ``path``.

        :param path: Local path; if absolute it must lie below the
            directory that was current when setUp() ran.
        :raises BadWebserverPath: If an absolute path is outside that
            directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # _local_path_parts is derived from _home_dir, so that is
                # the directory to report.
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        # Wait until the server thread has published _http_base_url.
        self._http_starting.acquire()
        self._http_starting.release()
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        # log() appends here, so it must exist before any request arrives.
        self.logs = []
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Held until the server thread has bound its port.
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remove any http_proxy setting while the server runs; tearDown()
        # puts it back.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        return 'http://jasldkjsalkdjalksjdkljasd'
461
def get_test_permutations():
462
"""Return the permutations to be used in testing."""
463
warn("There are no HTTPS transport provider tests yet.")
464
return [(HttpTransport, HttpServer),