1
# Copyright (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16
"""Implementation of Transport over http.
20
import errno
import os
import threading
import urllib, urllib2
import urlparse
from collections import deque
from cStringIO import StringIO
from warnings import warn

import bzrlib
from bzrlib.transport import Transport, Server
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
                           TransportError, ConnectionError)
from bzrlib.errors import BzrError, BzrCheckError
from bzrlib.branch import Branch
from bzrlib.trace import mutter
from bzrlib.ui import ui_factory
36
def extract_auth(url, password_manager):
    """Strip credentials from an HTTP(S) url and register them for urllib2.

    Extract auth parameters from an HTTP/HTTPS url and add them to the given
    password manager.  Return the url, minus those auth parameters (which
    are handed to urllib2 through the password manager instead).

    :param url: An ``http://`` or ``https://`` url, optionally of the form
        ``scheme://user[:password]@host[:port]/...``.  User and password
        are expected to be url-quoted.
    :param password_manager: Object providing
        ``add_password(realm, uri, user, passwd)``.
    :return: The url with any ``user[:password]@`` prefix removed from its
        network location.
    """
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
    assert (scheme == 'http') or (scheme == 'https')

    if '@' in netloc:
        auth, netloc = netloc.split('@', 1)
        if ':' in auth:
            username, password = auth.split(':', 1)
        else:
            username, password = auth, None
        # Credentials are registered against the bare host, without the port.
        host = netloc.split(':', 1)[0]
        username = urllib.unquote(username)
        if password is not None:
            password = urllib.unquote(password)
        else:
            # No password in the url: ask the user.  The placeholders need
            # the 's' conversion; '%(user)@' is not a valid format.
            # (presumably ui_factory interpolates the keyword arguments
            # into the prompt - verify against bzrlib.ui)
            password = ui_factory.get_password(
                prompt='HTTP %(user)s@%(host)s password',
                user=username, host=host)
        password_manager.add_password(None, host, username, password)
    url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
    return url
66
class Request(urllib2.Request):
67
"""Request object for urllib2 that allows the method to be overridden."""
72
if self.method is not None:
75
return urllib2.Request.get_method(self)
78
def get_url(url, method=None, ranges=None):
    """Open ``url`` with urllib2 and return the response object.

    Credentials embedded in the url are moved into a password manager and
    offered through HTTP basic authentication.

    :param url: The http:// or https:// url to fetch.
    :param method: Optional HTTP method overriding urllib2's default
        (for example ``'HEAD'``).
    :param ranges: Optional value for the ``Range`` request header, for
        example ``'bytes=0-99'``.  No header is sent when empty or None.
    :return: The object returned by the urllib2 opener's ``open()``.
    """
    if ranges:
        rangestring = ranges
    else:
        # NOTE(review): log-only placeholder for "no Range header"; the
        # original text was lost from the damaged listing - confirm.
        rangestring = 'all'
    mutter("get_url %s [%s]", url, rangestring)
    manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    url = extract_auth(url, manager)
    auth_handler = urllib2.HTTPBasicAuthHandler(manager)
    opener = urllib2.build_opener(auth_handler)

    request = Request(url)
    request.method = method
    request.add_header('User-Agent', 'bzr/%s' % bzrlib.__version__)
    if ranges:
        request.add_header('Range', ranges)
    response = opener.open(request)
    return response
99
class HttpTransport(Transport):
    """This is the transport agent for http:// access.

    Only read operations are implemented; every mutating operation raises
    TransportNotPossible.

    TODO: Implement pipelined versions of all of the *_multi() functions.
    """

    def __init__(self, base):
        """Set the base path where files will be stored.

        :param base: An ``http://`` or ``https://`` url.
        """
        assert base.startswith('http://') or base.startswith('https://')
        # NOTE(review): two lines were lost here in the damaged listing; this
        # trailing-slash normalisation is a reconstruction - confirm against
        # version control.
        if base[-1] != '/':
            base = base + '/'
        super(HttpTransport, self).__init__(base)
        # In the future we might actually connect to the remote host
        # rather than using get_url
        # self._connection = None
        (self._proto, self._host,
         self._path, self._parameters,
         self._query, self._fragment) = urlparse.urlparse(self.base)

    def should_cache(self):
        """Return True if the data pulled across should be cached locally."""
        # NOTE(review): the return statement was lost from the listing; True
        # is inferred from the docstring - confirm.
        return True

    def clone(self, offset=None):
        """Return a new HttpTransport with root at self.base + offset

        For now HttpTransport does not actually connect, so just return
        a new HttpTransport object.

        :param offset: Relative path of the new root, or None to clone at
            the same base.
        """
        if offset is None:
            return HttpTransport(self.base)
        else:
            return HttpTransport(self.abspath(offset))

    def abspath(self, relpath):
        """Return the full url to the given relative path.

        This can be supplied with a string or a list

        :param relpath: Path relative to this transport's base, using ``/``
            as separator.  ``.`` and empty segments are ignored and ``..``
            steps up one level, never above the root.
        :raises ValueError: If a multi-segment path starts or ends with
            ``/``.
        :return: The url built from this transport's scheme, host and the
            resolved path; parameters, query and fragment are dropped.
        """
        assert isinstance(relpath, basestring)
        if isinstance(relpath, basestring):
            relpath_parts = relpath.split('/')
        else:
            # TODO: Don't call this with an array - no magic interfaces
            relpath_parts = relpath[:]
        if len(relpath_parts) > 1:
            if relpath_parts[0] == '':
                raise ValueError("path %r within branch %r seems to be absolute"
                                 % (relpath, self._path))
            if relpath_parts[-1] == '':
                raise ValueError("path %r within branch %r seems to be a directory"
                                 % (relpath, self._path))
        basepath = self._path.split('/')
        if len(basepath) > 0 and basepath[-1] == '':
            basepath = basepath[:-1]
        for p in relpath_parts:
            if p == '..':
                if len(basepath) == 0:
                    # In most filesystems, a request for the parent
                    # of root, just returns root.
                    continue
                basepath.pop()
            elif p == '.' or p == '':
                continue  # No-op
            else:
                basepath.append(p)
        # Possibly, we could use urlparse.urljoin() here, but
        # I'm concerned about when it chooses to strip the last
        # portion of the path, and when it doesn't.
        path = '/'.join(basepath)
        return urlparse.urlunparse((self._proto,
                                    self._host, path, '', '', ''))

    def has(self, relpath):
        """Does the target location exist?

        TODO: This should be changed so that we don't use
        urllib2 and get an exception, the code path would be
        cleaner if we just do an http HEAD request, and parse
        the return code.

        :param relpath: Path relative to this transport's base.
        :return: True when a HEAD request succeeds, False when the file is
            reported missing.
        :raises TransportError: On an IOError other than ENOENT.
        """
        # Bound before the try so the handlers can always log a path.
        path = relpath
        try:
            path = self.abspath(relpath)
            f = get_url(path, method='HEAD')
            # Without the read and then close()
            # we tend to have busy sockets.
            f.read()
            f.close()
            return True
        except urllib2.HTTPError as e:
            mutter('url error code: %s for has url: %r', e.code, path)
            # NOTE(review): the 404 handling was lost from the listing and
            # is reconstructed by analogy with _get() - confirm.
            if e.code == 404:
                return False
            raise
        except IOError as e:
            mutter('io error: %s %s for has url: %r',
                   e.errno, errno.errorcode.get(e.errno), path)
            if e.errno == errno.ENOENT:
                return False
            raise TransportError(orig_error=e)

    def _get(self, relpath, decode=False, ranges=None):
        """Fetch relpath and return the open response.

        :param relpath: Path relative to this transport's base.
        :param decode: Accepted for interface compatibility; not used here.
        :param ranges: Optional ``Range`` header value passed to get_url().
        :raises NoSuchFile: When the file is reported missing (ENOENT, and
            presumably HTTP 404 - see the note below).
        :raises ConnectionError: On any other BzrError or IOError.
        """
        path = relpath
        try:
            path = self.abspath(relpath)
            return get_url(path, ranges=ranges)
        except urllib2.HTTPError as e:
            mutter('url error code: %s for has url: %r', e.code, path)
            # NOTE(review): the condition guarding this raise was lost from
            # the listing; a 404 check is assumed - confirm.
            if e.code == 404:
                raise NoSuchFile(path, extra=e)
            raise
        except (BzrError, IOError) as e:
            if hasattr(e, 'errno'):
                mutter('io error: %s %s for has url: %r',
                       e.errno, errno.errorcode.get(e.errno), path)
                if e.errno == errno.ENOENT:
                    raise NoSuchFile(path, extra=e)
            raise ConnectionError(msg="Error retrieving %s: %s"
                                  % (self.abspath(relpath), str(e)),
                                  orig_error=e)

    def get(self, relpath, decode=False):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        """
        return self._get(relpath, decode=decode)

    def readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        Adjacent requests (each starting where the previous one ends) are
        coalesced, up to 50 at a time, into a single ranged GET.

        :offsets: A list of (offset, size) tuples.
        :return: A list or generator of (offset, data) tuples
        """
        # this is not quite regular enough to have a single driver routine and
        # helper method in Transport.
        def do_combined_read(combined_offsets):
            # read one coalesced block
            total_size = sum(size for _, size in combined_offsets)
            mutter('readv coalesced %d reads.', len(combined_offsets))
            offset = combined_offsets[0][0]
            ranges = 'bytes=%d-%d' % (offset, offset + total_size - 1)
            response = self._get(relpath, ranges=ranges)
            if response.code == 206:
                # Partial content: the body holds exactly the requested span.
                for off, size in combined_offsets:
                    yield off, response.read(size)
            elif response.code == 200:
                # Range ignored: the body is the whole file, so cut our span
                # out of it before handing back the pieces.
                data = response.read(offset + total_size)[offset:offset + total_size]
                pos = 0
                for offset, size in combined_offsets:
                    yield offset, data[pos:pos + size]
                    pos += size
                del data

        if not len(offsets):
            return
        pending_offsets = deque(offsets)
        combined_offsets = []
        while len(pending_offsets):
            offset, size = pending_offsets.popleft()
            if not combined_offsets:
                combined_offsets = [[offset, size]]
            else:
                if (len(combined_offsets) < 50 and
                    combined_offsets[-1][0] + combined_offsets[-1][1] == offset):
                    # compatible offset: extend the current batch
                    combined_offsets.append([offset, size])
                else:
                    # incompatible, or over the threshold issue a read and yield
                    pending_offsets.appendleft((offset, size))
                    for result in do_combined_read(combined_offsets):
                        yield result
                    combined_offsets = []
        # whatever is left is a single coalesced request
        if len(combined_offsets):
            for result in do_combined_read(combined_offsets):
                yield result

    def put(self, relpath, f, mode=None):
        """Copy the file-like or string object into the location.

        :param relpath: Location to put the contents, relative to base.
        :param f: File-like or string object.
        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path."""
        raise TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir."""
        raise TransportNotPossible('http does not support rmdir()')

    def append(self, relpath, f):
        """Append the text in the file-like object into the final
        location.

        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to"""
        raise TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.
        :raises TransportNotPossible: If other is an HttpTransport.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransport):
            raise TransportNotPossible('http cannot be the target of copy_to()')
        else:
            return super(HttpTransport, self).copy_to(relpaths, other,
                                                      mode=mode, pb=pb)

    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to"""
        raise TransportNotPossible('http does not support move()')

    def delete(self, relpath):
        """Delete the item at relpath"""
        raise TransportNotPossible('http does not support delete()')

    def is_readonly(self):
        """See Transport.is_readonly."""
        # NOTE(review): return value lost from the listing; True is inferred
        # from every write operation here raising - confirm.
        return True

    def listable(self):
        """See Transport.listable."""
        # NOTE(review): method header and return value lost from the
        # listing; reconstructed from the docstring - confirm.
        return False

    def stat(self, relpath):
        """Return the stat information for a file.

        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support stat()')

    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            def __init__(self, path):
                self.path = path

            # NOTE(review): body of BogusLock was lost from the listing;
            # a no-op unlock() is assumed - confirm.
            def unlock(self):
                pass
        return BogusLock(relpath)

    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so trying avoid using it

        :return: A lock object, which should be passed to Transport.unlock()
        :raises TransportNotPossible: Always.
        """
        raise TransportNotPossible('http does not support lock_write()')
364
#---------------- test server facilities ----------------
365
import BaseHTTPServer, SimpleHTTPServer, socket, time
369
class WebserverNotAvailable(Exception):
    """Raised when the test web server cannot be used."""
    # NOTE(review): the class body was lost from the damaged listing; an
    # empty exception type is assumed - confirm.
    pass
373
class BadWebserverPath(ValueError):
    """A path was requested that is not below the served directory.

    Construct with two arguments: the offending path and the directory it
    was expected to be in.
    """

    def __str__(self):
        return 'path %s is not in %s' % self.args
378
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """Request handler that logs to the test case and retries on EAGAIN."""

    def log_message(self, format, *args):
        """Send an access-log style line to the server's test_case.log()."""
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
                                  self.address_string(),
                                  self.log_date_time_string(),
                                  format % args,
                                  self.headers.get('referer', '-'),
                                  self.headers.get('user-agent', '-'))

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        Overridden here so that reading the request line is retried when
        the socket reports EAGAIN/EWOULDBLOCK.
        """
        for i in xrange(1, 11):  # Don't try more than 10 times
            try:
                self.raw_requestline = self.rfile.readline()
            except socket.error as e:
                if e.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # omitted for now because some tests look at the log of
                    # the server and expect to see no errors.  see recent
                    # email thread. -- mbp 20051021.
                    ## self.log_message('EAGAIN (%d) while reading from raw_requestline' % i)
                    # NOTE(review): back-off duration lost from the damaged
                    # listing; 0.01s is a reconstruction - confirm.
                    time.sleep(0.01)
                    continue
                raise
            else:
                break
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request():  # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method = getattr(self, mname)
        method()
423
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
    """HTTPServer that remembers the object its handlers should log to."""

    def __init__(self, server_address, RequestHandlerClass, test_case):
        """Create the server.

        :param server_address: (host, port) pair passed to HTTPServer.
        :param RequestHandlerClass: Handler class passed to HTTPServer.
        :param test_case: Object stored as ``self.test_case``;
            TestingHTTPRequestHandler calls its ``log()`` method.
        """
        BaseHTTPServer.HTTPServer.__init__(self, server_address,
                                           RequestHandlerClass)
        self.test_case = test_case
430
class HttpServer(Server):
    """A test server for http transports."""

    def _http_start(self):
        """Thread body: start the web server and serve until told to stop."""
        httpd = TestingHTTPServer(('localhost', 0),
                                  TestingHTTPRequestHandler,
                                  self)
        # Port 0 asked the OS for a free port; find out which one we got.
        port = httpd.socket.getsockname()[1]
        self._http_base_url = 'http://localhost:%s/' % port
        # The base url is now known: let _get_remote_url() proceed.
        self._http_starting.release()
        # Time out regularly so the loop notices _http_running going False.
        httpd.socket.settimeout(0.1)

        while self._http_running:
            try:
                httpd.handle_request()
            except socket.timeout:
                pass

    def _get_remote_url(self, path):
        """Return the url at which the local path is served.

        :param path: A local path; if absolute it must be below the
            directory that was current when setUp() ran.
        :raises BadWebserverPath: If an absolute path lies outside that
            directory.
        """
        path_parts = path.split(os.path.sep)
        if os.path.isabs(path):
            if path_parts[:len(self._local_path_parts)] != \
                   self._local_path_parts:
                # _home_dir is the directory the prefix check is made
                # against; no 'test_dir' attribute is ever set on this class.
                raise BadWebserverPath(path, self._home_dir)
            remote_path = '/'.join(path_parts[len(self._local_path_parts):])
        else:
            remote_path = '/'.join(path_parts)

        # Block until the server thread has published _http_base_url.
        self._http_starting.acquire()
        self._http_starting.release()
        return self._http_base_url + remote_path

    def log(self, format, *args):
        """Capture Server log output."""
        self.logs.append(format % args)

    def setUp(self):
        """See bzrlib.transport.Server.setUp."""
        self._home_dir = os.getcwdu()
        self._local_path_parts = self._home_dir.split(os.path.sep)
        # Held until the server thread knows its url (see _http_start).
        self._http_starting = threading.Lock()
        self._http_starting.acquire()
        self._http_running = True
        self._http_base_url = None
        self._http_thread = threading.Thread(target=self._http_start)
        self._http_thread.setDaemon(True)
        self._http_thread.start()
        # Remove any http_proxy setting for the duration; tearDown()
        # restores it.
        self._http_proxy = os.environ.get("http_proxy")
        if self._http_proxy is not None:
            del os.environ["http_proxy"]
        self.logs = []

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._http_running = False
        self._http_thread.join()
        if self._http_proxy is not None:
            os.environ["http_proxy"] = self._http_proxy

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_remote_url(self._home_dir)

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        return 'http://jasldkjsalkdjalksjdkljasd'
500
def get_test_permutations():
501
"""Return the permutations to be used in testing."""
502
warn("There are no HTTPS transport provider tests yet.")
503
return [(HttpTransport, HttpServer),