30
from collections import deque
31
from copy import deepcopy
29
from cStringIO import StringIO
35
from unittest import TestSuite
33
from bzrlib.lazy_import import lazy_import
34
lazy_import(globals(), """
36
from stat import S_ISDIR
40
import bzrlib.errors as errors
41
from bzrlib.errors import DependencyNotPresent
42
import bzrlib.osutils as osutils
43
from bzrlib.osutils import pumpfile
44
from bzrlib.symbol_versioning import *
45
from bzrlib.trace import mutter, warning
46
import bzrlib.urlutils as urlutils
48
# {prefix: [transport_classes]}
# Transports are inserted onto the list LIFO and tried in order; as a result
# transports provided by plugins are tried first, which is usually what we
# want.
52
_protocol_handlers = {
55
def register_transport(prefix, klass, override=DEPRECATED_PARAMETER):
56
"""Register a transport that can be used to open URLs
58
Normally you should use register_lazy_transport, which defers loading the
59
implementation until it's actually used, and so avoids pulling in possibly
60
large implementation libraries.
62
# Note that this code runs very early in library setup -- trace may not be
64
global _protocol_handlers
65
if deprecated_passed(override):
66
warn("register_transport(override) is deprecated")
67
_protocol_handlers.setdefault(prefix, []).insert(0, klass)
70
def register_lazy_transport(scheme, module, classname):
71
"""Register lazy-loaded transport class.
73
When opening a URL with the given scheme, load the module and then
74
instantiate the particular class.
76
If the module raises DependencyNotPresent when it's imported, it is
77
skipped and another implementation of the protocol is tried. This is
78
intended to be used when the implementation depends on an external
79
implementation that may not be present. If any other error is raised, it
80
propagates up and the attempt to open the url fails.
82
# TODO: If no implementation of a protocol is available because of missing
83
# dependencies, we should perhaps show the message about what dependency
86
mod = __import__(module, globals(), locals(), [classname])
87
klass = getattr(mod, classname)
89
_loader.module = module
90
register_transport(scheme, _loader)
48
from bzrlib.symbol_versioning import (
54
from bzrlib.trace import (
57
from bzrlib import registry
60
# a dictionary of open file streams. Keys are absolute paths, values are
93
65
def _get_protocol_handlers():
    """Return the registry mapping {urlprefix: [factory]}."""
    return transport_list_registry
98
70
def _set_protocol_handlers(new_handlers):
    """Take over the handling of transport protocols.

    WARNING this will remove all built in protocols. Use with care.
    """
    global transport_list_registry
    transport_list_registry = new_handlers
107
79
def _clear_protocol_handlers():
    """Reset the registered transports to an empty registry.

    Intended for tests; callers normally save the old registry with
    _get_protocol_handlers() and restore it via _set_protocol_handlers().
    """
    global transport_list_registry
    transport_list_registry = TransportListRegistry()
112
84
def _get_transport_modules():
    """Return a list of the modules providing transports."""
    modules = set()
    for prefix, factory_list in transport_list_registry.iteritems():
        for factory in factory_list:
            # Lazily-registered factories expose the module name directly;
            # eagerly-registered ones carry the loaded object.
            if hasattr(factory, "_module_name"):
                modules.add(factory._module_name)
            else:
                modules.add(factory._obj.__module__)
    # Add chroot directly, because there is no handler registered for it.
    modules.add('bzrlib.transport.chroot')
    result = list(modules)
    result.sort()
    return result
100
class TransportListRegistry(registry.Registry):
    """A registry which simplifies tracking available Transports.

    A registration of a new protocol requires two steps:
    1) register the prefix with the function register_transport( )
    2) register the protocol provider with the function
    register_transport_provider( ) ( and the "lazy" variant )

    This is needed because:
    a) a single provider can support multiple protocols ( like the ftp
    provider which supports both the ftp:// and the aftp:// protocols )
    b) a single protocol can have multiple providers ( like the http://
    protocol which is supported by both the urllib and pycurl provider )
    """

    def register_transport_provider(self, key, obj):
        # Providers are inserted LIFO so later registrations (plugins) win.
        self.get(key).insert(0, registry._ObjectGetter(obj))

    def register_lazy_transport_provider(self, key, module_name, member_name):
        # Defer the import of the provider until it is actually needed.
        self.get(key).insert(0,
                registry._LazyObjectGetter(module_name, member_name))

    def register_transport(self, key, help=None):
        # The registered value is the (initially empty) LIFO provider list.
        self.register(key, [], help)

    def set_default_transport(self, key=None):
        """Set the default transport prefix (None clears the default)."""
        self._default_key = key
130
transport_list_registry = TransportListRegistry()
133
def register_transport_proto(prefix, help=None, info=None,
                             register_netloc=False):
    """Register a transport protocol prefix.

    :param prefix: The URL prefix, e.g. 'ftp://'.
    :param help: Help text describing the protocol.
    :param info: Accepted for compatibility with callers; not used here.
    :param register_netloc: If True, teach urlparse that this protocol
        carries a network location (host) part.
    """
    transport_list_registry.register_transport(prefix, help)
    if register_netloc:
        # The prefix must look like 'scheme://' so the scheme can be derived.
        if not prefix.endswith('://'):
            raise ValueError(prefix)
        register_urlparse_netloc_protocol(prefix[:-3])
142
def register_lazy_transport(prefix, module, classname):
    """Register a transport provider without importing its module yet.

    The prefix is registered automatically if it is not known yet.
    """
    if prefix not in transport_list_registry:
        register_transport_proto(prefix)
    transport_list_registry.register_lazy_transport_provider(
        prefix, module, classname)
148
def register_transport(prefix, klass, override=DEPRECATED_PARAMETER):
    """Register an already-loaded transport provider for prefix.

    The prefix is registered automatically if it is not known yet.
    """
    if prefix not in transport_list_registry:
        register_transport_proto(prefix)
    transport_list_registry.register_transport_provider(prefix, klass)
128
154
def register_urlparse_netloc_protocol(protocol):
    """Ensure that protocol is setup to be used with urlparse netloc parsing."""
    if protocol in urlparse.uses_netloc:
        return
    urlparse.uses_netloc.append(protocol)
135
# TODO: jam 20060606 urls should only be ascii, or they should raise InvalidURL
136
if isinstance(url, unicode):
137
url = url.encode('utf-8')
138
(scheme, netloc, path, params,
139
query, fragment) = urlparse.urlparse(url, allow_fragments=False)
140
username = password = host = port = None
142
username, host = netloc.split('@', 1)
144
username, password = username.split(':', 1)
145
password = urllib.unquote(password)
146
username = urllib.unquote(username)
151
host, port = host.rsplit(':', 1)
155
# TODO: Should this be ConnectionError?
156
raise errors.TransportError('%s: invalid port number' % port)
157
host = urllib.unquote(host)
159
path = urllib.unquote(path)
161
return (scheme, username, password, host, port, path)
160
def _unregister_urlparse_netloc_protocol(protocol):
    """Remove protocol from urlparse netloc parsing.

    Except for tests, you should never use that function. Using it with 'http',
    for example, will break all http transports.
    """
    # Only remove when present; removing an absent entry would raise.
    present = protocol in urlparse.uses_netloc
    if present:
        urlparse.uses_netloc.remove(protocol)
170
def unregister_transport(scheme, factory):
    """Unregister a transport."""
    l = transport_list_registry.get(scheme)
    for i in l:
        if i.get_obj() == factory:
            transport_list_registry.get(scheme).remove(i)
            break
    # Drop the prefix entirely once its last provider is gone.
    if len(l) == 0:
        transport_list_registry.remove(scheme)
182
class _CoalescedOffset(object):
183
"""A data container for keeping track of coalesced offsets."""
185
__slots__ = ['start', 'length', 'ranges']
187
def __init__(self, start, length, ranges):
192
def __cmp__(self, other):
193
return cmp((self.start, self.length, self.ranges),
194
(other.start, other.length, other.ranges))
197
return '%s(%r, %r, %r)' % (self.__class__.__name__,
198
self.start, self.length, self.ranges)
201
class LateReadError(object):
    """A helper for transports which pretends to be a readable file.

    When read() is called, errors.ReadError is raised.
    """

    def __init__(self, path):
        self._path = path

    def close(self):
        """a no-op - do nothing."""

    def _fail(self):
        """Raise ReadError."""
        raise errors.ReadError(self._path)

    def __iter__(self):
        self._fail()

    def read(self, count=-1):
        self._fail()
227
class FileStream(object):
    """Base class for FileStreams."""

    def __init__(self, transport, relpath):
        """Create a FileStream for relpath on transport."""
        self.transport = transport
        self.relpath = relpath

    def _close(self):
        """A hook point for subclasses that need to take action on close."""

    def close(self):
        self._close()
        # Stop tracking this stream now that it is closed.
        del _file_streams[self.transport.abspath(self.relpath)]
243
class FileFileStream(FileStream):
    """A file stream object returned by open_write_stream.

    This version uses a file like object to perform writes.
    """

    def __init__(self, transport, relpath, file_handle):
        FileStream.__init__(self, transport, relpath)
        self.file_handle = file_handle

    def _close(self):
        self.file_handle.close()

    def write(self, bytes):
        self.file_handle.write(bytes)
260
class AppendBasedFileStream(FileStream):
    """A file stream object returned by open_write_stream.

    This version uses append on a transport to perform writes.
    """

    def write(self, bytes):
        self.transport.append_bytes(self.relpath, bytes)
164
270
class Transport(object):
340
547
def get(self, relpath):
341
548
"""Get the file at the given relative path.
550
This may fail in a number of ways:
551
- HTTP servers may return content for a directory. (unexpected
553
- FTP servers may indicate NoSuchFile for a directory.
554
- SFTP servers may give a file handle for a directory that will
557
For correct use of the interface, be sure to catch errors.PathError
558
when calling it and catch errors.ReadError when reading from the
343
561
:param relpath: The relative path to the file
562
:rtype: File-like object.
345
564
raise NotImplementedError(self.get)
347
def readv(self, relpath, offsets):
348
"""Get parts of the file at the given relative path.
350
:offsets: A list of (offset, size) tuples.
351
:return: A list or generator of (offset, data) tuples
353
def do_combined_read(combined_offsets):
355
for offset, size in combined_offsets:
357
mutter('readv coalesced %d reads.', len(combined_offsets))
358
offset = combined_offsets[0][0]
360
data = fp.read(total_size)
362
for offset, size in combined_offsets:
363
yield offset, data[pos:pos + size]
566
def get_bytes(self, relpath):
    """Get a raw string of the bytes for a file at the given location.

    :param relpath: The relative path to the file
    """
    f = self.get(relpath)
    return f.read()
573
@deprecated_method(one_four)
def get_smart_client(self):
    """Return a smart client for this transport if possible.

    A smart client doesn't imply the presence of a smart server: it implies
    that the smart protocol can be tunnelled via this transport.

    :raises NoSmartServer: if no smart server client is available.
    """
    raise errors.NoSmartServer(self.base)
584
def get_smart_medium(self):
    """Return a smart client medium for this transport if possible.

    A smart medium doesn't imply the presence of a smart server: it implies
    that the smart protocol can be tunnelled via this transport.

    :raises NoSmartMedium: if no smart server medium is available.
    """
    raise errors.NoSmartMedium(self)
594
@deprecated_method(one_four)
def get_shared_medium(self):
    """Return a smart client shared medium for this transport if possible.

    A smart medium doesn't imply the presence of a smart server: it implies
    that the smart protocol can be tunnelled via this transport.

    :raises NoSmartMedium: if no smart server medium is available.
    """
    raise errors.NoSmartMedium(self)
605
def readv(self, relpath, offsets, adjust_for_latency=False,
          upper_limit=None):
    """Get parts of the file at the given relative path.

    :param relpath: The path to read data from.
    :param offsets: A list of (offset, size) tuples.
    :param adjust_for_latency: Adjust the requested offsets to accomodate
        transport latency. This may re-order the offsets, expand them to
        grab adjacent data when there is likely a high cost to requesting
        data relative to delivering it.
    :param upper_limit: When adjust_for_latency is True setting upper_limit
        allows the caller to tell the transport about the length of the
        file, so that requests are not issued for ranges beyond the end of
        the file. This matters because some servers and/or transports error
        in such a case rather than just satisfying the available ranges.
        upper_limit should always be provided when adjust_for_latency is
        True, and should be the size of the file in bytes.
    :return: A list or generator of (offset, data) tuples
    """
    if adjust_for_latency:
        # Design note: We may wish to have different algorithms for the
        # expansion of the offsets per-transport. E.g. for local disk to
        # use page-aligned expansion. If that is the case consider the
        # following structure:
        #  - a test that transport.readv uses self._offset_expander or some
        #    similar attribute, to do the expansion
        #  - a test for each transport that it has some known-good offset
        #    expander
        #  - unit tests for each offset expander
        #  - a set of tests for the offset expander interface, giving
        #    baseline behaviour (which the current transport
        #    adjust_for_latency tests could be repurposed to).
        offsets = self._sort_expand_and_combine(offsets, upper_limit)
    return self._readv(relpath, offsets)
640
def _readv(self, relpath, offsets):
641
"""Get parts of the file at the given relative path.
643
:param relpath: The path to read.
644
:param offsets: A list of (offset, size) tuples.
645
:return: A list or generator of (offset, data) tuples
368
650
fp = self.get(relpath)
369
pending_offsets = deque(offsets)
370
combined_offsets = []
371
while len(pending_offsets):
372
offset, size = pending_offsets.popleft()
373
if not combined_offsets:
374
combined_offsets = [[offset, size]]
651
return self._seek_and_read(fp, offsets, relpath)
653
def _seek_and_read(self, fp, offsets, relpath='<unknown>'):
    """An implementation of readv that uses fp.seek and fp.read.

    This uses _coalesce_offsets to issue larger reads and fewer seeks.

    :param fp: A file-like object that supports seek() and read(size)
    :param offsets: A list of offsets to be read from the given file.
    :return: yield (pos, data) tuples for each request
    """
    # We are going to iterate multiple times, we need a list
    offsets = list(offsets)
    sorted_offsets = sorted(offsets)

    # turn the list of offsets into a stack
    offset_stack = iter(offsets)
    cur_offset_and_size = offset_stack.next()
    coalesced = self._coalesce_offsets(sorted_offsets,
                           limit=self._max_readv_combine,
                           fudge_factor=self._bytes_to_read_before_seek)

    # Cache the results, but only until they have been fulfilled
    data_map = {}
    for c_offset in coalesced:
        # TODO: jam 20060724 it might be faster to not issue seek if
        #       we are already at the right location. This should be
        #       benchmarked.
        fp.seek(c_offset.start)
        data = fp.read(c_offset.length)
        if len(data) < c_offset.length:
            raise errors.ShortReadvError(relpath, c_offset.start,
                        c_offset.length, actual=len(data))
        for suboffset, subsize in c_offset.ranges:
            key = (c_offset.start+suboffset, subsize)
            data_map[key] = data[suboffset:suboffset+subsize]

        # Now that we've read some data, see if we can yield anything back
        while cur_offset_and_size in data_map:
            this_data = data_map.pop(cur_offset_and_size)
            yield cur_offset_and_size[0], this_data
            # StopIteration from .next() ends the generator once every
            # requested offset has been yielded.
            cur_offset_and_size = offset_stack.next()
694
def _sort_expand_and_combine(self, offsets, upper_limit):
697
:param offsets: A readv vector - (offset, length) tuples.
698
:param upper_limit: The highest byte offset that may be requested.
699
:return: A readv vector that will read all the regions requested by
700
offsets, in start-to-end order, with no duplicated regions,
701
expanded by the transports recommended page size.
703
offsets = sorted(offsets)
704
# short circuit empty requests
705
if len(offsets) == 0:
707
# Quick thunk to stop this function becoming a generator
708
# itself, rather we return a generator that has nothing to
712
return empty_yielder()
713
# expand by page size at either end
714
maximum_expansion = self.recommended_page_size()
716
for offset, length in offsets:
717
expansion = maximum_expansion - length
719
# we're asking for more than the minimum read anyway.
721
reduction = expansion / 2
722
new_offset = offset - reduction
723
new_length = length + expansion
725
# don't ask for anything < 0
727
if (upper_limit is not None and
728
new_offset + new_length > upper_limit):
729
new_length = upper_limit - new_offset
730
new_offsets.append((new_offset, new_length))
731
# combine the expanded offsets
733
current_offset, current_length = new_offsets[0]
734
current_finish = current_length + current_offset
735
for offset, length in new_offsets[1:]:
736
finish = offset + length
737
if offset > current_finish:
738
# there is a gap, output the current accumulator and start
739
# a new one for the region we're examining.
740
offsets.append((current_offset, current_length))
741
current_offset = offset
742
current_length = length
743
current_finish = finish
745
if finish > current_finish:
746
# extend the current accumulator to the end of the region
748
current_finish = finish
749
current_length = finish - current_offset
750
offsets.append((current_offset, current_length))
754
def _coalesce_offsets(offsets, limit=0, fudge_factor=0, max_size=0):
    """Yield coalesced offsets.

    With a long list of neighboring requests, combine them
    into a single large request, while retaining the original
    offsets.
    Turns [(15, 10), (25, 10)] => [(15, 20, [(0, 10), (10, 10)])]

    :param offsets: A list of (start, length) pairs
    :param limit: Only combine a maximum of this many pairs Some transports
        penalize multiple reads more than others, and sometimes it is
        better to return early.
        0 means no limit.
    :param fudge_factor: All transports have some level of 'it is
        better to read some more data and throw it away rather
        than seek', so collapse if we are 'close enough'
    :param max_size: Create coalesced offsets no bigger than this size.
        When a single offset is bigger than 'max_size', it will keep
        its size and be alone in the coalesced offset.
        0 means no maximum size.
    :return: yield _CoalescedOffset objects, which have members for where
        to start, how much to read, and how to split those chunks back up
    """
    last_end = None
    cur = _CoalescedOffset(None, None, [])

    for start, size in offsets:
        end = start + size
        if (last_end is not None
            and start <= last_end + fudge_factor
            and start >= cur.start
            and (limit <= 0 or len(cur.ranges) < limit)
            and (max_size <= 0 or end - cur.start <= max_size)):
            # Close enough to the previous request: fold it in.
            cur.length = end - cur.start
            cur.ranges.append((start-cur.start, size))
        else:
            # Incompatible with the accumulator: emit it and start afresh.
            if cur.start is not None:
                yield cur
            cur = _CoalescedOffset(start, size, [(0, size)])
        last_end = end

    if cur.start is not None:
        yield cur
391
805
def get_multi(self, relpaths, pb=None):
392
806
"""Get a list of file-like objects, one for each entry in relpaths.
405
819
yield self.get(relpath)
408
def put(self, relpath, f, mode=None):
409
"""Copy the file-like or string object into the location.
822
def put_bytes(self, relpath, bytes, mode=None):
    """Atomically put the supplied bytes into the given location.

    :param relpath: The location to put the contents, relative to the
        transport base.
    :param bytes: A bytestring of data.
    :param mode: Create the file with the given mode.
    """
    if not isinstance(bytes, str):
        raise AssertionError(
            'bytes must be a plain string, not %s' % type(bytes))
    return self.put_file(relpath, StringIO(bytes), mode=mode)
836
def put_bytes_non_atomic(self, relpath, bytes, mode=None,
                         create_parent_dir=False,
                         dir_mode=None):
    """Copy the string into the target location.

    This function is not strictly safe to use. See
    Transport.put_bytes_non_atomic for more information.

    :param relpath: The remote location to put the contents.
    :param bytes: A string object containing the raw bytes to write into
        the target file.
    :param mode: Possible access permissions for new file.
        None means do not set remote permissions.
    :param create_parent_dir: If we cannot create the target file because
        the parent directory does not exist, go ahead and
        create it, and then try again.
    :param dir_mode: Possible access permissions for new directories.
    """
    if not isinstance(bytes, str):
        raise AssertionError(
            'bytes must be a plain string, not %s' % type(bytes))
    self.put_file_non_atomic(relpath, StringIO(bytes), mode=mode,
                             create_parent_dir=create_parent_dir,
                             dir_mode=dir_mode)
861
def put_file(self, relpath, f, mode=None):
862
"""Copy the file-like object into the location.
411
864
:param relpath: Location to put the contents, relative to base.
412
:param f: File-like or string object.
413
:param mode: The mode for the newly created file,
414
None means just use the default
416
raise NotImplementedError(self.put)
418
def put_multi(self, files, mode=None, pb=None):
419
"""Put a set of files into the location.
421
:param files: A list of tuples of relpath, file object [(path1, file1), (path2, file2),...]
422
:param pb: An optional ProgressBar for indicating percent done.
423
:param mode: The mode for the newly created files
424
:return: The number of files copied.
427
self.put(path, f, mode=mode)
428
return len(self._iterate_over(files, put, pb, 'put', expand=True))
865
:param f: File-like object.
866
:param mode: The mode for the newly created file,
867
None means just use the default.
868
:return: The length of the file that was written.
870
# We would like to mark this as NotImplemented, but most likely
871
# transports have defined it in terms of the old api.
872
symbol_versioning.warn('Transport %s should implement put_file,'
873
' rather than implementing put() as of'
875
% (self.__class__.__name__,),
877
return self.put(relpath, f, mode=mode)
878
#raise NotImplementedError(self.put_file)
880
def put_file_non_atomic(self, relpath, f, mode=None,
881
create_parent_dir=False,
883
"""Copy the file-like object into the target location.
885
This function is not strictly safe to use. It is only meant to
886
be used when you already know that the target does not exist.
887
It is not safe, because it will open and truncate the remote
888
file. So there may be a time when the file has invalid contents.
890
:param relpath: The remote location to put the contents.
891
:param f: File-like object.
892
:param mode: Possible access permissions for new file.
893
None means do not set remote permissions.
894
:param create_parent_dir: If we cannot create the target file because
895
the parent directory does not exist, go ahead and
896
create it, and then try again.
897
:param dir_mode: Possible access permissions for new directories.
899
# Default implementation just does an atomic put.
901
return self.put_file(relpath, f, mode=mode)
902
except errors.NoSuchFile:
903
if not create_parent_dir:
905
parent_dir = osutils.dirname(relpath)
907
self.mkdir(parent_dir, mode=dir_mode)
908
return self.put_file(relpath, f, mode=mode)
430
910
def mkdir(self, relpath, mode=None):
431
911
"""Create a directory at the given path."""
437
917
self.mkdir(path, mode=mode)
438
918
return len(self._iterate_over(relpaths, mkdir, pb, 'mkdir', expand=False))
440
def append(self, relpath, f):
441
"""Append the text in the file-like or string object to
442
the supplied location.
444
returns the length of f before the content was written to it.
446
raise NotImplementedError(self.append)
920
def open_write_stream(self, relpath, mode=None):
    """Open a writable file stream at relpath.

    A file stream is a file like object with a write() method that accepts
    bytes to write.. Buffering may occur internally until the stream is
    closed with stream.close(). Calls to readv or the get_* methods will
    be synchronised with any internal buffering that may be present.

    :param relpath: The relative path to the file.
    :param mode: The mode for the newly created file,
                 None means just use the default
    :return: A FileStream. FileStream objects have two methods, write() and
        close(). There is no guarantee that data is committed to the file
        if close() has not been called (even if get() is called on the same
        path).
    """
    raise NotImplementedError(self.open_write_stream)
938
def append_file(self, relpath, f, mode=None):
939
"""Append bytes from a file-like object to a file at relpath.
941
The file is created if it does not already exist.
943
:param f: a file-like object of the bytes to append.
944
:param mode: Unix mode for newly created files. This is not used for
947
:returns: the length of relpath before the content was written to it.
949
symbol_versioning.warn('Transport %s should implement append_file,'
950
' rather than implementing append() as of'
952
% (self.__class__.__name__,),
954
return self.append(relpath, f, mode=mode)
956
def append_bytes(self, relpath, bytes, mode=None):
    """Append bytes to a file at relpath.

    The file is created if it does not already exist.

    :param relpath: The relative path to the file.
    :param bytes: a string of the bytes to append.
    :param mode: Unix mode for newly created files. This is not used for
        existing files.

    :returns: the length of relpath before the content was written to it.
    """
    if not isinstance(bytes, str):
        raise TypeError(
            'bytes must be a plain string, not %s' % type(bytes))
    return self.append_file(relpath, StringIO(bytes), mode=mode)
448
973
def append_multi(self, files, pb=None):
449
974
"""Append the text in each file-like or string object to
676
1212
# several questions about the transport.
680
# jam 20060426 For compatibility we copy the functions here
681
# TODO: The should be marked as deprecated
682
urlescape = urlutils.escape
683
urlunescape = urlutils.unescape
684
_urlRE = re.compile(r'^(?P<proto>[^:/\\]+)://(?P<path>.*)$')
687
def get_transport(base):
1215
def _reuse_for(self, other_base):
1216
# This is really needed for ConnectedTransport only, but it's easier to
1217
# have Transport refuses to be reused than testing that the reuse
1218
# should be asked to ConnectedTransport only.
1222
class _SharedConnection(object):
1223
"""A connection shared between several transports."""
1225
def __init__(self, connection=None, credentials=None, base=None):
1228
:param connection: An opaque object specific to each transport.
1230
:param credentials: An opaque object containing the credentials used to
1231
create the connection.
1233
self.connection = connection
1234
self.credentials = credentials
1238
class ConnectedTransport(Transport):
1239
"""A transport connected to a remote server.
1241
This class provide the basis to implement transports that need to connect
1244
Host and credentials are available as private attributes, cloning preserves
1245
them and share the underlying, protocol specific, connection.
1248
def __init__(self, base, _from_transport=None):
    """Constructor.

    The caller should ensure that _from_transport points at the same host
    as the new base.

    :param base: transport root URL

    :param _from_transport: optional transport to build from. The built
        transport will share the connection with this transport.
    """
    if not base.endswith('/'):
        base += '/'
    (self._scheme,
     self._user, self._password,
     self._host, self._port,
     self._path) = self._split_url(base)
    if _from_transport is not None:
        # Copy the password as it does not appear in base and will be lost
        # otherwise. It can appear in the _split_url above if the user
        # provided it on the command line. Otherwise, daughter classes will
        # prompt the user for one when appropriate.
        self._password = _from_transport._password

    base = self._unsplit_url(self._scheme,
                             self._user, self._password,
                             self._host, self._port,
                             self._path)

    super(ConnectedTransport, self).__init__(base)
    if _from_transport is None:
        self._shared_connection = _SharedConnection()
    else:
        self._shared_connection = _from_transport._shared_connection
1283
def clone(self, offset=None):
    """Return a new transport with root at self.base + offset

    We leave the daughter classes take advantage of the hint
    that it's a cloning not a raw creation.
    """
    if offset is None:
        return self.__class__(self.base, _from_transport=self)
    else:
        return self.__class__(self.abspath(offset), _from_transport=self)
1295
def _split_url(url):
    """
    Extract the server address, the credentials and the path from the url.

    user, password, host and path should be quoted if they contain reserved
    chars.

    :param url: an quoted url

    :return: (scheme, user, password, host, port, path) tuple, all fields
        are unquoted.
    """
    if isinstance(url, unicode):
        raise errors.InvalidURL('should be ascii:\n%r' % url)
    url = url.encode('utf-8')
    (scheme, netloc, path, params,
     query, fragment) = urlparse.urlparse(url, allow_fragments=False)
    user = password = host = port = None
    if '@' in netloc:
        user, host = netloc.rsplit('@', 1)
        if ':' in user:
            user, password = user.split(':', 1)
            password = urllib.unquote(password)
        user = urllib.unquote(user)
    else:
        host = netloc

    if ':' in host:
        host, port = host.rsplit(':', 1)
        try:
            port = int(port)
        except ValueError:
            raise errors.InvalidURL('invalid port number %s in url:\n%s' %
                                    (port, url))
    if host == '':
        raise errors.InvalidURL('Host empty in: %s' % url)

    host = urllib.unquote(host)
    path = urllib.unquote(path)

    return (scheme, user, password, host, port, path)
1338
def _unsplit_url(scheme, user, password, host, port, path):
    """
    Build the full URL for the given already URL encoded path.

    user, password, host and path will be quoted if they contain reserved
    chars.

    :param scheme: protocol

    :param user: login

    :param password: associated password

    :param host: the server address

    :param port: the associated port

    :param path: the absolute path on the server

    :return: The corresponding URL.
    """
    netloc = urllib.quote(host)
    if user is not None:
        # Note that we don't put the password back even if we
        # have one so that it doesn't get accidentally
        # exposed.
        netloc = '%s@%s' % (urllib.quote(user), netloc)
    if port is not None:
        netloc = '%s:%d' % (netloc, port)
    path = urllib.quote(path)
    return urlparse.urlunparse((scheme, netloc, path, None, None, None))
1370
def relpath(self, abspath):
    """Return the local path portion from a given absolute path"""
    scheme, user, password, host, port, path = self._split_url(abspath)
    error = []
    if (scheme != self._scheme):
        error.append('scheme mismatch')
    if (user != self._user):
        error.append('user name mismatch')
    if (host != self._host):
        error.append('host mismatch')
    if (port != self._port):
        error.append('port mismatch')
    if not (path == self._path[:-1] or path.startswith(self._path)):
        error.append('path mismatch')
    if error:
        extra = ', '.join(error)
        raise errors.PathNotChild(abspath, self.base, extra=extra)
    pl = len(self._path)
    return path[pl:].strip('/')
1390
def abspath(self, relpath):
    """Return the full url to the given relative path.

    :param relpath: the relative path urlencoded

    :returns: the Unicode version of the absolute path for relpath.
    """
    relative = urlutils.unescape(relpath).encode('utf-8')
    path = self._combine_paths(self._path, relative)
    return self._unsplit_url(self._scheme, self._user, self._password,
                             self._host, self._port,
                             path)
1403
def _remote_path(self, relpath):
    """Return the absolute path part of the url to the given relative path.

    This is the path that the remote server expect to receive in the
    requests, daughter classes should redefine this method if needed and
    use the result to build their requests.

    :param relpath: the path relative to the transport base urlencoded.

    :return: the absolute Unicode path on the server.
    """
    relative = urlutils.unescape(relpath).encode('utf-8')
    remote_path = self._combine_paths(self._path, relative)
    return remote_path
1418
def _get_shared_connection(self):
1419
"""Get the object shared amongst cloned transports.
1421
This should be used only by classes that needs to extend the sharing
1422
with objects other than transports.
1424
Use _get_connection to get the connection itself.
1426
return self._shared_connection
1428
def _set_connection(self, connection, credentials=None):
1429
"""Record a newly created connection with its associated credentials.
1431
Note: To ensure that connection is still shared after a temporary
1432
failure and a new one needs to be created, daughter classes should
1433
always call this method to set the connection and do so each time a new
1434
connection is created.
1436
:param connection: An opaque object representing the connection used by
1439
:param credentials: An opaque object representing the credentials
1440
needed to create the connection.
1442
self._shared_connection.connection = connection
1443
self._shared_connection.credentials = credentials
1445
def _get_connection(self):
1446
"""Returns the transport specific connection object."""
1447
return self._shared_connection.connection
1449
def _get_credentials(self):
1450
"""Returns the credentials used to establish the connection."""
1451
return self._shared_connection.credentials
1453
def _update_credentials(self, credentials):
1454
"""Update the credentials of the current connection.
1456
Some protocols can renegociate the credentials within a connection,
1457
this method allows daughter classes to share updated credentials.
1459
:param credentials: the updated credentials.
1461
# We don't want to call _set_connection here as we are only updating
1462
# the credentials not creating a new connection.
1463
self._shared_connection.credentials = credentials
1465
def _reuse_for(self, other_base):
1466
"""Returns a transport sharing the same connection if possible.
1468
Note: we share the connection if the expected credentials are the
1469
same: (host, port, user). Some protocols may disagree and redefine the
1470
criteria in daughter classes.
1472
Note: we don't compare the passwords here because other_base may have
1473
been obtained from an existing transport.base which do not mention the
1476
:param other_base: the URL we want to share the connection with.
1478
:return: A new transport or None if the connection cannot be shared.
1481
(scheme, user, password,
1482
host, port, path) = self._split_url(other_base)
1483
except errors.InvalidURL:
1484
# No hope in trying to reuse an existing transport for an invalid
1489
# Don't compare passwords, they may be absent from other_base or from
1490
# self and they don't carry more information than user anyway.
1491
if (scheme == self._scheme
1492
and user == self._user
1493
and host == self._host
1494
and port == self._port):
1495
if not path.endswith('/'):
1496
# This normally occurs at __init__ time, but it's easier to do
1497
# it now to avoid creating two transports for the same base.
1499
if self._path == path:
1500
# shortcut, it's really the same transport
1502
# We don't call clone here because the intent is different: we
1503
# build a new transport on a different base (which may be totally
1504
# unrelated) but we share the connection.
1505
transport = self.__class__(other_base, _from_transport=self)
1509
# We try to recognize an url lazily (ignoring user, password, etc)
1510
_urlRE = re.compile(r'^(?P<proto>[^:/\\]+)://(?P<rest>.*)$')
1512
def get_transport(base, possible_transports=None):
688
1513
"""Open a transport to access a URL or directory.
690
base is either a URL or a directory name.
1515
:param base: either a URL or a directory name.
1517
:param transports: optional reusable transports list. If not None, created
1518
transports will be added to the list.
1520
:return: A new transport optionally sharing its connection with one of
1521
possible_transports.
692
# TODO: give a better error if base looks like a url but there's no
693
# handler for the scheme?
694
global _protocol_handlers
695
1523
if base is None:
1526
from bzrlib.directory_service import directories
1527
base = directories.dereference(base)
698
1529
def convert_path_to_url(base, error_str):
699
1530
m = _urlRE.match(base)
701
1532
# This looks like a URL, but we weren't able to
702
1533
# instantiate it as such raise an appropriate error
703
raise errors.InvalidURL(base, error_str % m.group('proto'))
1534
# FIXME: we have a 'error_str' unused and we use last_err below
1535
raise errors.UnsupportedProtocol(base, last_err)
704
1536
# This doesn't look like a protocol, consider it a local path
705
1537
new_base = urlutils.local_path_to_url(base)
706
mutter('converting os path %r => url %s' , base, new_base)
1538
# mutter('converting os path %r => url %s', base, new_base)
709
1541
# Catch any URLs which are passing Unicode rather than ASCII
713
1545
# Only local paths can be Unicode
714
1546
base = convert_path_to_url(base,
715
1547
'URLs must be properly escaped (protocol: %s)')
717
for proto, factory_list in _protocol_handlers.iteritems():
1550
if possible_transports is not None:
1551
for t in possible_transports:
1552
t_same_connection = t._reuse_for(base)
1553
if t_same_connection is not None:
1554
# Add only new transports
1555
if t_same_connection not in possible_transports:
1556
possible_transports.append(t_same_connection)
1557
return t_same_connection
1559
for proto, factory_list in transport_list_registry.iteritems():
718
1560
if proto is not None and base.startswith(proto):
719
t = _try_transport_factories(base, factory_list)
1561
transport, last_err = _try_transport_factories(base, factory_list)
1563
if possible_transports is not None:
1564
if transport in possible_transports:
1565
raise AssertionError()
1566
possible_transports.append(transport)
723
1569
# We tried all the different protocols, now try one last time
724
1570
# as a local protocol
725
1571
base = convert_path_to_url(base, 'Unsupported protocol: %s')
727
1573
# The default handler is the filesystem handler, stored as protocol None
728
return _try_transport_factories(base, _protocol_handlers[None])
1574
factory_list = transport_list_registry.get(None)
1575
transport, last_err = _try_transport_factories(base, factory_list)
731
1580
def _try_transport_factories(base, factory_list):
732
1582
for factory in factory_list:
735
except DependencyNotPresent, e:
1584
return factory.get_obj()(base), None
1585
except errors.DependencyNotPresent, e:
736
1586
mutter("failed to instantiate transport %r for %r: %r" %
737
1587
(factory, base, e))
1590
return None, last_err
1593
def do_catching_redirections(action, transport, redirected):
1594
"""Execute an action with given transport catching redirections.
1596
This is a facility provided for callers needing to follow redirections
1597
silently. The silence is relative: it is the caller responsability to
1598
inform the user about each redirection or only inform the user of a user
1599
via the exception parameter.
1601
:param action: A callable, what the caller want to do while catching
1603
:param transport: The initial transport used.
1604
:param redirected: A callable receiving the redirected transport and the
1605
RedirectRequested exception.
1607
:return: Whatever 'action' returns
1609
MAX_REDIRECTIONS = 8
1611
# If a loop occurs, there is little we can do. So we don't try to detect
1612
# them, just getting out if too much redirections occurs. The solution
1613
# is outside: where the loop is defined.
1614
for redirections in range(MAX_REDIRECTIONS):
1616
return action(transport)
1617
except errors.RedirectRequested, e:
1618
redirection_notice = '%s is%s redirected to %s' % (
1619
e.source, e.permanently, e.target)
1620
transport = redirected(transport, e, redirection_notice)
1622
# Loop exited without resolving redirect ? Either the
1623
# user has kept a very very very old reference or a loop
1624
# occurred in the redirections. Nothing we can cure here:
1625
# tell the user. Note that as the user has been informed
1626
# about each redirection (it is the caller responsibility
1627
# to do that in redirected via the provided
1628
# redirection_notice). The caller may provide more
1629
# information if needed (like what file or directory we
1630
# were trying to act upon when the redirection loop
1632
raise errors.TooManyRedirections
742
1635
class Server(object):
771
1664
raise NotImplementedError
773
1666
def get_bogus_url(self):
774
"""Return a url for this protocol, that will fail to connect."""
1667
"""Return a url for this protocol, that will fail to connect.
1669
This may raise NotImplementedError to indicate that this server cannot
775
1672
raise NotImplementedError
778
class TransportTestProviderAdapter(object):
779
"""A tool to generate a suite testing all transports for a single test.
781
This is done by copying the test once for each transport and injecting
782
the transport_class and transport_server classes into each copy. Each copy
783
is also given a new id() to make it easy to identify.
786
def adapt(self, test):
788
for klass, server_factory in self._test_permutations():
789
new_test = deepcopy(test)
790
new_test.transport_class = klass
791
new_test.transport_server = server_factory
792
def make_new_test_id():
793
new_id = "%s(%s)" % (new_test.id(), server_factory.__name__)
794
return lambda: new_id
795
new_test.id = make_new_test_id()
796
result.addTest(new_test)
799
def get_transport_test_permutations(self, module):
800
"""Get the permutations module wants to have tested."""
801
if not hasattr(module, 'get_test_permutations'):
802
warning("transport module %s doesn't provide get_test_permutations()"
805
return module.get_test_permutations()
807
def _test_permutations(self):
808
"""Return a list of the klass, server_factory pairs to test."""
810
for module in _get_transport_modules():
812
result.extend(self.get_transport_test_permutations(reduce(getattr,
813
(module).split('.')[1:],
814
__import__(module))))
815
except errors.DependencyNotPresent, e:
816
# Continue even if a dependency prevents us
817
# from running this test
822
class TransportLogger(object):
823
"""Adapt a transport to get clear logging data on api calls.
825
Feel free to extend to log whatever calls are of interest.
828
def __init__(self, adapted):
829
self._adapted = adapted
833
self._calls.append((name,))
834
return self._adapted.get(name)
836
def __getattr__(self, name):
837
"""Thunk all undefined access through to self._adapted."""
838
# raise AttributeError, name
839
return getattr(self._adapted, name)
841
def readv(self, name, offsets):
842
self._calls.append((name, offsets))
843
return self._adapted.readv(name, offsets)
846
1675
# None is the default transport, for things with no url scheme
847
register_lazy_transport(None, 'bzrlib.transport.local', 'LocalTransport')
1676
register_transport_proto('file://',
1677
help="Access using the standard filesystem (default)")
848
1678
register_lazy_transport('file://', 'bzrlib.transport.local', 'LocalTransport')
1679
transport_list_registry.set_default_transport("file://")
1681
register_transport_proto('sftp://',
1682
help="Access using SFTP (most SSH servers provide SFTP).",
1683
register_netloc=True)
849
1684
register_lazy_transport('sftp://', 'bzrlib.transport.sftp', 'SFTPTransport')
1685
# Decorated http transport
1686
register_transport_proto('http+urllib://',
1687
# help="Read-only access of branches exported on the web."
1688
register_netloc=True)
850
1689
register_lazy_transport('http+urllib://', 'bzrlib.transport.http._urllib',
851
1690
'HttpTransport_urllib')
1691
register_transport_proto('https+urllib://',
1692
# help="Read-only access of branches exported on the web using SSL."
1693
register_netloc=True)
852
1694
register_lazy_transport('https+urllib://', 'bzrlib.transport.http._urllib',
853
1695
'HttpTransport_urllib')
1696
register_transport_proto('http+pycurl://',
1697
# help="Read-only access of branches exported on the web."
1698
register_netloc=True)
854
1699
register_lazy_transport('http+pycurl://', 'bzrlib.transport.http._pycurl',
855
1700
'PyCurlTransport')
1701
register_transport_proto('https+pycurl://',
1702
# help="Read-only access of branches exported on the web using SSL."
1703
register_netloc=True)
856
1704
register_lazy_transport('https+pycurl://', 'bzrlib.transport.http._pycurl',
857
1705
'PyCurlTransport')
1706
# Default http transports (last declared wins (if it can be imported))
1707
register_transport_proto('http://',
1708
help="Read-only access of branches exported on the web.")
1709
register_transport_proto('https://',
1710
help="Read-only access of branches exported on the web using SSL.")
858
1711
register_lazy_transport('http://', 'bzrlib.transport.http._urllib',
859
1712
'HttpTransport_urllib')
860
1713
register_lazy_transport('https://', 'bzrlib.transport.http._urllib',
861
1714
'HttpTransport_urllib')
862
register_lazy_transport('http://', 'bzrlib.transport.http._pycurl', 'PyCurlTransport')
863
register_lazy_transport('https://', 'bzrlib.transport.http._pycurl', 'PyCurlTransport')
1715
register_lazy_transport('http://', 'bzrlib.transport.http._pycurl',
1717
register_lazy_transport('https://', 'bzrlib.transport.http._pycurl',
1720
register_transport_proto('ftp://', help="Access using passive FTP.")
864
1721
register_lazy_transport('ftp://', 'bzrlib.transport.ftp', 'FtpTransport')
1722
register_transport_proto('aftp://', help="Access using active FTP.")
865
1723
register_lazy_transport('aftp://', 'bzrlib.transport.ftp', 'FtpTransport')
866
register_lazy_transport('memory://', 'bzrlib.transport.memory', 'MemoryTransport')
867
register_lazy_transport('readonly+', 'bzrlib.transport.readonly', 'ReadonlyTransportDecorator')
868
register_lazy_transport('fakenfs+', 'bzrlib.transport.fakenfs', 'FakeNFSTransportDecorator')
869
register_lazy_transport('vfat+',
1725
register_transport_proto('memory://')
1726
register_lazy_transport('memory://', 'bzrlib.transport.memory',
1729
# chroots cannot be implicitly accessed, they must be explicitly created:
1730
register_transport_proto('chroot+')
1732
register_transport_proto('readonly+',
1733
# help="This modifier converts any transport to be readonly."
1735
register_lazy_transport('readonly+', 'bzrlib.transport.readonly',
1736
'ReadonlyTransportDecorator')
1738
register_transport_proto('fakenfs+')
1739
register_lazy_transport('fakenfs+', 'bzrlib.transport.fakenfs',
1740
'FakeNFSTransportDecorator')
1742
register_transport_proto('trace+')
1743
register_lazy_transport('trace+', 'bzrlib.transport.trace',
1744
'TransportTraceDecorator')
1746
register_transport_proto('unlistable+')
1747
register_lazy_transport('unlistable+', 'bzrlib.transport.unlistable',
1748
'UnlistableTransportDecorator')
1750
register_transport_proto('brokenrename+')
1751
register_lazy_transport('brokenrename+', 'bzrlib.transport.brokenrename',
1752
'BrokenRenameTransportDecorator')
1754
register_transport_proto('vfat+')
1755
register_lazy_transport('vfat+',
870
1756
'bzrlib.transport.fakevfat',
871
1757
'FakeVFATTransportDecorator')
1759
register_transport_proto('nosmart+')
1760
register_lazy_transport('nosmart+', 'bzrlib.transport.nosmart',
1761
'NoSmartTransportDecorator')
1763
# These two schemes were registered, but don't seem to have an actual transport
1764
# protocol registered
1765
for scheme in ['ssh', 'bzr+loopback']:
1766
register_urlparse_netloc_protocol(scheme)
1769
register_transport_proto('bzr://',
1770
help="Fast access using the Bazaar smart server.",
1771
register_netloc=True)
1773
register_lazy_transport('bzr://', 'bzrlib.transport.remote',
1774
'RemoteTCPTransport')
1775
register_transport_proto('bzr-v2://', register_netloc=True)
1777
register_lazy_transport('bzr-v2://', 'bzrlib.transport.remote',
1778
'RemoteTCPTransportV2Only')
1779
register_transport_proto('bzr+http://',
1780
# help="Fast access using the Bazaar smart server over HTTP."
1781
register_netloc=True)
1782
register_lazy_transport('bzr+http://', 'bzrlib.transport.remote',
1783
'RemoteHTTPTransport')
1784
register_transport_proto('bzr+https://',
1785
# help="Fast access using the Bazaar smart server over HTTPS."
1786
register_netloc=True)
1787
register_lazy_transport('bzr+https://',
1788
'bzrlib.transport.remote',
1789
'RemoteHTTPTransport')
1790
register_transport_proto('bzr+ssh://',
1791
help="Fast access using the Bazaar smart server over SSH.",
1792
register_netloc=True)
1793
register_lazy_transport('bzr+ssh://', 'bzrlib.transport.remote',
1794
'RemoteSSHTransport')