class _SFTPReadvHelper(object):
    """A class to help with managing the state of a readv request."""

    # See _get_requests for an explanation.
    _max_request_size = 32768

    def __init__(self, original_offsets, relpath, _report_activity):
        """Create a new readv helper.

        :param original_offsets: The original requests given by the caller of
            readv()
        :param relpath: The name of the file (if known)
        :param _report_activity: A Transport._report_activity bound method,
            to be called as data arrives.
        """
        self.original_offsets = list(original_offsets)
        self.relpath = relpath
        self._report_activity = _report_activity

    def _get_requests(self):
        """Break up the offsets into individual requests over sftp.

        The SFTP spec only requires implementers to support 32kB requests. We
        could try something larger (openssh supports 64kB), but then we have to
        handle requests that fail.
        So instead, we just break up our maximum chunks into 32kB chunks, and
        asynchronously request them.
        Newer versions of paramiko would do the chunking for us, but we want to
        start processing results right away, so we do it ourselves.
        """
        # TODO: Because we issue async requests, we don't 'fudge' any extra
        #       data. I'm not 100% sure that is the best choice.
        # The first thing we do is collapse the individual requests as much
        # as possible, so we don't issue requests <32kB
        sorted_offsets = sorted(self.original_offsets)
        coalesced = list(ConnectedTransport._coalesce_offsets(sorted_offsets,
                                                        limit=0, fudge_factor=0))
        requests = []
        for c_offset in coalesced:
            start = c_offset.start
            size = c_offset.length

            # Break this up into 32kB requests
            while size > 0:
                next_size = min(size, self._max_request_size)
                requests.append((start, next_size))
                size -= next_size
                start += next_size
        if 'sftp' in debug.debug_flags:
            mutter('SFTP.readv(%s) %s offsets => %s coalesced => %s requests',
                   self.relpath, len(sorted_offsets), len(coalesced),
                   len(requests))
        return requests
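
    # Illustrative sketch (added; not in the original source): for a caller
    # asking for offsets [(0, 40000), (40000, 5000)], _coalesce_offsets merges
    # them into a single 45000 byte range starting at 0, and the loop above
    # splits that range at the 32768 byte boundary, producing the requests
    # [(0, 32768), (32768, 12232)].
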
    def request_and_yield_offsets(self, fp):
        """Request the data from the remote machine, yielding the results.

        :param fp: A Paramiko SFTPFile object that supports readv.
        :return: Yield the data requested by the original readv caller, one by
            one.
        """
        requests = self._get_requests()
        offset_iter = iter(self.original_offsets)
        cur_offset, cur_size = offset_iter.next()
        # paramiko .readv() yields strings that are in the order of the requests
        # So we track the current request to know where the next data is
        # being returned from.
        input_start = None
        last_end = None
        buffered_data = []
        buffered_len = 0

        # This is used to buffer chunks which we couldn't process yet
        # It is (start, data) tuples.
        data_chunks = []
        # Create an 'unlimited' data stream, so we stop based on requests,
        # rather than just because the data stream ended. This lets us detect
        # short reads.
        data_stream = itertools.chain(fp.readv(requests),
                                      itertools.repeat(None))
        for (start, length), data in itertools.izip(requests, data_stream):
            if data is None:
                # The data stream ended before all requests were satisfied.
                raise errors.ShortReadvError(self.relpath,
                    start, length, 0)
            if len(data) != length:
                raise errors.ShortReadvError(self.relpath,
                    start, length, len(data))
            self._report_activity(length, 'read')
            if last_end is None:
                # This is the first request, just buffer it
                buffered_data = [data]
                buffered_len = length
                input_start = start
            elif start == last_end:
                # The data we are reading fits neatly on the previous
                # buffer, so this is all part of a larger coalesced range.
                buffered_data.append(data)
                buffered_len += length
            else:
                # We have an 'interrupt' in the data stream. So we know we are
                # at a request boundary.
                if buffered_len > 0:
                    # We haven't consumed the buffer so far, so put it into
                    # data_chunks, and continue.
                    buffered = ''.join(buffered_data)
                    data_chunks.append((input_start, buffered))
                input_start = start
                buffered_data = [data]
                buffered_len = length
            last_end = start + length
            if input_start == cur_offset and cur_size <= buffered_len:
                # Simplify the next steps a bit by transforming buffered_data
                # into a single string. We also have the nice property that
                # when there is only one string ''.join([x]) == x, so there is
                # no data copying.
                buffered = ''.join(buffered_data)
                # Clean out buffered data so that we keep memory
                # consumption low
                del buffered_data[:]
                buffered_offset = 0
                # TODO: We *could* also consider the case where cur_offset is
                #       in the buffered range, even though it doesn't *start*
                #       the buffered range. But for packs we pretty much always
                #       read in order, so you won't get any extra data in the
                #       middle.
                while (input_start == cur_offset
                       and (buffered_offset + cur_size) <= buffered_len):
                    # We've buffered enough data to process this request, spit it
                    # out
                    cur_data = buffered[buffered_offset:buffered_offset + cur_size]
                    # move the direct pointer into our buffered data
                    buffered_offset += cur_size
                    # Move the start-of-buffer pointer
                    input_start += cur_size
                    # Yield the requested data
                    yield cur_offset, cur_data
                    cur_offset, cur_size = offset_iter.next()
                # at this point, we've consumed as much of buffered as we can,
                # so break off the portion that we consumed
                if buffered_offset == len(buffered):
                    # No tail to leave behind
                    buffered_data = []
                    buffered_len = 0
                else:
                    buffered = buffered[buffered_offset:]
                    buffered_data = [buffered]
                    buffered_len = len(buffered)
        if buffered_len:
            buffered = ''.join(buffered_data)
            del buffered_data[:]
            data_chunks.append((input_start, buffered))
        if data_chunks:
            if 'sftp' in debug.debug_flags:
                mutter('SFTP readv left with %d out-of-order bytes',
                    sum(map(lambda x: len(x[1]), data_chunks)))
            # We've processed all the readv data, at this point, anything we
            # couldn't process is in data_chunks. This doesn't happen often, so
            # this code path isn't optimized
            # We use an interesting process for data_chunks
            # Specifically if we have "bisect_left([(start, data)], (qstart,))"
            # If start == qstart, then we get the specific node. Otherwise we
            # get the previous node
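            # Worked example (added for clarity; the values are illustrative,
            # not from the original code): if data_chunks is
            # [(0, 'aaaa'), (10, 'bbbb')], then
            #   bisect.bisect_left(data_chunks, (10,)) == 1 and
            #   data_chunks[1][0] == 10, so offset 10 is found directly, while
            #   bisect.bisect_left(data_chunks, (12,)) == 2, so we fall back to
            #   index 1 and slice into the chunk that starts at offset 10.
            # The tuple (10,) sorts before (10, 'bbbb') because it is a prefix.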
            while True:
                idx = bisect.bisect_left(data_chunks, (cur_offset,))
                if idx < len(data_chunks) and data_chunks[idx][0] == cur_offset:
                    # The data starts here
                    data = data_chunks[idx][1][:cur_size]
                elif idx > 0:
                    # The data is in a portion of a previous page
                    idx -= 1
                    sub_offset = cur_offset - data_chunks[idx][0]
                    data = data_chunks[idx][1]
                    data = data[sub_offset:sub_offset + cur_size]
                else:
                    # We are missing the page where the data should be found,
                    # something is wrong
                    data = ''
                if len(data) != cur_size:
                    raise AssertionError('We must have miscalculated.'
                        ' We expected %d bytes, but only found %d'
                        % (cur_size, len(data)))
                yield cur_offset, data
                cur_offset, cur_size = offset_iter.next()
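
    # Usage sketch (illustrative, not part of the original module): given an
    # open paramiko SFTPFile `fp` and the offsets a caller handed to readv(),
    # the helper yields (offset, data) pairs back in the caller's order:
    #
    #   helper = _SFTPReadvHelper(offsets, relpath, report_activity)
    #   for offset, data in helper.request_and_yield_offsets(fp):
    #       consume(offset, data)   # `consume` is a hypothetical callback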


class SFTPTransport(ConnectedTransport):
    """Transport implementation for SFTP access."""

            readv = getattr(fp, 'readv', None)
            if readv:
                return self._sftp_readv(fp, offsets, relpath)
            if 'sftp' in debug.debug_flags:
                mutter('seek and read %s offsets', len(offsets))
            return self._seek_and_read(fp, offsets, relpath)
        except (IOError, paramiko.SSHException), e:
            self._translate_io_exception(e, path, ': error retrieving')

    def recommended_page_size(self):
        """See Transport.recommended_page_size().

        For SFTP we suggest a large page size to reduce the overhead
        introduced by latency.
        """

    def _sftp_readv(self, fp, offsets, relpath):
        """Use the readv() member of fp to do async readv.

        Then read them using paramiko.readv(). paramiko.readv()
        does not support ranges > 64K, so it caps the request size, and
        just reads until it gets all the stuff it wants.
        """
        helper = _SFTPReadvHelper(offsets, relpath, self._report_activity)
        return helper.request_and_yield_offsets(fp)

        offsets = list(offsets)
        sorted_offsets = sorted(offsets)

        # The algorithm works as follows:
        # 1) Coalesce nearby reads into a single chunk
        #    This generates a list of combined regions, the total size
        #    and the size of the sub regions. This coalescing step is limited
        #    in the number of nearby chunks to combine, and is allowed to
        #    skip small breaks in the requests. Limiting it makes sure that
        #    we can start yielding some data earlier, and skipping means we
        #    make fewer requests. (Beneficial even when using async)
        # 2) Break up these combined regions into chunks that are smaller
        #    than 64KiB. Technically the limit is 65536, but we are a
        #    little bit conservative. This is because sftp has a maximum
        #    return chunk size of 64KiB (max size of an unsigned short)
        # 3) Issue a readv() to paramiko to create an async request for
        #    all of this data
        # 4) Read in the data as it comes back, until we've read one
        #    continuous section as determined in step 1
        # 5) Break up the full sections into hunks for the original requested
        #    offsets. And put them in a cache
        # 6) Check if the next request is in the cache, and if it is, remove
        #    it from the cache, and yield its data. Continue until no more
        #    entries are in the cache.
        # 7) loop back to step 4 until all data has been read
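        # Illustrative example (added; not in the original comments):
        # requests for (0, 100) and (100, 200) coalesce in step 1 into a
        # single 300 byte region starting at offset 0; step 2 would split any
        # region larger than 32768 bytes, and steps 5-6 slice the two
        # original answers back out of the combined buffer via the cache.
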
        # TODO: jam 20060725 This could be optimized one step further, by
        #       attempting to yield whatever data we have read, even before
        #       the first coalesced section has been fully processed.

        # When coalescing for use with readv(), we don't really need to
        # use any fudge factor, because the requests are made asynchronously
        coalesced = list(self._coalesce_offsets(sorted_offsets,
                               limit=self._max_readv_combine,
                               fudge_factor=0,
                               ))
        requests = []
        for c_offset in coalesced:
            start = c_offset.start
            size = c_offset.length

            # We need to break this up into multiple requests
            while size > 0:
                next_size = min(size, self._max_request_size)
                requests.append((start, next_size))
                size -= next_size
                start += next_size

        mutter('SFTP.readv() %s offsets => %s coalesced => %s requests',
               len(offsets), len(coalesced), len(requests))

        # Queue the current read until we have read the full coalesced section
        cur_data = []
        cur_data_len = 0
        cur_coalesced_stack = iter(coalesced)
        cur_coalesced = cur_coalesced_stack.next()

        # Cache the results, but only until they have been fulfilled
        data_map = {}
        # turn the list of offsets into a stack
        offset_stack = iter(offsets)
        cur_offset_and_size = offset_stack.next()

        for data in fp.readv(requests):
            cur_data.append(data)
            cur_data_len += len(data)

            if cur_data_len < cur_coalesced.length:
                continue
            assert cur_data_len == cur_coalesced.length, \
                "Somehow we read too much: %s != %s" % (cur_data_len,
                                                        cur_coalesced.length)
            all_data = ''.join(cur_data)
            cur_data = []
            cur_data_len = 0

            for suboffset, subsize in cur_coalesced.ranges:
                key = (cur_coalesced.start+suboffset, subsize)
                data_map[key] = all_data[suboffset:suboffset+subsize]

            # Now that we've read some data, see if we can yield anything back
            while cur_offset_and_size in data_map:
                this_data = data_map.pop(cur_offset_and_size)
                yield cur_offset_and_size[0], this_data
                cur_offset_and_size = offset_stack.next()

            # We read a coalesced entry, so mark it as done
            cur_coalesced = None
            # Now that we've read all of the data for this coalesced section
            # move on to the next
            try:
                cur_coalesced = cur_coalesced_stack.next()
            except StopIteration:
                pass

        if cur_coalesced is not None:
            raise errors.ShortReadvError(relpath, cur_coalesced.start,
                                         cur_coalesced.length, len(data))

    def put_file(self, relpath, f, mode=None):


# ------------- server test implementation --------------

from bzrlib.tests.stub_sftp import StubServer, StubSFTPServer

STUB_SERVER_KEY = """
-----BEGIN RSA PRIVATE KEY-----
MIICWgIBAAKBgQDTj1bqB4WmayWNPB+8jVSYpZYk80Ujvj680pOTh2bORBjbIAyz
oWGW+GUjzKxTiiPvVmxFgx5wdsFvF03v34lEVVhMpouqPAYQ15N37K/ir5XY+9m/
d8ufMCkjeXsQkKqFbAlQcnWMCRnOoPHS3I4vi6hmnDDeeYTSRvfLbW0fhwIBIwKB
gBIiOqZYaoqbeD9OS9z2K9KR2atlTxGxOJPXiP4ESqP3NVScWNwyZ3NXHpyrJLa0
EbVtzsQhLn6rF+TzXnOlcipFvjsem3iYzCpuChfGQ6SovTcOjHV9z+hnpXvQ/fon
soVRZY65wKnF7IAoUwTmJS9opqgrN6kRgCd3DASAMd1bAkEA96SBVWFt/fJBNJ9H
tYnBKZGw0VeHOYmVYbvMSstssn8un+pQpUm9vlG/bp7Oxd/m+b9KWEh2xPfv6zqU
avNwHwJBANqzGZa/EpzF4J8pGti7oIAPUIDGMtfIcmqNXVMckrmzQ2vTfqtkEZsA
4rE1IERRyiJQx6EJsz21wJmGV9WJQ5kCQQDwkS0uXqVdFzgHO6S++tjmjYcxwr3g
H0CoFYSgbddOT6miqRskOQF3DZVkJT3kyuBgU2zKygz52ukQZMqxCb1fAkASvuTv
qfpH87Qq5kQhNKdbbwbmd2NxlNabazPijWuphGTdW0VfJdWfklyS2Kr+iqrs/5wV
HhathJt636Eg7oIjAkA8ht3MQ+XSl9yIJIS8gVpbPxSw5OMfw0PjVE7tBdQruiSc
nvuQES5C9BMHjF39LZiGH1iLQy7FgdHyoP+eodI7
-----END RSA PRIVATE KEY-----
"""


class SocketListener(threading.Thread):

    def __init__(self, callback):
        threading.Thread.__init__(self)
        self._callback = callback
        self._socket = socket.socket()
        self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self._socket.bind(('localhost', 0))
        self._socket.listen(1)
        self.port = self._socket.getsockname()[1]
        self._stop_event = threading.Event()

    def stop(self):
        # called from outside this thread
        self._stop_event.set()
        # use a timeout here, because if the test fails, the server thread may
        # never notice the stop_event.
        self.join(5.0)
        self._socket.close()

    def run(self):
        while True:
            readable, writable_unused, exception_unused = \
                select.select([self._socket], [], [], 0.1)
            if self._stop_event.isSet():
                return
            if len(readable) == 0:
                continue
            try:
                s, addr_unused = self._socket.accept()
                # because the loopback socket is inline, and transports are
                # never explicitly closed, best to launch a new thread.
                threading.Thread(target=self._callback, args=(s,)).start()
            except socket.error, x:
                sys.excepthook(*sys.exc_info())
                warning('Socket error during accept() within unit test server'
                        ' thread: %r' % x)
            except Exception, x:
                # probably a failed test; unit test thread will log the
                # failure/error
                sys.excepthook(*sys.exc_info())
                warning('Exception from within unit test server thread: %r' %
                        x)


class SocketDelay(object):
    """A socket decorator to make TCP appear slower.

    This changes recv, send, and sendall to add a fixed latency to each python
    call if a new roundtrip is detected. That is, when a recv is called and the
    flag new_roundtrip is set, latency is charged. Every send and send_all
    transfer sets this flag.

    In addition every send, sendall and recv sleeps a bit per character sent to
    simulate bandwidth.

    Not all methods are implemented, this is deliberate as this class is not a
    replacement for the builtin sockets layer. fileno is not implemented to
    prevent the proxy being bypassed.
    """
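
    # Worked example (added; the numbers are illustrative): with the default
    # bandwidth of 1.0 megabit/s, time_per_byte = 1 / (1.0 / 8.0 * 1024 * 1024)
    # which is roughly 7.6e-6 seconds, so sendall() on a 4096 byte buffer
    # sleeps about 31 ms, plus `latency` once at the start of each new
    # simulated roundtrip.
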
    simulated_time = 0
    _proxied_arguments = dict.fromkeys([
        "close", "getpeername", "getsockname", "getsockopt", "gettimeout",
        "setblocking", "setsockopt", "settimeout", "shutdown"])

    def __init__(self, sock, latency, bandwidth=1.0,
                 really_sleep=True):
        """
        :param bandwidth: simulated bandwidth (MegaBit)
        :param really_sleep: If set to false, the SocketDelay will just
        increase a counter, instead of calling time.sleep. This is useful for
        unittesting the SocketDelay.
        """
        self.sock = sock
        self.latency = latency
        self.really_sleep = really_sleep
        self.time_per_byte = 1 / (bandwidth / 8.0 * 1024 * 1024)
        self.new_roundtrip = False

    def sleep(self, s):
        if self.really_sleep:
            time.sleep(s)
        else:
            SocketDelay.simulated_time += s

    def __getattr__(self, attr):
        if attr in SocketDelay._proxied_arguments:
            return getattr(self.sock, attr)
        raise AttributeError("'SocketDelay' object has no attribute %r" %
                             attr)

    def dup(self):
        return SocketDelay(self.sock.dup(), self.latency, self.time_per_byte,
                           self.really_sleep)

    def recv(self, *args):
        data = self.sock.recv(*args)
        if data and self.new_roundtrip:
            self.new_roundtrip = False
            self.sleep(self.latency)
        self.sleep(len(data) * self.time_per_byte)
        return data

    def sendall(self, data, flags=0):
        if not self.new_roundtrip:
            self.new_roundtrip = True
            self.sleep(self.latency)
        self.sleep(len(data) * self.time_per_byte)
        return self.sock.sendall(data, flags)

    def send(self, data, flags=0):
        if not self.new_roundtrip:
            self.new_roundtrip = True
            self.sleep(self.latency)
        bytes_sent = self.sock.send(data, flags)
        self.sleep(bytes_sent * self.time_per_byte)
        return bytes_sent


class SFTPServer(Server):
    """Common code for SFTP server facilities."""

    def __init__(self, server_interface=StubServer):
        self._original_vendor = None
        self._server_homedir = None
        self._listener = None
        self._vendor = ssh.ParamikoVendor()
        self._server_interface = server_interface
        self.logs = []
        self.add_latency = 0

    def _get_sftp_url(self, path):
        """Calculate an sftp url to this server for path."""
        return 'sftp://foo:bar@localhost:%d/%s' % (self._listener.port, path)

    def log(self, message):
        """StubServer uses this to log when a new server is created."""
        self.logs.append(message)

    def _run_server_entry(self, sock):
        """Entry point for all implementations of _run_server.

        If self.add_latency is > 0.000001 then sock is given a latency adding
        decorator.
        """
        if self.add_latency > 0.000001:
            sock = SocketDelay(sock, self.add_latency)
        return self._run_server(sock)

    def _run_server(self, s):
        ssh_server = paramiko.Transport(s)
        key_file = pathjoin(self._homedir, 'test_rsa.key')
        f = open(key_file, 'w')
        f.write(STUB_SERVER_KEY)
        f.close()
        host_key = paramiko.RSAKey.from_private_key_file(key_file)
        ssh_server.add_server_key(host_key)
        server = self._server_interface(self)
        ssh_server.set_subsystem_handler('sftp', paramiko.SFTPServer,
                                         StubSFTPServer, root=self._root,
                                         home=self._server_homedir)
        event = threading.Event()
        ssh_server.start_server(event, server)
        event.wait(5.0)

    def setUp(self, backing_server=None):
        # XXX: TODO: make sftpserver back onto backing_server rather than local
        # disk.
        assert (backing_server is None or
                isinstance(backing_server, local.LocalURLServer)), (
            "backing_server should not be %r, because this can only serve the "
            "local current working directory." % (backing_server,))
        self._original_vendor = ssh._ssh_vendor_manager._cached_ssh_vendor
        ssh._ssh_vendor_manager._cached_ssh_vendor = self._vendor
        if sys.platform == 'win32':
            # Win32 needs to use the UNICODE api
            self._homedir = getcwd()
        else:
            # But Linux SFTP servers should just deal in bytestreams
            self._homedir = os.getcwd()
        if self._server_homedir is None:
            self._server_homedir = self._homedir
        self._root = '/'
        if sys.platform == 'win32':
            self._root = ''
        self._listener = SocketListener(self._run_server_entry)
        self._listener.setDaemon(True)
        self._listener.start()

    def tearDown(self):
        """See bzrlib.transport.Server.tearDown."""
        self._listener.stop()
        ssh._ssh_vendor_manager._cached_ssh_vendor = self._original_vendor

    def get_bogus_url(self):
        """See bzrlib.transport.Server.get_bogus_url."""
        # this is chosen to try to prevent trouble with proxies, weird dns, etc
        # we bind a random socket, so that we get a guaranteed unused port
        # we just never listen on that port
        s = socket.socket()
        s.bind(('localhost', 0))
        return 'sftp://%s:%s/' % s.getsockname()


class SFTPFullAbsoluteServer(SFTPServer):
    """A test server for sftp transports, using absolute urls and ssh."""

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        homedir = self._homedir
        if sys.platform != 'win32':
            # Remove the initial '/' on all platforms but win32
            homedir = homedir[1:]
        return self._get_sftp_url(urlutils.escape(homedir))


class SFTPServerWithoutSSH(SFTPServer):
    """An SFTP server that uses a simple TCP socket pair rather than SSH."""

    def __init__(self):
        super(SFTPServerWithoutSSH, self).__init__()
        self._vendor = ssh.LoopbackVendor()

    def _run_server(self, sock):
        # Re-import these as locals, so that they're still accessible during
        # interpreter shutdown (when all module globals get set to None, leading
        # to confusing errors like "'NoneType' object has no attribute 'error'".
        class FakeChannel(object):
            def get_transport(self):
                return self
            def get_log_channel(self):
                return 'paramiko'
            def get_hexdump(self):
                return False

        server = paramiko.SFTPServer(FakeChannel(), 'sftp', StubServer(self), StubSFTPServer,
                                     root=self._root, home=self._server_homedir)
        try:
            server.start_subsystem('sftp', None, sock)
        except socket.error, e:
            if (len(e.args) > 0) and (e.args[0] == errno.EPIPE):
                # it's okay for the client to disconnect abruptly
                # (bug in paramiko 1.6: it should absorb this exception)
                pass
            else:
                raise
        except Exception, e:
            # This typically seems to happen during interpreter shutdown, so
            # most of the useful ways to report this error won't work.
            # Writing the exception type, and then the text of the exception,
            # seems to be the best we can do.
            sys.stderr.write('\nEXCEPTION %r: ' % (e.__class__,))
            sys.stderr.write('%s\n\n' % (e,))
        server.finish_subsystem()


class SFTPAbsoluteServer(SFTPServerWithoutSSH):
    """A test server for sftp transports, using absolute urls."""

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        homedir = self._homedir
        if sys.platform != 'win32':
            # Remove the initial '/' on all platforms but win32
            homedir = homedir[1:]
        return self._get_sftp_url(urlutils.escape(homedir))


class SFTPHomeDirServer(SFTPServerWithoutSSH):
    """A test server for sftp transports, using homedir relative urls."""

    def get_url(self):
        """See bzrlib.transport.Server.get_url."""
        return self._get_sftp_url("~/")


class SFTPSiblingAbsoluteServer(SFTPAbsoluteServer):
    """A test server for sftp transports where only absolute paths will work.

    It does this by serving from a deeply-nested directory that doesn't exist.
    """

    def setUp(self, backing_server=None):
        self._server_homedir = '/dev/noone/runs/tests/here'
        super(SFTPSiblingAbsoluteServer, self).setUp(backing_server)


def get_test_permutations():
    """Return the permutations to be used in testing."""
    return [(SFTPTransport, SFTPAbsoluteServer),
            (SFTPTransport, SFTPHomeDirServer),
            (SFTPTransport, SFTPSiblingAbsoluteServer),
            ]