/brz/remove-bazaar : revision 7532

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

16

17

"""Base implementation of Transport over http using urllib.

18

19

There are separate implementation modules for each http client implementation.

20

"""

21

22

from __future__ import absolute_import

23

24

DEBUG = 0

25

26

import base64

27

import cgi

28

import errno

29

import os

30

import re

31

import socket

32

import ssl

33

import sys

34

import time

35

import urllib

36

import weakref

37

38

try:

39

import http.client as http_client

40

except ImportError:

41

import httplib as http_client

42

try:

43

import urllib.request as urllib_request

44

except ImportError: # python < 3

45

import urllib2 as urllib_request

46

try:

47

from urllib.parse import urljoin, splitport, splittype, splithost, urlencode

48

except ImportError:

49

from urlparse import urljoin

50

from urllib import splitport, splittype, splithost, urlencode

51

52

# TODO: handle_response should be integrated into the http/__init__.py

53

from .response import handle_response

54

55

# FIXME: Oversimplifying, two kinds of exceptions should be

56

# raised, once a request is issued: URLError before we have been

57

# able to process the response, HTTPError after that. Process the

58

# response means we are able to leave the socket clean, so if we

59

# are not able to do that, we should close the connection. The

60

# actual code more or less does that; tests should be written to

61

# ensure that.

62

63

from ... import __version__ as breezy_version

64

from ... import (

65

config,

66

debug,

67

errors,

68

lazy_import,

69

osutils,

70

trace,

71

transport,

72

ui,

73

urlutils,

74

)

75

from ...bzr.smart import medium

76

from ...trace import mutter

77

from ...transport import (

78

ConnectedTransport,

79

UnusableRedirect,

80

)

81

82

from . import default_user_agent, ssl

83

84

85

checked_kerberos = False

86

kerberos = None

87

88

89

class addinfourl(urllib_request.addinfourl):
    """Replacement addinfourl class compatible with python-2.7's xmlrpclib.

    In python-2.7, xmlrpclib expects that the response object that it receives
    has a getheader method. http_client.HTTPResponse provides this but
    urllib_request.addinfourl does not. Add the necessary functions here,
    ported to use the internal data structures of addinfourl.
    """

    def getheader(self, name, default=None):
        """Return the value of the header ``name``.

        :param name: the header name to look up.
        :param default: value returned when the header is absent.
        :return: the header value, or ``default``.
        :raises http_client.ResponseNotReady: if no headers are attached.
        """
        if self.headers is None:
            raise http_client.ResponseNotReady()
        # Use the mapping-style get(): the message object attached to a
        # response under Python 3 has no getheader() method.
        return self.headers.get(name, default)

    def getheaders(self):
        """Return all the headers as a list of (name, value) pairs.

        :raises http_client.ResponseNotReady: if no headers are attached.
        """
        if self.headers is None:
            raise http_client.ResponseNotReady()
        return list(self.headers.items())

107

108

109

class _ReportingFileSocket(object):
    """File-like proxy reporting the size of every read to a callback.

    Attributes not defined here are looked up on the wrapped file object.
    """

    def __init__(self, filesock, report_activity=None):
        """Wrap ``filesock``.

        :param filesock: the file-like object to read from.
        :param report_activity: optional callable taking (size, direction).
        """
        self._report_activity = report_activity
        self.filesock = filesock

    def report_activity(self, size, direction):
        """Forward (size, direction) to the callback, if one was given."""
        notify = self._report_activity
        if notify:
            notify(size, direction)

    def _account(self, chunk):
        """Report ``len(chunk)`` bytes as read and hand the chunk back."""
        self.report_activity(len(chunk), 'read')
        return chunk

    def read(self, size=1):
        """Read up to ``size`` bytes and report how many were obtained."""
        return self._account(self.filesock.read(size))

    def readline(self, size=-1):
        """Read one line and report its length."""
        return self._account(self.filesock.readline(size))

    def readinto(self, b):
        """Fill the buffer ``b`` and report the count the file returned."""
        count = self.filesock.readinto(b)
        self.report_activity(count, 'read')
        return count

    def __getattr__(self, name):
        # Anything we do not override is served by the wrapped file.
        return getattr(self.filesock, name)

136

137

138

class _ReportingSocket(object):
    """Socket proxy reporting the amount of data sent and received.

    Attributes not defined here are looked up on the wrapped socket.
    """

    def __init__(self, sock, report_activity=None):
        """Wrap ``sock``.

        :param sock: the socket object to delegate to.
        :param report_activity: optional callable taking (size, direction).
        """
        self._report_activity = report_activity
        self.sock = sock

    def report_activity(self, size, direction):
        """Forward (size, direction) to the callback, if one was given."""
        notify = self._report_activity
        if not notify:
            return
        notify(size, direction)

    def sendall(self, s, *args):
        """Send ``s`` entirely, then report its length as written."""
        self.sock.sendall(s, *args)
        self.report_activity(len(s), 'write')

    def recv(self, *args):
        """Receive data and report its length as read."""
        received = self.sock.recv(*args)
        self.report_activity(len(received), 'read')
        return received

    def makefile(self, mode='r', bufsize=-1):
        """Return a reporting file object on top of the socket.

        ``bufsize`` is ignored: http_client creates a fileobject that doesn't
        do buffering, which makes fp.readline() very expensive because it only
        reads one byte at a time. So a 64k buffer is always requested from
        sock.makefile.
        """
        buffered = self.sock.makefile(mode, 65536)
        # Reads on the buffered file are reported through the same callback
        return _ReportingFileSocket(buffered, self._report_activity)

    def __getattr__(self, name):
        # Anything we do not override is served by the wrapped socket.
        return getattr(self.sock, name)

168

169

170

# We define our own Response class to keep our http_client pipe clean

171

class Response(http_client.HTTPResponse):
    """HTTPResponse subclass that keeps the connection pipe clean.

    http_client prefers to decorate the returned objects, rather
    than using a custom object; we use a custom class instead.
    """

    # Statuses whose body is of no interest to us
    _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]

    # finish() may have to throw away several MB in the worst case. Rather
    # than buffering that much, the leftover is read and dropped chunk by
    # chunk. The underlying file is either a socket or a StringIO, so 8k
    # chunks should be fine.
    _discarded_buf_size = 8192

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        """Remember ``url`` and initialise the base response."""
        self.url = url
        super(Response, self).__init__(
            sock, debuglevel=debuglevel, method=method, url=url)

    def begin(self):
        """Begin to read the response from the server.

        http_client assumes that some responses get no content and do
        not even attempt to read the body in that case, leaving
        the body in the socket, blocking the next request. For the statuses
        listed in ``_body_ignored_responses`` the body is consumed here and
        the response is closed.
        """
        http_client.HTTPResponse.begin(self)
        if self.status not in self._body_ignored_responses:
            if self.status == 200:
                # Whatever the request is, it went ok, so we surely don't
                # want to close the connection. Some cases are not correctly
                # detected by http_client (the CONNECT response for the
                # https through proxy case is one). 'will_close' refers to
                # the "true" socket between us and the server, whereas
                # 'close()' below refers to the copy of that socket created
                # by http_client for the response itself.
                self.will_close = False
            return

        if self.debuglevel >= 2:
            print("For status: [%s], will ready body, length: %s" % (
                self.status, self.length))
        body_expected = self.length is not None and not self.will_close
        if body_expected:
            # Otherwise we can't even try to read the body or we may
            # encounter a 104, 'Connection reset by peer' error if there is
            # indeed no body and the server closed the connection just after
            # having issued the response headers (even if the headers
            # indicate a Content-Type...)
            body = self.read(self.length)
            if self.debuglevel >= 9:
                # This one can be huge and is generally not interesting
                print("Consumed body: [%s]" % body)
        # We are done with this response
        self.close()

    def finish(self):
        """Finish reading the body.

        In some cases, the client may have left some bytes to read in the
        body. That will block the next request to succeed if we use a
        persistent connection. If we don't use a persistent connection, well,
        nothing will block the next request since a new connection will be
        issued anyway.

        :return: the number of bytes left on the socket (None when the
            response was already closed)
        """
        if self.isclosed():
            return None
        # Drain whatever is still announced by self.length
        drained = 0
        chunk = True
        while chunk and self.length:
            # read() will update self.length
            chunk = self.read(min(self.length, self._discarded_buf_size))
            drained += len(chunk)
        if drained:
            trace.mutter("%s bytes left on the HTTP socket", drained)
        self.close()
        return drained

254

255

256

# Not inheriting from 'object' because http_client.HTTPConnection doesn't.

257

class AbstractHTTPConnection:
    """Mixin for HTTP(S) connections that can reset themselves on a bad response.

    The methods rely on attributes and methods (host, port, sock, close())
    that are expected to come from the http_client connection class it is
    combined with.
    """

    response_class = Response

    # When we detect a server responding with the whole file to range
    # requests, we want to warn. But not below a given threshold.
    _range_warning_thresold = 1024 * 1024

    def __init__(self, report_activity=None):
        """Initialise the bookkeeping attributes.

        :param report_activity: optional callable handed to the reporting
            socket wrapper.
        """
        self._report_activity = report_activity
        self._response = None
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        """Log the host (and proxied host, if any) we are about to reach."""
        target = '%s:%s' % (self.host, self.port)
        proxied = self.proxied_host
        if proxied is not None:
            target = target + '(proxy for %s)' % proxied
        trace.mutter('* About to connect() to %s' % target)

    def getresponse(self):
        """Capture the response to be able to cleanup"""
        captured = http_client.HTTPConnection.getresponse(self)
        self._response = captured
        return captured

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        last = self._response
        if last is not None:
            try:
                pending = last.finish()
                # Warn the user (once)
                got_whole_file = (
                    self._ranges_received_whole_file is None
                    and last.status == 200
                    and pending
                    and pending > self._range_warning_thresold)
                if got_whole_file:
                    self._ranges_received_whole_file = True
                    trace.warning(
                        'Got a 200 response when asking for multiple ranges,'
                        ' does your server at %s:%s support range requests?',
                        self.host, self.port)
            except socket.error as e:
                # It's conceivable that the socket is in a bad state here
                # (including some test cases) and in this case, it doesn't
                # need cleaning anymore, so no need to fail, we just get rid
                # of the socket and let callers reconnect
                dropped_by_peer = (
                    len(e.args) != 0
                    and e.args[0] in (errno.ECONNRESET, errno.ECONNABORTED))
                if not dropped_by_peer:
                    raise
                self.close()
            self._response = None
        # Hide the socket while http_client.HTTPConnection does its
        # housekeeping in close(), then put it back.
        kept_sock = self.sock
        self.sock = None
        self.close()
        self.sock = kept_sock

    def _wrap_socket_for_reporting(self, sock):
        """Wrap the socket before anybody use it."""
        self.sock = _ReportingSocket(sock, self._report_activity)

318

319

320

class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
    """Plain http connection whose socket reports its activity."""

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        """Create the (not yet connected) connection.

        :param host: host to connect to.
        :param port: port to connect to, passed on to http_client.
        :param proxied_host: host reached through this connection when it
            targets a proxy, None otherwise.
        :param report_activity: optional activity callback.
        :param ca_certs: ignored, it's only relevant for https.
        """
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        http_client.HTTPConnection.__init__(self, host, port)
        self.proxied_host = proxied_host

    def connect(self):
        """Connect, then wrap the socket so that its traffic is reported."""
        tracing = 'http' in debug.debug_flags
        if tracing:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)

335

336

337

class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
    """https connection, direct or tunnelled through a proxy.

    The TCP connection is established by connect(); the TLS layer is added
    by connect_to_origin(), either right away (no proxy) or later by the
    caller once the proxy tunnel is set up.
    """

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        """Create the (not yet connected) connection.

        :param host: host to connect to (the proxy when one is used).
        :param port: port to connect to, passed on to http_client.
        :param key_file: optional client private key file.
        :param cert_file: optional client certificate file.
        :param proxied_host: 'host:port' of the origin server when the
            connection goes through a proxy, None otherwise.
        :param report_activity: optional activity callback.
        :param ca_certs: optional file of trusted CA certificates; when None
            the 'ssl.ca_certs' configuration option is used.
        """
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        # key_file and cert_file are not handed to the base class: recent
        # Python versions no longer accept them there. We only need them
        # in connect_to_origin(), so keep them as plain attributes.
        http_client.HTTPSConnection.__init__(self, host, port)
        self.key_file = key_file
        self.cert_file = cert_file
        self.proxied_host = proxied_host
        self.ca_certs = ca_certs

    def connect(self):
        """Open the TCP connection and, without a proxy, start TLS."""
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        """Establish the TLS layer with the origin server.

        The certificate requirements come from the 'ssl.cert_reqs'
        configuration option.

        :raises ssl.SSLError: if the TLS setup fails (a hint about the
            relevant options is emitted first).
        """
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        # The host we are really talking TLS with: behind a proxy that is the
        # proxied host, not the host the socket is connected to.
        if self.proxied_host is not None:
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
            ca_certs = None
        else:
            if self.ca_certs is None:
                ca_certs = config_stack.get('ssl.ca_certs')
            else:
                ca_certs = self.ca_certs
            if ca_certs is None:
                trace.warning(
                    "No valid trusted SSL CA certificates file set. See "
                    "'brz help ssl.ca_certs' for more information on setting "
                    "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            # check_hostname is set before verify_mode on purpose: hostname
            # checking has to be off before verification can be disabled.
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            # Name the origin server in the handshake (not the proxy), so
            # that the certificate is checked against the right host.
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=host)
        except ssl.SSLError:
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA "
                "certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody use it
        self._wrap_socket_for_reporting(ssl_sock)

398

399

400

class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication have
      been made.
    """

    def __init__(self, method, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        """Create a request using an explicit HTTP method.

        :param method: the HTTP method returned by get_method().
        :param url: the url to request.
        :param data: optional request body.
        :param headers: optional dict of headers; defaults to no headers.
        :param origin_req_host: passed on to urllib_request.Request.
        :param unverifiable: passed on to urllib_request.Request.
        :param connection: the connection to issue the request on, if one
            already exists.
        :param parent: the request this one was redirected from, if any.
        """
        # Avoid a mutable default argument: build a fresh dict per call.
        if headers is None:
            headers = {}
        urllib_request.Request.__init__(
            self, url, data, headers,
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        """Return the method given at construction time."""
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        host, port = splitport(self.host)
        if port is None:
            # We need to set the default port ourselves way before it gets set
            # in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and a proxy,
        # it sets Host to the https proxy, not the host we want to talk to.
        # I'm fairly sure this is our fault, but what is the cause is an open
        # question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)

456

457

458

class _ConnectRequest(Request):
    """The CONNECT request sent to a proxy to open a tunnel to a host."""

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        """
        # We give a fake url and redefine selector or urllib_request will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        """The target of a CONNECT request is always the proxied host."""
        return self.proxied_host

    @selector.setter
    def selector(self, value):
        # urllib_request.Request assigns self.selector while it parses the
        # url; without a setter that assignment fails on a property. The
        # value is deliberately dropped: the selector is the proxied host.
        pass

    def get_selector(self):
        """Return the selector (the proxied host)."""
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not apply
        here. In fact, the connection is already established with proxy and we
        just want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)

492

493

494

class ConnectionHandler(urllib_request.BaseHandler):
    """Provides connection-sharing by pre-processing requests.

    urllib_request provides no way to access the HTTPConnection object
    internally used. But we need it in order to achieve
    connection sharing. So, we add it to the request just before
    it is processed, and then we override the do_open method for
    http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        """Remember the settings handed to every connection we create.

        :param report_activity: optional activity callback.
        :param ca_certs: optional trusted CA certificates file.
        """
        self.ca_certs = ca_certs
        self._report_activity = report_activity

    def create_connection(self, request, http_connection_class):
        """Build a connection object for ``request``.

        The connection will not connect until the first request is made.

        :param request: the request needing a connection.
        :param http_connection_class: the class to instantiate.
        :return: the new connection object.
        :raises urlutils.InvalidURL: if the request has no host or if
            http_client rejects the host.
        """
        target = request.host
        if not target:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        try:
            return http_connection_class(
                target,
                proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL:
            # There is only one occurrence of InvalidURL in http_client
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request have no connection: create a new one,

        - the request have a connection: this one have been used
          already, let's capture it, so that we can give it to
          another transport to be reused. We don't do that
          ourselves: the Transport object get the connection from
          a first request and then propagate it, from request to
          request or to cloned transports.
        """
        conn = request.connection
        if conn is None:
            # No connection yet: make one and attach it to the request
            conn = self.create_connection(request, http_connection_class)
            request.connection = conn

        # All connections will pass here, propagate debug level
        conn.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        """Attach an HTTPConnection to ``request`` if it has none."""
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        """Attach an HTTPSConnection to ``request`` if it has none."""
        return self.capture_connection(request, HTTPSConnection)

559

560

561

class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Common headers setting

        Every default header not already present in the request is added.

        :param request: the request about to be sent.
        :return: the same request.
        """
        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exception of application level kind, we
        just have to translate them.

        http_client can raise exceptions of transport level (badly
        formatted dialog, loss of connexion or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).

        Must be called from an ``except`` clause: the exception being
        handled is read from sys.exc_info().

        :param http_class: the connection class, passed back to do_open.
        :param request: the request that failed.
        :param first_try: True if the request has not been retried yet.
        :return: the response obtained by the retry.
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            origin_req_host = request.origin_req_host
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in incorrect state, it's a bug in our
            # implementation.
            raise exc_val.with_traceback(exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Will retry, %s %r' % (method, url))
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicates that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) is
                    # experimented, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exception are considered connection related.

                    # socket errors generally occurs for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    selector = request.selector
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Failed again, %s %r' % (method, url))
                    print(' Will raise: [%r]' % my_exception)
                raise my_exception.with_traceback(exc_tb)
        return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.

        :param http_class: the connection class, only passed on to
            retry_or_raise.
        :param request: the request to send; it must carry a connection.
        :param first_try: False when called for the retry.
        :return: the response object, with ``msg`` set to the reason phrase.
        :raises AssertionError: if the request has no connection.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get canonical
        # header names, but only python2.5 urllib_request use title() to fix them just
        # before sending the request. And not all versions of python 2.5 do
        # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
        # ourself below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            url = request.selector
            # FIXME: implements 100-continue (the body should not be sent
            # with the headers in that case). HTTPConnection does not fully
            # support 100-continue: the server responses are just ignored.
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         request.data,
                                         headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)

        # urllib clients expect the reason phrase in .msg
        response.msg = response.reason
        return response

784

785

786

class HTTPHandler(AbstractHTTPHandler):
    """Handler for http requests, delegating the work to do_open."""

    def http_open(self, request):
        """Send ``request`` and return the response from do_open."""
        response = self.do_open(HTTPConnection, request)
        return response

791

792

793

class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Send ``request``, first opening a proxy tunnel if one is needed.

        A CONNECT request is issued beforehand when the connection has no
        socket yet, goes through a proxy and the request is not itself a
        CONNECT.

        :param request: the request to send; it must carry a connection.
        :return: the response from do_open.
        :raises errors.ConnectionError: if the proxy does not answer the
            CONNECT request with a 200 status.
        """
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT':  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # The proxy is the host the connection points at; the
                # handler itself has no 'host' attribute.
                raise errors.ConnectionError("Can't connect to %s via proxy %s" % (
                    connect.proxied_host, connection.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)

829

830

831

class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Follow (or merely record) HTTP redirections with our own policy.

    The stock urllib handler cannot be used as is: redirected requests must
    be built with our specific Request class and the redirection policy is
    specific too.
    """

    _debuglevel = DEBUG

    # RFC2616 only allows read requests to be redirected without user
    # interaction. Breezy takes shortcuts to save roundtrips, which can lead
    # to a write request being issued before the read requests for the
    # containing dirs had a chance to be redirected. Write requests are
    # therefore redirected the same way, which respects the spirit of the
    # RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Build the request to issue for a redirection to ``newurl``.

        See urllib_request.HTTPRedirectHandler.redirect_request.

        :return: A new Request for ``newurl`` reusing the method and headers
            of ``req`` and keeping ``req`` as its parent.
        :raises urllib_request.HTTPError: If ``code`` is not one of 301, 302,
            303, 307 or 308.
        """
        # Updating the existing request would have been nicer, but
        # urllib_request.Request is too complicated to create for an
        # equivalent "update" to be simple. A new request is created and only
        # linked to its parent in the redirection chain.
        #
        # Codes that make no sense in our context are errors:
        # - 300: multiple choices for different representations of the URI,
        #   using it would violate the protocol neutrality of Transport,
        # - 304: not modified, SHOULD only occur with conditional GETs which
        #   our implementation doesn't use,
        # - 305: use proxy, not expected in our context -- vila/20060909
        # - 306: unused (if the RFC says so...)
        #
        # A 302 on a HEAD request may look like a sufficient hint that the
        # file exists, but to be sure we MUST follow the redirection.
        origin_host = req.origin_req_host

        if code not in (301, 302, 303, 307, 308):
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

        # TODO: It will be nice to be able to detect virtual hosts sharing
        # the same IP address, that will allow us to share the same
        # connection (hence connection=None for now).
        return Request(req.get_method(), newurl,
                       headers=req.headers,
                       origin_req_host=origin_host,
                       unverifiable=True,
                       connection=None,
                       parent=req)

    def http_error_302(self, req, fp, code, msg, headers):
        """Request the URI we are redirected to.

        Copied from urllib_request so that the pipe of the associated
        connection is cleaned *before* the redirected request is issued but
        *after* any error has been raised.

        :return: None when the response names no target, ``fp`` when ``req``
            doesn't follow redirections (the target is then recorded in
            ``req.redirected_to``), the response to the redirected request
            otherwise.
        """
        # Some servers (incorrectly) return multiple Location headers (so
        # probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the following will
        # need to be updated to use correct case for headers.
        for header_name in ('location', 'uri'):
            if header_name in headers:
                target = headers.get(header_name)
                break
        else:
            return

        target = urljoin(req.get_full_url(), target)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl_repr(target)
                  if False else target, req.follow_redirections))
        if req.follow_redirections is False:
            req.redirected_to = target
            return fp

        # Either succeeds or raises: unlike the urllib_request one, our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               target)

        # Loop detection: redirect_dict counts the visits for each url.
        if hasattr(req, 'redirect_dict'):
            history = req.redirect_dict
            redirected_req.redirect_dict = history
            seen_too_often = history.get(target, 0) >= self.max_repeats
            if seen_too_often or len(history) >= self.max_redirections:
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers,
                                               fp)
        else:
            history = {}
            redirected_req.redirect_dict = history
            req.redirect_dict = history
        history[target] = history.get(target, 0) + 1

        # No HTTPError will need the fp anymore, it can be closed.
        fp.close()
        # Everything we need is already in the response.
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302
    http_error_308 = http_error_302

951

952

953

class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request
    during the request pre-processing instead of modifying it at _open time.
    As we capture (or create) the connection object during request
    processing, _open time was too late.

    The main task is to modify the request so that the connection is done to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may
    be against an https server proxied through an http proxy. So,
    https_request will be called, but later it's really http_open that will
    be called. This explains why we don't have to call self.parent.open as
    the urllib_request did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        """Replace the urllib ``<scheme>_open`` hooks by request hooks.

        :param proxies: Passed as is to urllib_request.ProxyHandler.
        """
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of urllib_request implementation
        for scheme, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (scheme, proxy))
            delattr(self, '%s_open' % scheme)

        def bind_scheme_request(proxy, scheme):
            # Install <scheme>_request only when a proxy is defined for it.
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))

        # We are interested only by the http[s] proxies
        for scheme in ('http', 'https'):
            bind_scheme_request(self.get_proxy_env_var(scheme), scheme)

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.

        :param name: The proxy kind ('http', 'https', 'no', ...).
        :param default_to: The alternate kind to look up when ``name`` is not
            defined, None to disable the fallback.
        :return: The proxy definition or None if there is none.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            pass
        if default_to is None:
            return None
        # Try to get the alternate environment variable
        try:
            return self.proxies[default_to]
        except KeyError:
            return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib_request.proxy_bypass(host)
        return bypass

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        Each entry is a host with an optional ``:port``. ``*`` and ``?`` are
        the only wildcards; every other character is taken literally. An
        entry matches when the host name starts with it (ignoring case).

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if dport is not None and hport != dport:
                continue
            # Escape *all* regexp metacharacters first, so that entries like
            # '[::1]' or 'foo(' are neither interpreted as regexps nor make
            # re.match() fail, then restore the two supported glob chars.
            pattern = re.escape(dhost)
            pattern = pattern.replace(r'\*', '.*').replace(r'\?', '.')
            if re.match(pattern, hhost, re.IGNORECASE):
                return True
        # Nothing explicitly avoid the host
        return None

    def set_proxy(self, request, type):
        """Redirect the request to the proxy defined for ``type``.

        :param request: The request being pre-processed.
        :param type: The url scheme ('http' or 'https') of the request.
        :return: The request, left untouched if the host bypasses the proxy.
        :raises urlutils.InvalidURL: If the proxy definition has no host.
        """
        host = request.host
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port

        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameter are available, we are handling the
            # first proxied request, initialize. scheme (the authentication
            # scheme) and realm will be set by the AuthHandler
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request

1097

1098

1099

class AbstractAuthHandler(urllib_request.BaseHandler):
    """Common machinery shared by all the http authentication handlers.

    It handles the authentication errors and, once an authentication has
    been successful, preventively sets the authentication headers. It serves
    http and proxy authentications as well as the basic, negotiate and
    digest schemes, giving them all a single interface (urllib_request
    provides far too many handlers with different policies).

    The interaction with the urllib_request framework is not obvious:

    - opener.open(request) may trigger http_request, which adds an
      authentication header (self.build_auth_header) if enough info is
      available,

    - the request is sent to the server,

    - if an authentication error is received, self.auth_required is called:
      the authentication info is taken from the error headers and
      self.auth_match checks that we can try the authentication and
      completes the authentication parameters,

    - parent.open(request) is called again, which may trigger http_request
      and add a header, this time with all the required info (the request
      and the authentication parameters used in the recursive calls are
      really, and must be, the *same* objects),

    - if that call returns a response, the authentication has been
      successful and the request authentication parameters are up to date.

    Daughter classes are expected to define:
    - auth_required_header: the header received from the server
    - auth_header: the header sent in the request
    """

    # The scheme as it appears in the server header (lower cased)
    scheme = None

    # We don't want to retry authenticating endlessly
    _max_retry = 3

    # Whether the auth mechanism requires a username.
    requires_username = True

    def __init__(self):
        # None means we are not inside a try/fail cycle of authentications,
        # which is the default state.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Split an authentication header into scheme and parameters.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder), scheme being the first word of
            the header (lower cased). remainder is None when the header
            holds a single word.
        """
        words = server_header.split(None, 1)
        if len(words) == 2:
            scheme, remainder = words
        else:
            scheme, remainder = server_header, None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        if auth.get(key, None) == value:
            return
        auth[key] = value
        auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # Retries are recursive calls, None identifies the first one
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None
        challenges = headers.get_all(self.auth_required_header)
        if not challenges:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            trace.mutter('%s not found', self.auth_required_header)
            return None

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            url = urlutils.URL.from_string(request.get_full_url())
            for key, value in (('protocol', url.scheme),
                               ('host', url.host),
                               ('port', url.port),
                               ('path', url.path)):
                self.update_auth(auth, key, value)
        # FIXME: the auth handler should be selected at a single place
        # instead of letting all handlers try to match all headers, but the
        # current design doesn't allow a simple implementation.
        for challenge in challenges:
            # Several schemes can be proposed by the server, try to match
            # each one in turn
            if not self.auth_match(challenge, auth):
                continue
            # auth_match may have modified auth (by adding the password or
            # changing the realm, for example)
            already_sent = request.get_header(self.auth_header, None)
            if already_sent is not None and not auth['modified']:
                # We already tried that, give up
                return None

            # Only the most secure scheme proposed by the server should be
            # used. The handlers use 'handler_order' to describe that
            # property, so the first handler tried takes precedence and the
            # others should not attempt to authenticate if it failed.
            best_scheme = auth.get('best_scheme', None)
            if best_scheme is None:
                # If the current handler doesn't succeed from here, the
                # credentials are wrong (or incomplete), but we know that
                # the associated scheme should be used.
                auth['best_scheme'] = self.scheme
                best_scheme = self.scheme
            if best_scheme != self.scheme:
                continue

            if self.requires_username and auth.get('user', None) is None:
                # Without a known user, we can't authenticate
                return None

            # Housekeeping
            request.connection.cleanup_pipe()
            # Retry the request with an authentication header added
            response = self.parent.open(request)
            if response:
                self.auth_successful(request, response)
            return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all available
        # handlers. If one of them authenticates successfully, a response
        # will be returned. If none of them succeeds, None will be returned
        # and the error handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be updated with
        info from the server. Some of them, when combined, form the
        authentication key: if one of them changes, the authentication
        result may change. 'user' and 'password' are examples, some schemes
        have others (digest's nonce is one, digest's nonce_count is a
        *counter-example*). Such parameters must be updated through the
        update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
             updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional infos may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial
            url and then during dialog with the server).
        :return: A (user, password) tuple.
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(
                auth['protocol'], auth['host'],
                port=port, path=auth['path'],
                realm=realm, ask=True,
                prompt=self.build_username_prompt(auth))
        if user is None or password is not None:
            return user, password

        password = auth_conf.get_password(
            auth['protocol'], auth['host'], user,
            port=port,
            path=auth['path'], realm=realm,
            prompt=self.build_password_prompt(auth))
        return user, password

    def _build_password_prompt(self, auth):
        """Build a password prompt taking the protocol used into account.

        The AuthHandler is used by http and https and we want that
        information in the prompt, so it is built from the authentication
        dict which contains all the needed parts.

        http and proxy AuthHandlers present different prompts to the user:
        daughter classes should implement a public build_password_prompt
        on top of this method.
        """
        pieces = [u'%s' % auth['protocol'].upper(), u' %(user)s@%(host)s']
        realm = auth['realm']
        if realm is not None:
            pieces.append(u", Realm: '%s'" % realm)
        pieces.append(u' password')
        return u''.join(pieces)

    def _build_username_prompt(self, auth):
        """Build a username prompt taking the protocol used into account.

        The AuthHandler is used by http and https and we want that
        information in the prompt, so it is built from the authentication
        dict which contains all the needed parts.

        http and proxy AuthHandlers present different prompts to the user:
        daughter classes should implement a public build_username_prompt
        on top of this method.
        """
        pieces = [u'%s' % auth['protocol'].upper(), u' %(host)s']
        realm = auth['realm']
        if realm is not None:
            pieces.append(u", Realm: '%s'" % realm)
        pieces.append(u' username')
        return u''.join(pieces)

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            header = self.build_auth_header(auth, request)
            self.add_auth_header(request, header)
        return request

    https_request = http_request  # FIXME: Need test

1378

1379

1380

class NegotiateAuthHandler(AbstractAuthHandler):
    """An authentication handler for WWW-Authenticate: Negotiate.

    Only Kerberos is supported at the moment. NTLM support may be added in
    the future.
    """

    scheme = 'negotiate'
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        """Accept a Negotiate challenge if a Kerberos response can be built.

        On success the response is recorded in auth['negotiate_response'].
        """
        scheme = self._parse_auth_header(header)[0]
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        response = self._auth_match_kerberos(auth)
        if response is None:
            return False
        # Optionally should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', response)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host.

        :return: The response, or None if the kerberos module can't be
            imported or one of the GSSAPI calls fails.
        """
        global kerberos, checked_kerberos
        if kerberos is None and not checked_kerberos:
            # Attempt the import only once, whatever the outcome.
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None
        status, context = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if status < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], status)
            return None
        status = kerberos.authGSSClientStep(context, "")
        if status < 0:
            trace.mutter('authGSSClientStep failed: %d', status)
            return None
        return kerberos.authGSSClientResponse(context)

    def build_auth_header(self, auth, request):
        """Build the header from the recorded negotiate response."""
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` already holds a negotiate response."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        if auth.get('scheme', None) != 'negotiate':
            return False
        return auth.get('negotiate_response', None) is not None

1434

1435

1436

class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Build the 'Basic' header from auth['user'] and auth['password'].

        :return: 'Basic ' followed by the base64 encoding of the utf-8
            encoded 'user:password' string.
        """
        raw = '%s:%s' % (auth['user'], auth['password'])
        encoded = base64.b64encode(raw.encode('utf-8')).decode('ascii')
        return 'Basic ' + encoded

    def extract_realm(self, header_value):
        """Find the realm in the parameters of a challenge.

        :param header_value: The challenge parameters (what follows the
            scheme in the header) or None if the server sent none.
        :return: A (match, realm) tuple, both None when no realm is found.
        """
        if header_value is None:
            # The challenge was a bare scheme (see _parse_auth_header):
            # searching None would raise TypeError.
            return None, None
        match = self.auth_regexp.search(header_value)
        realm = None
        if match:
            realm = match.group(1)
        return match, realm

    def auth_match(self, header, auth):
        """Accept a Basic challenge which defines a realm.

        The scheme and realm are recorded into ``auth`` and the user and
        password are acquired via get_user_password() if one of them is
        missing.

        :return: True if the challenge can be handled, False otherwise.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match:
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if (auth.get('user', None) is None
                    or auth.get('password', None) is None):
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` has already been used for a basic auth."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'

1478

1479

1480

def get_digest_algorithm_impls(algorithm):
    """Return the (H, KD) functions implementing a digest algorithm.

    :param algorithm: 'MD5' or 'SHA'.
    :return: A (H, KD) tuple where H hashes bytes and KD(secret, data)
        hashes the utf-8 encoding of 'secret:data' with H. Both are None for
        any other algorithm.
    """
    if algorithm == 'MD5':
        def digest(x):
            return osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        digest = osutils.sha_string
    else:
        digest = None

    if digest is None:
        return None, None

    def keyed_digest(secret, data):
        return digest(("%s:%s" % (secret, data)).encode('utf-8'))

    return digest, keyed_digest

1491

1492

1493

def get_new_cnonce(nonce, nonce_count):
    """Return a fresh 16 characters client nonce.

    It is the start of the sha of a string mixing the server nonce, the
    nonce count, the current time and 8 random characters.
    """
    now = time.ctime()
    salt = osutils.rand_chars(8)
    seed = '%s:%d:%s:%s' % (nonce, nonce_count, now, salt)
    digest = osutils.sha_string(seed.encode('utf-8'))
    return digest[:16]

1497

1498

1499

class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` has already been used for a digest auth."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        """Accept a Digest challenge we are able to answer.

        Only qop 'auth' and the algorithms known to
        get_digest_algorithm_impls are supported. The scheme, realm, nonce,
        qop, opaque and algorithm are recorded into ``auth`` and the user and
        password are acquired via get_user_password() if one of them is
        missing.

        :return: True if the challenge can be handled, False otherwise.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        if raw_auth is None:
            # A bare scheme without parameters can't provide the nonce we
            # need (and parse_http_list can't parse None).
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib_request.parse_keqv_list(
            urllib_request.parse_http_list(raw_auth))

        # Check that we can handle that authentication
        qop = req_auth.get('qop', None)
        if qop != 'auth':  # No auth-int so far
            return False

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if (auth.get('user', None) is None
                or auth.get('password', None) is None):
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            # Assigned directly: opaque doesn't mark auth as modified
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        """Build the 'Digest' header for the request.

        The nonce count recorded in ``auth`` is incremented as a side effect.

        :param auth: The auth parameters as completed by auth_match().
        :param request: The request needing authentication.
        :return: The header value.
        """
        selector = request.selector
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header

1594

1595

1596

class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    # Header carrying the server challenge and header carrying our answer
    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        """Return the password prompt built by _build_password_prompt."""
        return self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Return the username prompt built by _build_username_prompt."""
        return self._build_username_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 error by delegating to auth_required."""
        return self.auth_required(req, headers)

1623

1624

1625

class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    The authentication is sent preventively to avoid the roundtrip
    associated with the 407 error and the relevant info is kept in the
    proxy_auth request attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Return the proxy auth params of the request."""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Store the proxy auth params into the request."""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        """Return the common password prompt prefixed with 'Proxy '."""
        return u'Proxy ' + self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Return the common username prompt prefixed with 'Proxy '."""
        return u'Proxy ' + self._build_username_prompt(auth)

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 error by delegating to auth_required."""
        return self.auth_required(req, headers)

1659

1660

1661

class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler.

    Combines the basic scheme with the http (401) error handling.
    """

1663

1664

1665

class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler.

    Combines the basic scheme with the proxy (407) error handling.
    """

1667

1668

1669

class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler.

    Combines the digest scheme with the http (401) error handling.
    """

1671

1672

1673

class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler.

    Combines the digest scheme with the proxy (407) error handling.
    """

1675

1676

1677

class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Custom http negotiate authentication handler.

    Combines the negotiate scheme with the http (401) error handling.
    """

1679

1680

1681

class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Custom proxy negotiate authentication handler.

    Combines the negotiate scheme with the proxy (407) error handling.
    """

1683

1684

1685

class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Filter HTTP responses by status code.

    Rather than processing error responses here, the codes listed in
    ``accepted_errors`` are handed back untouched so that our Transport can
    handle them. Any other code is passed to the parent opener's ``error``
    method.
    """

    # The error codes the caller will handle.
    #
    # This can be specialized in the request on a case-by case basis, but the
    # common cases are covered here.
    accepted_errors = [
        200,  # Ok
        201,
        202,
        204,
        206,  # Partial content
        400,
        403,
        404,  # Not found
        405,  # Method not allowed
        406,  # Not Acceptable
        409,  # Conflict
        416,  # Range not satisfiable
        422,  # Unprocessible entity
        501,  # Not implemented
    ]

    def http_response(self, request, response):
        """Return ``response`` as-is for accepted codes, else defer to parent.

        :param request: The request that produced ``response``.
        :param response: The response to examine.
        :returns: ``response`` itself when its code is accepted, otherwise
            whatever ``self.parent.error`` returns.
        """
        status = response.code
        reason = response.msg
        headers = response.info()
        if status in self.accepted_errors:
            return response
        return self.parent.error('http', request, response,
                                 status, reason, headers)

    # https responses are filtered exactly like http ones.
    https_response = http_response

1722

1723

1724

class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise a Breezy exception for the given HTTP error; never returns.

        :raises errors.TransportError: when ``code`` is 403.
        :raises errors.UnexpectedHttpStatus: for every other code.
        """
        url = req.get_full_url()
        if code != 403:
            raise errors.UnexpectedHttpStatus(
                url, code, 'Unable to handle http code: %s' % msg)
        raise errors.TransportError(
            'Server refuses to fulfill the request (403 Forbidden)'
            ' for %s' % url)

1736

1737

1738

class Opener(object):
    """A wrapper around urllib_request.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        """Build the underlying opener and expose its ``open`` method.

        :param connection: Factory called with ``report_activity`` and
            ``ca_certs`` keyword arguments to create the connection handler.
        :param redirect: Redirect handler given to ``build_opener``.
        :param error: Error processor given to ``build_opener``.
        :param report_activity: Forwarded to ``connection``.
        :param ca_certs: Forwarded to ``connection``.
        """
        # The handlers are passed to build_opener in exactly this order.
        handlers = [
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect,
            error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
        ]
        self._opener = urllib_request.build_opener(*handlers)

        self.open = self._opener.open
        if DEBUG < 9:
            return
        # When dealing with handler order, it's easy to mess
        # things up, the following will help understand which
        # handler is used, when and for what.
        import pprint
        pprint.pprint(self._opener.__dict__)

1773

1774

1775

class HttpTransport(ConnectedTransport):

1776

"""HTTP Client implementations.

1777

1778

The protocol can be given as e.g. http+urllib://host/ to use a particular

1779

implementation.

1780

"""

1781

1782

# _unqualified_scheme: "http" or "https"

1783

# _scheme: may have "+pycurl", etc

1784

1785

# In order to debug we have to issue our traces in sync with

1786

# httplib, which use print :(

1787

_debuglevel = 0

1788

1789

def __init__(self, base, _from_transport=None, ca_certs=None):
    """Set the base path where files will be stored.

    :param base: An http or https URL, optionally qualified with
        ``+<implementation>`` after the scheme.
    :param _from_transport: Transport being cloned, if any; its range hint
        and opener are reused.
    :param ca_certs: Passed to the Opener created when not cloning.
    :raises AssertionError: if ``base`` is not an http(s) URL.
    """
    scheme_match = re.match(r'^(https?)(\+\w+)?://', base)
    if scheme_match is None:
        raise AssertionError("not a http url: %r" % base)
    self._unqualified_scheme = scheme_match.group(1)
    super(HttpTransport, self).__init__(
        base, _from_transport=_from_transport)
    self._medium = None
    # range hint is handled dynamically throughout the life
    # of the transport object. We start by trying multi-range
    # requests and if the server returns bogus results, we
    # retry with single range requests and, finally, we
    # forget about range if the server really can't
    # understand. Once acquired, this piece of info is
    # propagated to clones.
    if _from_transport is None:
        self._range_hint = 'multi'
        self._opener = Opener(
            report_activity=self._report_activity, ca_certs=ca_certs)
    else:
        self._range_hint = _from_transport._range_hint
        self._opener = _from_transport._opener

1812

1813

def request(self, method, url, fields=None, headers=None, **urlopen_kw):
    """Issue an HTTP request through the opener and wrap the response.

    :param method: HTTP method, passed to ``Request``.
    :param url: URL to request.
    :param fields: Optional form fields; url-encoded into the request
        body. Mutually exclusive with the ``body`` keyword.
    :param headers: Optional dict of request headers.
    :param urlopen_kw: Only ``body`` (raw request body) and ``retries``
        (redirections are followed when > 0) are recognised.
    :returns: A ``Urllib3LikeResponse`` wrapping the opener's response.
    :raises ValueError: if both ``fields`` and ``body`` are given.
    :raises NotImplementedError: for any other keyword argument.
    :raises errors.RedirectRequested: when redirections are not followed
        and the response code is 301, 302, 303, 307 or 308.
    """
    body = urlopen_kw.pop('body', None)
    if fields is not None:
        data = urlencode(fields).encode()
        if body is not None:
            raise ValueError(
                'body and fields are mutually exclusive')
    else:
        data = body
    if headers is None:
        headers = {}
    request = Request(method, url, data, headers)
    request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
    if urlopen_kw:
        raise NotImplementedError(
            'unknown arguments: %r' % urlopen_kw.keys())
    connection = self._get_connection()
    if connection is not None:
        # Give back shared info
        request.connection = connection
        (auth, proxy_auth) = self._get_credentials()
        # Clean the httplib.HTTPConnection pipeline in case the previous
        # request couldn't do it
        connection.cleanup_pipe()
    else:
        # First request, initialize credentials.
        # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
        auth = self._create_auth()
        # Proxy initialization will be done by the first proxied request
        proxy_auth = dict()
    # Ensure authentication info is provided
    request.auth = auth
    request.proxy_auth = proxy_auth

    if self._debuglevel > 0:
        print('perform: %s base: %s, url: %s' % (request.method, self.base,
                                                 request.get_full_url()))
    response = self._opener.open(request)
    if self._get_connection() is not request.connection:
        # First connection or reconnection
        self._set_connection(request.connection,
                             (request.auth, request.proxy_auth))
    else:
        # http may change the credentials while keeping the
        # connection opened
        self._update_credentials((request.auth, request.proxy_auth))

    code = response.code
    if (request.follow_redirections is False
            and code in (301, 302, 303, 307, 308)):
        raise errors.RedirectRequested(request.get_full_url(),
                                       request.redirected_to,
                                       is_permanent=(code in (301, 308)))

    if request.redirected_to is not None:
        trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                     request.redirected_to))

    class Urllib3LikeResponse(object):
        """Adapter giving the opener's response a urllib3-style interface."""

        def __init__(self, actual):
            self._actual = actual
            # Body bytes, read lazily and cached by the ``data`` property.
            self._data = None

        def getheader(self, name, default=None):
            if self._actual.headers is None:
                raise http_client.ResponseNotReady()
            return self._actual.headers.get(name, default)

        def getheaders(self):
            if self._actual.headers is None:
                raise http_client.ResponseNotReady()
            return list(self._actual.headers.items())

        @property
        def status(self):
            return self._actual.code

        @property
        def reason(self):
            return self._actual.reason

        @property
        def data(self):
            if self._data is None:
                self._data = self._actual.read()
            return self._data

        @property
        def text(self):
            if self.status == 204:
                return None
            # A response may legitimately lack a Content-Type header;
            # fall back to an empty value so that parse_header yields no
            # charset instead of failing on a missing header.
            content_type = self._actual.headers.get('Content-Type', '')
            charset = cgi.parse_header(content_type)[1].get('charset')
            if charset:
                return self.data.decode(charset)
            else:
                return self.data.decode()

        def read(self, amt=None):
            return self._actual.read(amt)

        def readlines(self):
            return self._actual.readlines()

        def readline(self, size=-1):
            return self._actual.readline(size)

    return Urllib3LikeResponse(response)

1922

1923

def disconnect(self):
    """Close the current connection, if there is one."""
    conn = self._get_connection()
    if conn is None:
        return
    conn.close()

1927

1928

def has(self, relpath):
    """Does the target location exist?

    :returns: True when the HEAD request for ``relpath`` answers 200,
        False otherwise.
    """
    head_response = self._head(relpath)
    return head_response.status == 200

1938

1939

def get(self, relpath):
    """Get the file at the given relative path.

    :param relpath: The relative path to the file
    :returns: The file-like half of the ``_get`` result for the whole file.
    """
    _status, body_file = self._get(relpath, None)
    return body_file

1946

1947

def _get(self, relpath, offsets, tail_amount=0):
    """Get a file, or part of a file.

    :param relpath: Path relative to transport base URL
    :param offsets: None to get the whole file;
        or a list of _CoalescedOffset to fetch parts of a file.
    :param tail_amount: The amount to get from the end of the file.

    :returns: (http_code, result_file)
    :raises errors.NoSuchFile: on a 404 response.
    :raises errors.InvalidHttpRange: on 416, or on 400 when a Range header
        was sent.
    :raises errors.BadHttpRequest: on 400 when no Range header was sent.
    :raises errors.UnexpectedHttpStatus: on any code other than 200/206.
    """
    abspath = self._remote_path(relpath)
    range_header = None
    headers = {}
    if offsets or tail_amount:
        range_header = self._attempted_range_header(offsets, tail_amount)
        if range_header is not None:
            headers = {'Range': 'bytes=' + range_header}

    response = self.request('GET', abspath, headers=headers)
    status = response.status

    if status == 404:  # not found
        raise errors.NoSuchFile(abspath)
    if status == 416 or (status == 400 and range_header):
        # We don't know which, but one of the ranges we specified was
        # wrong.
        raise errors.InvalidHttpRange(
            abspath, range_header, 'Server return code %d' % status)
    if status == 400:
        raise errors.BadHttpRequest(abspath, response.reason)
    if status not in (200, 206):
        raise errors.UnexpectedHttpStatus(abspath, status)

    data = handle_response(abspath, status, response.getheader, response)
    return status, data

1991

1992

def _remote_path(self, relpath):

1993

"""See ConnectedTransport._remote_path.

1994

1995

user and passwords are not embedded in the path provided to the server.

1996

"""

1997

url = self._parsed_url.clone(relpath)

1998

url.user = url.quoted_user = None

1999

url.password = url.quoted_password = None

2000

url.scheme = self._unqualified_scheme

2001

return str(url)

2002

2003

def _create_auth(self):

2004

"""Returns a dict containing the credentials provided at build time."""

2005

auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,

2006

user=self._parsed_url.user, password=self._parsed_url.password,

2007

protocol=self._unqualified_scheme,

2008

path=self._parsed_url.path)

2009

return auth

2010

2011

def get_smart_medium(self):
    """See Transport.get_smart_medium."""
    if self._medium is not None:
        return self._medium
    # Since medium holds some state (smart server probing at least), we
    # need to keep it around. Note that this is needed because medium
    # has the same 'base' attribute as the transport so it can't be
    # shared between transports having different bases.
    self._medium = SmartClientHTTPMedium(self)
    return self._medium

2020

2021

def _degrade_range_hint(self, relpath, ranges):

2022

if self._range_hint == 'multi':

2023

self._range_hint = 'single'

2024

mutter('Retry "%s" with single range request' % relpath)

2025

elif self._range_hint == 'single':

2026

self._range_hint = None

2027

mutter('Retry "%s" without ranges' % relpath)

2028

else:

2029

# We tried all the tricks, but nothing worked, caller must reraise.

2030

return False

2031

return True

2032

2033

# _coalesce_offsets is a helper for readv, it tries to combine ranges without

2034

# degrading readv performances. _bytes_to_read_before_seek is the value

2035

# used for the limit parameter and has been tuned for other transports. For

2036

# HTTP, the name is inappropriate but the parameter is still useful and

2037

# helps reduce the number of chunks in the response. The overhead for a

2038

# chunk (headers, length, footer around the data itself) is variable but

2039

# around 50 bytes. We use 128 to reduce the range specifiers that appear in

2040

# the header, some servers (notably Apache) enforce a maximum length for a

2041

# header and issue a '400: Bad request' error when too many ranges are

2042

# specified.

2043

_bytes_to_read_before_seek = 128

2044

# No limit on the offset number that get combined into one, we are trying

2045

# to avoid downloading the whole file.

2046

_max_readv_combine = 0

2047

# By default Apache has a limit of ~400 ranges before replying with a 400

2048

# Bad Request. So we go underneath that amount to be safe.

2049

_max_get_ranges = 200

2050

# We impose no limit on the range size. But see _pycurl.py for a different

2051

# use.

2052

_get_max_size = 0

2053

2054

def _readv(self, relpath, offsets):
    """Get parts of the file at the given relative path.

    This is a generator. The requested offsets are coalesced, fetched
    through ``_coalesce_readv`` and yielded back in the order they were
    requested. On range-related errors the remaining offsets are retried,
    possibly after degrading the range hint.

    :param relpath: Path of the file, relative to the transport base.
    :param offsets: A list of (offset, size) tuples.
    :return: A list or generator of (offset, data) tuples
    """
    # offsets may be a generator, we will iterate it several times, so
    # build a list
    offsets = list(offsets)

    try_again = True
    # The offset at which the previous attempt failed, used to detect a
    # failure on the same offset twice in a row.
    retried_offset = None
    while try_again:
        try_again = False

        # Coalesce the offsets to minimize the GET requests issued
        sorted_offsets = sorted(offsets)
        coalesced = self._coalesce_offsets(
            sorted_offsets, limit=self._max_readv_combine,
            fudge_factor=self._bytes_to_read_before_seek,
            max_size=self._get_max_size)

        # Turn it into a list, we will iterate it several times
        coalesced = list(coalesced)
        if 'http' in debug.debug_flags:
            mutter('http readv of %s offsets => %s collapsed %s',
                   relpath, len(offsets), len(coalesced))

        # Cache the data read, but only until it's been used
        data_map = {}
        # We will iterate on the data received from the GET requests and
        # serve the corresponding offsets respecting the initial order. We
        # need an offset iterator for that.
        iter_offsets = iter(offsets)
        try:
            cur_offset_and_size = next(iter_offsets)
        except StopIteration:
            # Nothing was requested (or nothing is left to serve).
            return

        try:
            for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
                # Split the received chunk
                for offset, size in cur_coal.ranges:
                    # Ranges are relative to the start of the coalesced
                    # offset they belong to.
                    start = cur_coal.start + offset
                    rfile.seek(start, os.SEEK_SET)
                    data = rfile.read(size)
                    data_len = len(data)
                    if data_len != size:
                        raise errors.ShortReadvError(relpath, start, size,
                                                     actual=data_len)
                    if (start, size) == cur_offset_and_size:
                        # The offset requested are sorted as the coalesced
                        # ones, no need to cache. Win !
                        yield cur_offset_and_size[0], data
                        try:
                            cur_offset_and_size = next(iter_offsets)
                        except StopIteration:
                            return
                    else:
                        # Different sorting. We need to cache.
                        data_map[(start, size)] = data

                # Yield everything we can
                while cur_offset_and_size in data_map:
                    # Clean the cached data since we use it
                    # XXX: will break if offsets contains duplicates --
                    # vila20071129
                    this_data = data_map.pop(cur_offset_and_size)
                    yield cur_offset_and_size[0], this_data
                    try:
                        cur_offset_and_size = next(iter_offsets)
                    except StopIteration:
                        return

        except (errors.ShortReadvError, errors.InvalidRange,
                errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
            mutter('Exception %r: %s during http._readv', e, e)
            if (not isinstance(e, errors.ShortReadvError)
                    or retried_offset == cur_offset_and_size):
                # We don't degrade the range hint for ShortReadvError since
                # they do not indicate a problem with the server ability to
                # handle ranges. Except when we fail to get back a required
                # offset twice in a row. In that case, falling back to
                # single range or whole file should help.
                if not self._degrade_range_hint(relpath, coalesced):
                    raise
            # Some offsets may have been already processed, so we retry
            # only the unsuccessful ones.
            offsets = [cur_offset_and_size] + [o for o in iter_offsets]
            retried_offset = cur_offset_and_size
            try_again = True

2145

2146

def _coalesce_readv(self, relpath, coalesced):

2147

"""Issue several GET requests to satisfy the coalesced offsets"""

2148

2149

def get_and_yield(relpath, coalesced):

2150

if coalesced:

2151

# Note that the _get below may raise

2152

# errors.InvalidHttpRange. It's the caller's responsibility to

2153

# decide how to retry since it may provide different coalesced

2154

# offsets.

2155

code, rfile = self._get(relpath, coalesced)

2156

for coal in coalesced:

2157

yield coal, rfile

2158

2159

if self._range_hint is None:

2160

# Download whole file

2161

for c, rfile in get_and_yield(relpath, coalesced):

2162

yield c, rfile

2163

else:

2164

total = len(coalesced)

2165

if self._range_hint == 'multi':

2166

max_ranges = self._max_get_ranges

2167

elif self._range_hint == 'single':

2168

max_ranges = total

2169

else:

2170

raise AssertionError("Unknown _range_hint %r"

2171

% (self._range_hint,))

2172

# TODO: Some web servers may ignore the range requests and return

2173

# the whole file, we may want to detect that and avoid further

2174

# requests.

2175

# Hint: test_readv_multiple_get_requests will fail once we do that

2176

cumul = 0

2177

ranges = []

2178

for coal in coalesced:

2179

if ((self._get_max_size > 0

2180

and cumul + coal.length > self._get_max_size) or

2181

len(ranges) >= max_ranges):

2182

# Get that much and yield

2183

for c, rfile in get_and_yield(relpath, ranges):

2184

yield c, rfile

2185

# Restart with the current offset

2186

ranges = [coal]

2187

cumul = coal.length

2188

else:

2189

ranges.append(coal)

2190

cumul += coal.length

2191

# Get the rest and yield

2192

for c, rfile in get_and_yield(relpath, ranges):

2193

yield c, rfile

2194

2195

def recommended_page_size(self):
    """See Transport.recommended_page_size().

    For HTTP we suggest a large page size (64 KiB) to reduce the overhead
    introduced by latency.
    """
    kib = 1024
    return 64 * kib

2202

2203

def _post(self, body_bytes):
    """POST body_bytes to .bzr/smart on this transport.

    :returns: (response code, response body file-like object).
    :raises errors.UnexpectedHttpStatus: if the response code is neither
        200 nor 403.
    """
    # TODO: Requiring all the body_bytes to be available at the beginning of
    # the POST may require large client buffers. It would be nice to have
    # an interface that allows streaming via POST when possible (and
    # degrades to a local buffer when not).
    abspath = self._remote_path('.bzr/smart')
    post_headers = {'Content-Type': 'application/octet-stream'}
    response = self.request(
        'POST', abspath, body=body_bytes, headers=post_headers)
    code = response.status
    if code not in (200, 403):
        raise errors.UnexpectedHttpStatus(abspath, code)
    data = handle_response(abspath, code, response.getheader, response)
    return code, data

2222

2223

def _head(self, relpath):

2224

"""Request the HEAD of a file.

2225

2226

Performs the request and leaves callers handle the results.

2227

"""

2228

abspath = self._remote_path(relpath)

2229

response = self.request('HEAD', abspath)

2230

if response.status not in (200, 404):

2231

raise errors.UnexpectedHttpStatus(abspath, response.status)

2232

2233

return response

2234

2235

raise NotImplementedError(self._post)

2236

2237

def put_file(self, relpath, f, mode=None):
    """Copy the file-like object into the location.

    Not supported over http: this always raises.

    :param relpath: Location to put the contents, relative to base.
    :param f: File-like object.
    :param mode: Unused.
    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http PUT not supported')

2244

2245

def mkdir(self, relpath, mode=None):
    """Create a directory at the given path.

    Not supported over http.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support mkdir()')

2248

2249

def rmdir(self, relpath):
    """See Transport.rmdir.

    Not supported over http.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support rmdir()')

2252

2253

def append_file(self, relpath, f, mode=None):
    """Append the text in the file-like object into the final
    location.

    Not supported over http.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support append()')

2258

2259

def copy(self, rel_from, rel_to):
    """Copy the item at rel_from to the location at rel_to

    Not supported over http.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support copy()')

2262

2263

def copy_to(self, relpaths, other, mode=None, pb=None):
    """Copy a set of entries from self into another Transport.

    :param relpaths: A list/generator of entries to be copied.
    :param other: The destination transport; it must not be an
        HttpTransport.
    :raises errors.TransportNotPossible: if ``other`` is an HttpTransport.

    TODO: if other is LocalTransport, is it possible to
          do better than put(get())?
    """
    # At this point HttpTransport might be able to check and see if
    # the remote location is the same, and rather than download, and
    # then upload, it could just issue a remote copy_this command.
    if not isinstance(other, HttpTransport):
        parent = super(HttpTransport, self)
        return parent.copy_to(relpaths, other, mode=mode, pb=pb)
    raise errors.TransportNotPossible(
        'http cannot be the target of copy_to()')

2280

2281

def move(self, rel_from, rel_to):
    """Move the item at rel_from to the location at rel_to

    Not supported over http.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support move()')

2284

2285

def delete(self, relpath):
    """Delete the item at relpath

    Not supported over http.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support delete()')

2288

2289

def external_url(self):
    """See breezy.transport.Transport.external_url."""
    # HTTP URL's are externally usable as long as they don't mention their
    # implementation qualifier
    public_url = self._parsed_url.clone()
    setattr(public_url, 'scheme', self._unqualified_scheme)
    return str(public_url)

2296

2297

def is_readonly(self):
    """See Transport.is_readonly.

    :returns: Always True.
    """
    return True

2300

2301

def listable(self):
    """See Transport.listable.

    :returns: Always False.
    """
    return False

2304

2305

def stat(self, relpath):
    """Return the stat information for a file.

    Not supported over http.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support stat()')

2309

2310

def lock_read(self, relpath):
    """Lock the given file for shared (read) access.

    No real locking is performed: a placeholder object is returned.

    :return: A lock object, which should be passed to Transport.unlock()
    """
    # The old RemoteBranch ignore lock for reading, so we will
    # continue that tradition and return a bogus lock object.
    class BogusLock(object):
        def __init__(self, path):
            self.path = path

        def unlock(self):
            # Nothing was locked, so there is nothing to release.
            pass
    return BogusLock(relpath)

2323

2324

def lock_write(self, relpath):
    """Lock the given file for exclusive (write) access.

    WARNING: many transports do not support this, so try to avoid using
    it. Over http it is not supported at all.

    :return: A lock object, which should be passed to Transport.unlock()
    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support lock_write()')

2331

2332

def _attempted_range_header(self, offsets, tail_amount):

2333

"""Prepare a HTTP Range header at a level the server should accept.

2334

2335

:return: the range header representing offsets/tail_amount or None if

2336

no header can be built.

2337

"""

2338

2339

if self._range_hint == 'multi':

2340

# Generate the header describing all offsets

2341

return self._range_header(offsets, tail_amount)

2342

elif self._range_hint == 'single':

2343

# Combine all the requested ranges into a single

2344

# encompassing one

2345

if len(offsets) > 0:

2346

if tail_amount not in (0, None):

2347

# Nothing we can do here to combine ranges with tail_amount

2348

# in a single range, just returns None. The whole file

2349

# should be downloaded.

2350

return None

2351

else:

2352

start = offsets[0].start

2353

last = offsets[-1]

2354

end = last.start + last.length - 1

2355

whole = self._coalesce_offsets([(start, end - start + 1)],

2356

limit=0, fudge_factor=0)

2357

return self._range_header(list(whole), 0)

2358

else:

2359

# Only tail_amount, requested, leave range_header

2360

# do its work

2361

return self._range_header(offsets, tail_amount)

2362

else:

2363

return None

2364

2365

@staticmethod

2366

def _range_header(ranges, tail_amount):

2367

"""Turn a list of bytes ranges into a HTTP Range header value.

2368

2369

:param ranges: A list of _CoalescedOffset

2370

:param tail_amount: The amount to get from the end of the file.

2371

2372

:return: HTTP range header string.

2373

2374

At least a non-empty ranges *or* a tail_amount must be

2375

provided.

2376

"""

2377

strings = []

2378

for offset in ranges:

2379

strings.append('%d-%d' % (offset.start,

2380

offset.start + offset.length - 1))

2381

2382

if tail_amount:

2383

strings.append('-%d' % tail_amount)

2384

2385

return ','.join(strings)

2386

2387

def _redirected_to(self, source, target):

2388

"""Returns a transport suitable to re-issue a redirected request.

2389

2390

:param source: The source url as returned by the server.

2391

:param target: The target url as returned by the server.

2392

2393

The redirection can be handled only if the relpath involved is not

2394

renamed by the redirection.

2395

2396

:returns: A transport

2397

:raise UnusableRedirect: when the URL can not be reinterpreted

2398

"""

2399

parsed_source = self._split_url(source)

2400

parsed_target = self._split_url(target)

2401

pl = len(self._parsed_url.path)

2402

# determine the excess tail - the relative path that was in

2403

# the original request but not part of this transports' URL.

2404

excess_tail = parsed_source.path[pl:].strip("/")

2405

if not parsed_target.path.endswith(excess_tail):

2406

# The final part of the url has been renamed, we can't handle the

2407

# redirection.

2408

raise UnusableRedirect(

2409

source, target, "final part of the url was renamed")

2410

2411

target_path = parsed_target.path

2412

if excess_tail:

2413

# Drop the tail that was in the redirect but not part of

2414

# the path of this transport.

2415

target_path = target_path[:-len(excess_tail)]

2416

2417

if parsed_target.scheme in ('http', 'https'):

2418

# Same protocol family (i.e. http[s]), we will preserve the same

2419

# http client implementation when a redirection occurs from one to

2420

# the other (otherwise users may be surprised that bzr switches

2421

# from one implementation to the other, and devs may suffer

2422

# debugging it).

2423

if (parsed_target.scheme == self._unqualified_scheme

2424

and parsed_target.host == self._parsed_url.host

2425

and parsed_target.port == self._parsed_url.port

2426

and (parsed_target.user is None or

2427

parsed_target.user == self._parsed_url.user)):

2428

# If a user is specified, it should match, we don't care about

2429

# passwords, wrong passwords will be rejected anyway.

2430

return self.clone(target_path)

2431

else:

2432

# Rebuild the url preserving the scheme qualification and the

2433

# credentials (if they don't apply, the redirected to server

2434

# will tell us, but if they do apply, we avoid prompting the

2435

# user)

2436

redir_scheme = parsed_target.scheme

2437

new_url = self._unsplit_url(redir_scheme,

2438

self._parsed_url.user,

2439

self._parsed_url.password,

2440

parsed_target.host, parsed_target.port,

2441

target_path)

2442

return transport.get_transport_from_url(new_url)

2443

else:

2444

# Redirected to a different protocol

2445

new_url = self._unsplit_url(parsed_target.scheme,

2446

parsed_target.user,

2447

parsed_target.password,

2448

parsed_target.host, parsed_target.port,

2449

target_path)

2450

return transport.get_transport_from_url(new_url)

2451

2452

def _options(self, relpath):

2453

abspath = self._remote_path(relpath)

2454

resp = self.request('OPTIONS', abspath)

2455

if resp.status == 404:

2456

raise errors.NoSuchFile(abspath)

2457

if resp.status in (403, 405):

2458

raise errors.InvalidHttpResponse(

2459

abspath,

2460

"OPTIONS not supported or forbidden for remote URL")

2461

return resp.getheaders()

2462

2463

2464

# TODO: May be better located in smart/medium.py with the other

2465

# SmartMedium classes

2466

class SmartClientHTTPMedium(medium.SmartClientMedium):
    """Smart client medium sending its requests through an http transport."""

    def __init__(self, http_transport):
        super(SmartClientHTTPMedium, self).__init__(http_transport.base)
        # We don't want to create a circular reference between the http
        # transport and its associated medium. Since the transport will live
        # longer than the medium, the medium keep only a weak reference to its
        # transport.
        self._http_transport_ref = weakref.ref(http_transport)

    def get_request(self):
        """Return a new SmartClientHTTPMediumRequest bound to this medium."""
        return SmartClientHTTPMediumRequest(self)

    def should_probe(self):
        """Always True."""
        return True

    def remote_path_from_transport(self, transport):
        """Return the unquoted path of ``transport`` relative to our base."""
        # Strip the optional 'bzr+' prefix from transport so it will have the
        # same scheme as self.
        prefix = 'bzr+'
        other_base = transport.base
        if other_base.startswith(prefix):
            other_base = other_base[len(prefix):]
        return urlutils.unquote(urlutils.relative_url(self.base, other_base))

    def send_http_smart_request(self, bytes):
        """POST ``bytes`` through the http transport and return the body.

        :returns: The file-like response body.
        :raises errors.SmartProtocolError: when InvalidHttpResponse or
            ConnectionReset is raised while posting or checking the status.
        """
        try:
            # Get back the http_transport hold by the weak reference
            http_transport = self._http_transport_ref()
            code, body_filelike = http_transport._post(bytes)
            # The status check stays inside the try block so that the
            # except clause below also applies to the error it raises.
            if code != 200:
                smart_url = http_transport._remote_path('.bzr/smart')
                raise errors.UnexpectedHttpStatus(smart_url, code)
        except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
            raise errors.SmartProtocolError(str(e))
        return body_filelike

    def _report_activity(self, bytes, direction):
        """See SmartMedium._report_activity.

        Does nothing; the underlying plain HTTP transport will report the
        activity that this medium would report.
        """
        pass

    def disconnect(self):
        """See SmartClientMedium.disconnect()."""
        self._http_transport_ref().disconnect()

2515

2516

2517

# TODO: May be better located in smart/medium.py with the other

2518

# SmartMediumRequest classes

2519

class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium."""

    def __init__(self, client_medium):
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        # Everything written is buffered here and sent in one go by
        # _finished_writing.
        self._buffer = b''

    def _accept_bytes(self, bytes):
        """Append ``bytes`` to the pending request buffer."""
        self._buffer = self._buffer + bytes

    def _finished_writing(self):
        """Send the buffered request and keep the response body."""
        self._response_body = self._medium.send_http_smart_request(
            self._buffer)

    def _read_bytes(self, count):
        """See SmartClientMediumRequest._read_bytes."""
        return self._response_body.read(count)

    def _read_line(self):
        """Read one line from the response body.

        :raises AssertionError: if ``medium._get_line`` hands back excess
            bytes, which this request cannot handle.
        """
        line, excess = medium._get_line(self._response_body.read)
        if excess == b'':
            return line
        raise AssertionError(
            '_get_line returned excess bytes, but this mediumrequest '
            'cannot handle excess. (%r)' % (excess,))

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""
        pass

2548

2549

2550

def unhtml_roughly(maybe_html, length_limit=1000):
    """Very approximate html->text translation, for presenting error bodies.

    Every tag, newline and ``&nbsp;`` entity is replaced by a single space;
    nothing else is translated.

    :param maybe_html: Text that may contain html markup.
    :param length_limit: Truncate the result to this many characters.
    :returns: The translated text, truncated to ``length_limit``.

    >>> unhtml_roughly("<b>bad</b> things happened\\n")
    ' bad  things happened '
    """
    return re.sub(r"(<[^>]*>|\n|&nbsp;)", " ", maybe_html)[:length_limit]

2559

2560

2561

def get_test_permutations():
    """Return the permutations to be used in testing.

    :returns: A list of (transport class, server class) tuples; the https
        pair is only included when HTTPSServerFeature is available.
    """
    from breezy.tests import (
        features,
        http_server,
        )
    permutations = [(HttpTransport, http_server.HttpServer)]
    if not features.HTTPSServerFeature.available():
        return permutations

    from breezy.tests import (
        https_server,
        ssl_certs,
        )

    class HTTPS_transport(HttpTransport):
        # Same transport, but always built with the test CA certificate.

        def __init__(self, base, _from_transport=None):
            super(HTTPS_transport, self).__init__(
                base, _from_transport=_from_transport,
                ca_certs=ssl_certs.build_path('ca.crt'))

    permutations.append((HTTPS_transport, https_server.HTTPSServer))
    return permutations