/brz/remove-bazaar : revision 7526.2.1

1

2

#

3

# This program is free software; you can redistribute it and/or modify

4

# it under the terms of the GNU General Public License as published by

5

# the Free Software Foundation; either version 2 of the License, or

6

# (at your option) any later version.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, write to the Free Software

15

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

16

17

"""Base implementation of Transport over http using urllib.

18

19

There are separate implementation modules for each http client implementation.

20

"""

21

22

from __future__ import absolute_import

23

24

# Debug level handed to every connection (via set_debuglevel) and to the
# urllib handlers defined in this module. The handlers only print their
# diagnostics when this is >= 1 (request sent) or >= 2 (retries, responses).
DEBUG = 0

25

26

import base64

27

import cgi

28

import errno

29

import os

30

import re

31

import socket

32

import ssl

33

import sys

34

import time

35

import urllib

36

import weakref

37

38

try:

39

import http.client as http_client

40

except ImportError:

41

import httplib as http_client

42

try:

43

import urllib.request as urllib_request

44

except ImportError: # python < 3

45

import urllib2 as urllib_request

46

try:

47

from urllib.parse import urljoin, splitport, splittype, splithost, urlencode

48

except ImportError:

49

from urlparse import urljoin

50

from urllib import splitport, splittype, splithost, urlencode

51

52

# TODO: handle_response should be integrated into the http/__init__.py

53

from .response import handle_response

54

55

# FIXME: Oversimplifying, two kinds of exceptions should be

56

# raised, once a request is issued: URLError before we have been

57

# able to process the response, HTTPError after that. Process the

58

# response means we are able to leave the socket clean, so if we

59

# are not able to do that, we should close the connection. The

60

# actual code more or less does that, tests should be written to

61

# ensure that.

62

63

from ... import __version__ as breezy_version

64

from ... import (

65

config,

66

debug,

67

errors,

68

lazy_import,

69

osutils,

70

trace,

71

transport,

72

ui,

73

urlutils,

74

)

75

from ...bzr.smart import medium

76

from ...sixish import (

77

PY3,

78

reraise,

79

text_type,

80

)

81

from ...trace import mutter

82

from ...transport import (

83

ConnectedTransport,

84

UnusableRedirect,

85

)

86

87

from . import default_user_agent, ssl

88

89

90

checked_kerberos = False

91

kerberos = None

92

93

94

class addinfourl(urllib_request.addinfourl):
    '''Replacement addinfourl class compatible with python-2.7's xmlrpclib

    In python-2.7, xmlrpclib expects that the response object that it receives
    has a getheader method. http_client.HTTPResponse provides this but
    urllib_request.addinfourl does not. Add the necessary functions here,
    ported to use the internal data structures of addinfourl.
    '''

    def getheader(self, name, default=None):
        """Return the value of header ``name``.

        :param name: the header name to look up.
        :param default: value returned when the header is absent.
        :raises http_client.ResponseNotReady: if no headers are attached to
            this response.
        """
        if self.headers is None:
            raise http_client.ResponseNotReady()
        # Use get() rather than getheader(): the Python 3 headers object is
        # an email.message.Message and has no getheader(), while the Python 2
        # message class offers get() with the same (name, default) contract.
        return self.headers.get(name, default)

    def getheaders(self):
        """Return all headers as a list of ``(name, value)`` pairs.

        :raises http_client.ResponseNotReady: if no headers are attached to
            this response.
        """
        if self.headers is None:
            raise http_client.ResponseNotReady()
        return list(self.headers.items())

112

113

114

class _ReportingFileSocket(object):

115

116

def __init__(self, filesock, report_activity=None):

117

self.filesock = filesock

118

self._report_activity = report_activity

119

120

def report_activity(self, size, direction):

121

if self._report_activity:

122

self._report_activity(size, direction)

123

124

def read(self, size=1):

125

s = self.filesock.read(size)

126

self.report_activity(len(s), 'read')

127

return s

128

129

def readline(self, size=-1):

130

s = self.filesock.readline(size)

131

self.report_activity(len(s), 'read')

132

return s

133

134

def readinto(self, b):

135

s = self.filesock.readinto(b)

136

self.report_activity(s, 'read')

137

return s

138

139

def __getattr__(self, name):

140

return getattr(self.filesock, name)

141

142

143

class _ReportingSocket(object):

144

145

def __init__(self, sock, report_activity=None):

146

self.sock = sock

147

self._report_activity = report_activity

148

149

def report_activity(self, size, direction):

150

if self._report_activity:

151

self._report_activity(size, direction)

152

153

def sendall(self, s, *args):

154

self.sock.sendall(s, *args)

155

self.report_activity(len(s), 'write')

156

157

def recv(self, *args):

158

s = self.sock.recv(*args)

159

self.report_activity(len(s), 'read')

160

return s

161

162

def makefile(self, mode='r', bufsize=-1):

163

# http_client creates a fileobject that doesn't do buffering, which

164

# makes fp.readline() very expensive because it only reads one byte

165

# at a time. So we wrap the socket in an object that forces

166

# sock.makefile to make a buffered file.

167

fsock = self.sock.makefile(mode, 65536)

168

# And wrap that into a reporting kind of fileobject

169

return _ReportingFileSocket(fsock, self._report_activity)

170

171

def __getattr__(self, name):

172

return getattr(self.sock, name)

173

174

175

# We define our own Response class to keep our http_client pipe clean

176

class Response(http_client.HTTPResponse):
    """HTTPResponse subclass that keeps a persistent connection usable.

    http_client prefers to decorate the returned objects, rather
    than using a custom object; subclassing avoids the need to decorate.
    """

    # Status codes whose body is read and thrown away as soon as the
    # headers have been received.
    _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]

    # finish() may have to discard several MB in the worst case. To avoid
    # buffering that much, the data is read and dropped chunk by chunk. The
    # underlying file is either a socket or a StringIO, so 8k chunks are fine.
    _discarded_buf_size = 8192

    if PY3:
        def __init__(self, sock, debuglevel=0, method=None, url=None):
            """Remember ``url`` on the instance before the base initialisation."""
            self.url = url
            super(Response, self).__init__(
                sock, debuglevel=debuglevel, method=method, url=url)

    def begin(self):
        """Begin to read the response from the server.

        http_client assumes that some responses get no content and does
        not even attempt to read the body in that case, leaving the body in
        the socket, blocking the next request. This works around that by
        consuming the body of the statuses in ``_body_ignored_responses``.
        """
        http_client.HTTPResponse.begin(self)
        status = self.status
        if status in self._body_ignored_responses:
            if self.debuglevel >= 2:
                print("For status: [%s], will ready body, length: %s" % (
                    self.status, self.length))
            # In some cases we must not even try to read the body, or we may
            # encounter a 104 'Connection reset by peer' error if there is
            # indeed no body and the server closed the connection just after
            # having issued the response headers (even if the headers
            # indicate a Content-Type...)
            if self.length is not None and not self.will_close:
                consumed = self.read(self.length)
                if self.debuglevel >= 9:
                    # This one can be huge and is generally not interesting
                    print("Consumed body: [%s]" % consumed)
            self.close()
        elif status == 200:
            # Whatever the request is, it went ok, so we surely don't want to
            # close the connection. Some cases are not correctly detected by
            # http_client.HTTPConnection.getresponse (called by
            # http_client.HTTPResponse.begin). The CONNECT response for the
            # https through proxy case is one. Note: 'will_close' refers to
            # the "true" socket between us and the server, whereas the
            # 'close()' in the branch above refers to the copy of that socket
            # created by http_client for the response itself. So that branch
            # closes the socket to indicate we are done with the response,
            # whereas here the socket with the server is kept open.
            self.will_close = False

    def finish(self):
        """Finish reading the body.

        In some cases, the client may have left some bytes to read in the
        body. That would prevent the next request from succeeding on a
        persistent connection. Without a persistent connection nothing blocks
        the next request since a new connection is issued anyway.

        :return: the number of bytes left on the socket (None when the
            response was already closed)
        """
        if self.isclosed():
            return None
        # Make sure nothing was left to be read on the socket
        drained = 0
        while self.length:
            # read() will update self.length
            chunk = self.read(min(self.length, self._discarded_buf_size))
            drained += len(chunk)
            if not chunk:
                break
        if drained:
            trace.mutter("%s bytes left on the HTTP socket", drained)
        self.close()
        return drained

260

261

262

# Not inheriting from 'object' because http_client.HTTPConnection doesn't.

263

class AbstractHTTPConnection:
    """A custom HTTP(S) Connection, which can reset itself on a bad response"""

    response_class = Response

    # When we detect a server responding with the whole file to range requests,
    # we want to warn. But not below a given thresold.
    _range_warning_thresold = 1024 * 1024

    def __init__(self, report_activity=None):
        """Initialise the mixin state.

        :param report_activity: optional callable handed to the reporting
            socket wrapper.
        """
        self._report_activity = report_activity
        self._ranges_received_whole_file = None
        self._response = None

    def _mutter_connect(self):
        """Log the host and port (and the proxied host) about to be reached."""
        target = '%s:%s' % (self.host, self.port)
        if self.proxied_host is not None:
            target += '(proxy for %s)' % self.proxied_host
        trace.mutter('* About to connect() to %s' % target)

    def getresponse(self):
        """Capture the response to be able to cleanup"""
        captured = http_client.HTTPConnection.getresponse(self)
        self._response = captured
        return captured

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        response = self._response
        if response is not None:
            try:
                leftover = response.finish()
                # Warn the user (once)
                whole_file_received = (
                    self._ranges_received_whole_file is None
                    and response.status == 200
                    and leftover
                    and leftover > self._range_warning_thresold)
                if whole_file_received:
                    self._ranges_received_whole_file = True
                    trace.warning(
                        'Got a 200 response when asking for multiple ranges,'
                        ' does your server at %s:%s support range requests?',
                        self.host, self.port)
            except socket.error as err:
                # It's conceivable that the socket is in a bad state here
                # (including some test cases) and in this case, it doesn't need
                # cleaning anymore, so no need to fail, we just get rid of the
                # socket and let callers reconnect
                tolerated = (errno.ECONNRESET, errno.ECONNABORTED)
                if not err.args or err.args[0] not in tolerated:
                    raise
                self.close()
            self._response = None
        # Hide the socket while http_client.HTTPConnection does its
        # housekeeping in close(), then put it back.
        kept_sock, self.sock = self.sock, None
        self.close()
        self.sock = kept_sock

    def _wrap_socket_for_reporting(self, sock):
        """Wrap the socket before anybody use it."""
        self.sock = _ReportingSocket(sock, self._report_activity)

324

325

326

class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
    """Plain HTTP connection whose socket reports its activity."""

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        """Build the connection without connecting.

        :param host: host to connect to.
        :param port: port to connect to.
        :param proxied_host: stored as-is on the connection.
        :param report_activity: passed to AbstractHTTPConnection.
        :param ca_certs: ignored, it's only relevant for https.
        """
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        # On python 2 use strict=True since we don't support HTTP/0.9
        extra_kwargs = {} if PY3 else {'strict': True}
        http_client.HTTPConnection.__init__(self, host, port, **extra_kwargs)
        self.proxied_host = proxied_host

    def connect(self):
        """Connect, then wrap the new socket for activity reporting."""
        tracing = 'http' in debug.debug_flags
        if tracing:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)

345

346

347

class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
    """HTTPS connection doing its own TLS setup, directly or through a proxy.

    The plain TCP connection is established by connect(); the TLS layer is
    added by connect_to_origin(), either immediately (no proxy) or later by
    the caller (when a proxy is used).
    """

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        """Build the connection without connecting.

        :param host: host the TCP connection is made to (the proxy when
            ``proxied_host`` is set).
        :param port: port to connect to.
        :param key_file: client private key file, used by connect_to_origin.
        :param cert_file: client certificate file, used by connect_to_origin.
        :param proxied_host: ``host:port`` of the origin server when going
            through a proxy, None otherwise.
        :param report_activity: passed to AbstractHTTPConnection.
        :param ca_certs: CA bundle overriding the ``ssl.ca_certs`` option.
        """
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        if PY3:
            # http.client.HTTPSConnection lost its key_file/cert_file
            # parameters in Python 3.12, so they cannot be passed through.
            # The base class only used them for its own SSL context, which
            # is never used here since connect() is overridden; recording
            # them is all connect_to_origin() needs.
            http_client.HTTPSConnection.__init__(self, host, port)
            self.key_file = key_file
            self.cert_file = cert_file
        else:
            # Use strict=True since we don't support HTTP/0.9
            http_client.HTTPSConnection.__init__(self, host, port,
                                                 key_file, cert_file, strict=True)
        self.proxied_host = proxied_host
        self.ca_certs = ca_certs

    def connect(self):
        """Open the TCP connection and, without a proxy, start TLS at once."""
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        """Wrap the current socket into a TLS session with the origin server.

        The verification mode comes from the ``ssl.cert_reqs`` option and the
        CA bundle from ``self.ca_certs`` or the ``ssl.ca_certs`` option.

        :raises ssl.SSLError: when the TLS setup or handshake fails (a hint
            about the relevant options is emitted first).
        """
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        # The TLS peer is the origin server: when tunnelling through a proxy
        # self.host is the proxy, so take the name from proxied_host.
        if self.proxied_host is not None:
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
            ca_certs = None
        else:
            if self.ca_certs is None:
                ca_certs = config_stack.get('ssl.ca_certs')
            else:
                ca_certs = self.ca_certs
            if ca_certs is None:
                trace.warning(
                    "No valid trusted SSL CA certificates file set. See "
                    "'brz help ssl.ca_certs' for more information on setting "
                    "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            # check_hostname has to be switched off before verify_mode may
            # be set to CERT_NONE, hence the ordering of these assignments.
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            # SNI and hostname verification must target the origin server,
            # not the proxy the TCP connection was made to.
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=host)
        except ssl.SSLError:
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA "
                "certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody use it
        self._wrap_socket_for_reporting(ssl_sock)

413

414

415

class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication have
      been made.
    """

    def __init__(self, method, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        """Build a request with an explicit HTTP method.

        :param method: the HTTP method returned by get_method().
        :param url: the full URL of the request.
        :param data: optional request body.
        :param headers: optional dict of headers; None means no headers.
        :param origin_req_host: passed to urllib_request.Request.
        :param unverifiable: passed to urllib_request.Request.
        :param connection: the connection to issue the request on, if any.
        :param parent: the request this one was redirected from, if any.
        """
        # A fresh dict per call instead of a shared mutable default.
        if headers is None:
            headers = {}
        urllib_request.Request.__init__(
            self, url, data, headers,
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        """Return the method given at construction time."""
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        if PY3:
            host, port = splitport(self.host)
        else:
            host, port = splitport(self.get_host())
        if port is None:
            # We need to set the default port ourselves way before it gets set
            # in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and a proxy,
        # it sets Host to the https proxy, not the host we want to talk to.
        # I'm fairly sure this is our fault, but what is the cause is an open
        # question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)

474

475

476

class _ConnectRequest(Request):
    """The CONNECT request opening a tunnel to ``proxied_host`` via a proxy."""

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        :raises AssertionError: if ``request.proxied_host`` is not set.
        """
        # We give a fake url and redefine selector or urllib_request will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        """The selector of a CONNECT request is always the proxied host."""
        return self.proxied_host

    @selector.setter
    def selector(self, value):
        # The Python 3 urllib Request assigns ``selector`` while parsing the
        # URL (hence during __init__) and again in set_proxy(). Without a
        # setter those assignments raise AttributeError on this property.
        # The value is deliberately discarded: see the getter.
        pass

    def get_selector(self):
        """Return the proxied host (method form of ``selector``)."""
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not apply
        here. In fact, the connection is already established with proxy and we
        just want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)

510

511

512

class ConnectionHandler(urllib_request.BaseHandler):
    """Provides connection-sharing by pre-processing requests.

    urllib_request provides no way to access the HTTPConnection object
    internally used. But we need it in order to achieve
    connection sharing. So, we add it to the request just before
    it is processed, and then we override the do_open method for
    http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        """Remember what every connection created here is built with.

        :param report_activity: callable handed to new connections.
        :param ca_certs: CA bundle handed to new connections.
        """
        self.ca_certs = ca_certs
        self._report_activity = report_activity

    def create_connection(self, request, http_connection_class):
        """Build a not yet connected connection for ``request``.

        :param request: the request providing ``host`` and ``proxied_host``.
        :param http_connection_class: the connection class to instantiate.
        :return: the new connection.
        :raises urlutils.InvalidURL: when the request has no host or the
            connection class rejects the host as an invalid URL.
        """
        target = request.host
        if not target:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        # The connection is only created here, it will not connect until
        # the first request is made.
        try:
            return http_connection_class(
                target, proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL:
            # There is only one occurrence of InvalidURL in http_client
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request have no connection: create a new one,

        - the request have a connection: this one have been used
          already, let's capture it, so that we can give it to
          another transport to be reused. We don't do that
          ourselves: the Transport object get the connection from
          a first request and then propagate it, from request to
          request or to cloned transports.
        """
        conn = request.connection
        if conn is None:
            # Create a new one and attach it to the request
            conn = request.connection = self.create_connection(
                request, http_connection_class)

        # All connections will pass here, propagate debug level
        conn.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        """Attach a plain HTTP connection to ``request``."""
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        """Attach an HTTPS connection to ``request``."""
        return self.capture_connection(request, HTTPSConnection)

577

578

579

class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    # Headers added by http_request() to any request that does not already
    # define them.
    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        """Initialise the base handler with the module-level DEBUG level."""
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Common headers setting

        Every entry of ``_default_headers`` missing from ``request.headers``
        is added to it; headers already present are left untouched.

        :param request: the request to complete (modified in place).
        :return: the same request object.
        """

        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exception of application level kind, we
        just have to translate them.

        http_client can raise exceptions of transport level (badly
        formatted dialog, loss of connexion or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).

        This reads the exception being handled from sys.exc_info(), so it
        must be called from within an ``except`` block.

        :param http_class: passed through to do_open() for the retry.
        :param request: the request that failed.
        :param first_try: True if the request has not been retried yet.
        :return: the response obtained by the retry (first try only).
        :raises errors.ConnectionError: for name resolution failures and for
            any otherwise unclassified failure of the second try.
        :raises errors.InvalidHttpResponse: when the second try fails with
            BadStatusLine or UnknownProtocol.
        :raises errors.ConnectionReset: when the second try fails with a
            connection reset/aborted socket error.
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        # NOTE(review): exact type comparison, subclasses of gaierror would
        # fall through to the retry branch below.
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            if PY3:
                origin_req_host = request.origin_req_host
            else:
                origin_req_host = request.get_origin_req_host()
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in incorrect state, it's a bug in our
            # implementation.
            reraise(exc_type, exc_val, exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Will retry, %s %r' % (method, url))
                # Drop the (possibly broken) connection and issue the
                # request again, flagged as not being the first try anymore.
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicates that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) is
                    # experimented, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exception are considered connection related.

                    # socket errors generally occurs for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    if PY3:
                        selector = request.selector
                    else:
                        selector = request.get_selector()
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Failed again, %s %r' % (method, url))
                    print(' Will raise: [%r]' % my_exception)
                # Raise the translated exception with the original traceback.
                reraise(type(my_exception), my_exception, exc_tb)
        return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.

        :param http_class: the connection class, only used here to be passed
            on to retry_or_raise().
        :param request: the request to send; it must already carry its
            connection.
        :param first_try: False when called again by retry_or_raise().
        :return: on python 3, the connection's response object with ``msg``
            set to its reason phrase; otherwise an addinfourl wrapping the
            response, or the response of the retry as is.
        :raises AssertionError: if the request has no connection.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get canonical
        # header names, but only python2.5 urllib_request use title() to fix them just
        # before sending the request. And not all versions of python 2.5 do
        # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
        # ourself below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            if PY3:
                url = request.selector
            else:
                url = request.get_selector()
            # The encode_chunked argument is only passed on python >= 3.6.
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
            convert_to_addinfourl = True
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            # Either returns the response of the retry or raises. The retry
            # went through do_open() itself, so its result is returned as is.
            response = self.retry_or_raise(http_class, request, first_try)
            convert_to_addinfourl = False

        if PY3:
            # On python 3 the response is returned directly; everything
            # below only runs on python 2.
            response.msg = response.reason
            return response

        # FIXME: HTTPConnection does not fully support 100-continue (the
        # server responses are just ignored)

        # if code == 100:
        #     mutter('Will send the body')
        #     # We can send the body now
        #     body = request.data
        #     if body is None:
        #         raise URLError("No data given")
        #     connection.send(body)
        #     response = connection.getresponse()

        if self._debuglevel >= 2:
            print('Receives response: %r' % response)
            print(' For: %r(%r)' % (request.get_method(),
                                     request.get_full_url()))

        if convert_to_addinfourl:
            # Shamelessly copied from urllib_request
            req = request
            r = response
            # socket._fileobject reads through recv(), so alias it to read().
            r.recv = r.read
            fp = socket._fileobject(r, bufsize=65536)
            resp = addinfourl(fp, r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason
            resp.version = r.version
            if self._debuglevel >= 2:
                print('Create addinfourl: %r' % resp)
                print(' For: %r(%r)' % (request.get_method(),
                                         request.get_full_url()))
            if 'http' in debug.debug_flags:
                version = 'HTTP/%d.%d'
                try:
                    version = version % (resp.version / 10,
                                         resp.version % 10)
                # NOTE(review): bare except, falls back to the raw version
                # value whatever went wrong while formatting it.
                except:
                    version = 'HTTP/%r' % resp.version
                trace.mutter('< %s %s %s' % (version, resp.code,
                                             resp.msg))
                # Use the raw header lines instead of treating resp.info() as a
                # dict since we may miss duplicated headers otherwise.
                hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
                trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
        else:
            resp = response
        return resp

812

813

814

class HTTPHandler(AbstractHTTPHandler):
    """Handler opening http requests through our own HTTPConnection."""

    def http_open(self, request):
        """Send ``request`` with do_open() and return what it returns."""
        response = self.do_open(HTTPConnection, request)
        return response

819

820

821

class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    # https requests get the same default headers as http ones.
    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Open an https request, setting up the proxy tunnel first if needed.

        When the request's connection is not connected yet and goes through
        a proxy, a CONNECT request is issued first and, once accepted, the
        connection is encrypted before the real request is sent.

        :param request: the request to send; it must carry its connection.
        :return: whatever do_open() returns for the request.
        :raises errors.ConnectionError: if the proxy does not answer the
            CONNECT request with a 200 status.
        """
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT':  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # The proxy is the host the connection was built for; the
                # handler itself has no ``host`` attribute.
                raise errors.ConnectionError("Can't connect to %s via proxy %s" % (
                    connect.proxied_host, connection.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)

857

858

859

class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Handle redirections with our own Request objects and policy.

    urllib_request's implementation cannot be reused as is because we use a
    specific Request class and want a specific redirection policy.
    """

    _debuglevel = DEBUG

    # RFC2616 says that only read requests should be redirected without
    # interacting with the user. But Breezy uses some shortcuts to optimize
    # against roundtrips which can lead to write requests being issued
    # before read requests of containing dirs can be redirected. So we
    # redirect write requests in the same way, which seems to respect the
    # spirit of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request.

        :return: A new Request targeting ``newurl``, with ``req`` as parent.
        :raises urllib_request.HTTPError: for any code other than 301, 302,
            303, 307 and 308.
        """
        # We would have preferred to update the request instead of creating
        # a new one, but the urllib_request.Request object has a too
        # complicated creation process to provide a simple enough equivalent
        # update process. Instead, when redirecting, we only update the
        # following request in the redirect chain with a reference to the
        # parent request.

        # Some codes make no sense in our context and are treated as
        # errors:

        # 300: Multiple choices for different representations of the URI.
        # Using that mechanism with Breezy will violate the protocol
        # neutrality of Transport.

        # 304: Not modified (SHOULD only occur with conditional GETs which
        # are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in our
        # context-- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may think that
        # it is a sufficient hint that the file exists and that we MAY
        # avoid following the redirections. But if we want to be sure, we
        # MUST follow them.

        origin_req_host = (req.origin_req_host if PY3
                           else req.get_origin_req_host())

        if code not in (301, 302, 303, 307, 308):
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

        # TODO: It will be nice to be able to detect virtual hosts sharing
        # the same IP address, that will allow us to share the same
        # connection (hence connection=None for now).
        return Request(req.get_method(), newurl,
                       headers=req.headers,
                       origin_req_host=origin_req_host,
                       unverifiable=True,
                       connection=None,
                       parent=req,
                       )

    def http_error_302(self, req, fp, code, msg, headers):
        """Request the redirected-to URI.

        Copied from urllib_request to be able to clean the pipe of the
        associated connection, *before* issuing the redirected request but
        *after* having eventually raised an error.

        :return: None when the response names no target, ``fp`` when the
            request asked not to follow redirections, otherwise the result
            of opening the redirected request.
        """
        # Some servers (incorrectly) return multiple Location headers (so
        # probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the following will
        # need to be updated to use correct case for headers.
        for header_name in ('location', 'uri'):
            if header_name in headers:
                target = headers.get(header_name)
                break
        else:
            return

        newurl = urljoin(req.get_full_url(), target)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            # The caller only wants to know where we would have gone
            req.redirected_to = newurl
            return fp

        # This call succeeds or raises an error. urllib_request returns if
        # redirect_request returns None, but our redirect_request never
        # returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # Loop detection: .redirect_dict has a key url if url was
        # previously visited.
        if not hasattr(req, 'redirect_dict'):
            visited = redirected_req.redirect_dict = req.redirect_dict = {}
        else:
            visited = redirected_req.redirect_dict = req.redirect_dict
            limit_reached = (visited.get(newurl, 0) >= self.max_repeats
                             or len(visited) >= self.max_redirections)
            if limit_reached:
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't use it
        # with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302

982

983

984

class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request
    during the request pre-processing instead of modifying it at _open time.
    As we capture (or create) the connection object during request
    processing, _open time was too late.

    The main task is to modify the request so that the connection is done to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may
    be against an https server proxied through an http proxy. So,
    https_request will be called, but later it's really http_open that will
    be called. This explains why we don't have to call self.parent.open as
    the urllib_request did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        """Replace the ``*_open`` hooks of the base class by ``*_request`` ones.

        :param proxies: Passed unchanged to urllib_request.ProxyHandler.
        """
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of urllib_request implementation
        for scheme, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (scheme, proxy))
            delattr(self, '%s_open' % scheme)

        def bind_scheme_request(proxy, scheme):
            # Install a <scheme>_request hook only when a proxy is defined
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))
        # We are interested only by the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        bind_scheme_request(http_proxy, 'http')
        https_proxy = self.get_proxy_env_var('https')
        bind_scheme_request(https_proxy, 'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.

        :param name: Key looked up (lower cased) in ``self.proxies``.
        :param default_to: Alternate key tried when ``name`` is missing, or
            None to disable the fallback.
        :return: The proxy found or None.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            if default_to is not None:
                # Try to get the alternate environment variable
                try:
                    return self.proxies[default_to]
                except KeyError:
                    pass
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib_request.proxy_bypass(host)
        else:
            return bypass

    @staticmethod
    def _no_proxy_pattern(entry):
        """Translate the host part of a no_proxy entry into a regexp source.

        ``*`` matches any run of characters and ``?`` any single character;
        every other character is escaped so it only matches itself.
        """
        return ''.join(
            '.*' if char == '*' else '.' if char == '?' else re.escape(char)
            for char in entry)

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        Each entry is matched case-insensitively against the host, starting
        at the beginning of the host name (the end is not anchored). An
        entry carrying a port only applies to that port.

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if hport == dport or dport is None:
                # Only the glob chars are special; anything else (brackets
                # of an IPv6 literal, '+', '(' ...) must be taken literally
                # or it would be interpreted as regexp syntax.
                pattern = self._no_proxy_pattern(dhost)
                if re.match(pattern, hhost, re.IGNORECASE):
                    return True
        # Nothing explicitly avoid the host
        return None

    def set_proxy(self, request, type):
        """Redirect ``request`` to the proxy configured for ``type``.

        :param request: The request being pre-processed.
        :param type: The proxy scheme ('http' or 'https').
        :return: The request, modified unless the host bypasses the proxy.
        :raises urlutils.InvalidURL: when the proxy URL has no host.
        """
        if PY3:
            host = request.host
        else:
            host = request.get_host()
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port

        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameter are available, we are handling the first
            # proxied request, initialize. scheme (the authentication scheme)
            # and realm will be set by the AuthHandler
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request

1131

1132

1133

class AbstractAuthHandler(urllib_request.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate and
    digest authentications.

    This provides a unified interface for all authentication handlers
    (urllib_request provides far too many with different policies).

    The interaction between this handler and the urllib_request
    framework is not obvious, it works as follows:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been updated.
    """

    # The scheme as it appears in the server header (lower cased)
    scheme = None

    # We don't want to retry authenticating endlessly
    _max_retry = 3

    # Whether the auth mechanism requires a username.
    requires_username = True

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header:  the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # None means "not inside a try/fail cycle of authentications"; a
        # number counts the attempts of the cycle in progress.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Parse the authentication header.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder) scheme being the first word in
            the given header (lower cased), remainder may be None.
        """
        words = server_header.split(None, 1)
        if len(words) == 2:
            scheme, remainder = words
        else:
            # A single word (or nothing at all): no parameters
            scheme, remainder = server_header, None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        if auth.get(key) != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly. The retries being recursive
        # calls, None identifies the first one.
        if self._retry_count is None:
            attempts = 1
        else:
            attempts = self._retry_count + 1
        self._retry_count = attempts
        if attempts > self._max_retry:
            # Let's be ready for next round
            self._retry_count = None
            return None

        if PY3:
            server_headers = headers.get_all(self.auth_required_header)
        else:
            server_headers = headers.getheaders(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            trace.mutter('%s not found', self.auth_required_header)
            return None

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            for key, value in (('protocol', parsed_url.scheme),
                               ('host', parsed_url.host),
                               ('port', parsed_url.port),
                               ('path', parsed_url.path)):
                self.update_auth(auth, key, value)

        # FIXME: the auth handler should be selected at a single place instead
        # of letting all handlers try to match all headers, but the current
        # design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match
            # each one in turn
            if not self.auth_match(server_header, auth):
                continue

            # auth_match may have modified auth (by adding the
            # password or changing the realm, for example)
            already_sent = request.get_header(self.auth_header, None)
            if already_sent is not None and not auth['modified']:
                # We already tried that, give up
                return None

            # Only the most secure scheme proposed by the server should be
            # used, since the handlers use 'handler_order' to describe that
            # property, the first handler tried takes precedence, the
            # others should not attempt to authenticate if the best one
            # failed.
            best_scheme = auth.get('best_scheme', None)
            if best_scheme is None:
                # At that point, if the current handler doesn't succeed
                # the credentials are wrong (or incomplete), but we know
                # that the associated scheme should be used.
                best_scheme = auth['best_scheme'] = self.scheme
            if best_scheme != self.scheme:
                continue

            if self.requires_username and auth.get('user', None) is None:
                # Without a known user, we can't authenticate
                return None

            # Housekeeping
            request.connection.cleanup_pipe()
            # Retry the request with an authentication header added
            response = self.parent.open(request)
            if response:
                self.auth_successful(request, response)
            return response

        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them changes the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
             updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional infos may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial
            url and then during dialog with the server).
        :return: A (user, password) tuple.
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(
                auth['protocol'], auth['host'],
                port=port, path=auth['path'],
                realm=realm, ask=True,
                prompt=self.build_username_prompt(auth))
        # A password is only looked up once a user is known
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information
        in the prompt, so we build the prompt from the authentication dict
        which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_password_prompt using this method.
        """
        pieces = [u'%s' % auth['protocol'].upper(), u' %(user)s@%(host)s']
        realm = auth['realm']
        if realm is not None:
            pieces.append(u", Realm: '%s'" % realm)
        pieces.append(u' password')
        return u''.join(pieces)

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information
        in the prompt, so we build the prompt from the authentication dict
        which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_username_prompt using this method.
        """
        pieces = [u'%s' % auth['protocol'].upper(), u' %(host)s']
        realm = auth['realm']
        if realm is not None:
            pieces.append(u", Realm: '%s'" % realm)
        pieces.append(u' username')
        return u''.join(pieces)

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            header = self.build_auth_header(auth, request)
            self.add_auth_header(request, header)
        return request

    https_request = http_request  # FIXME: Need test

1415

1416

1417

class NegotiateAuthHandler(AbstractAuthHandler):
    """An authentication handler that handles WWW-Authenticate: Negotiate.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        """Accept the challenge when a Kerberos response can be produced.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters, updated with the scheme and, on
            success, the 'negotiate_response'.
        :returns: True if the authentication can be attempted.
        """
        scheme = self._parse_auth_header(header)[0]
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        token = self._auth_match_kerberos(auth)
        if token is not None:
            # Optionally should try to authenticate using NTLM here
            self.update_auth(auth, 'negotiate_response', token)
            return True
        return False

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host.

        :return: The value of kerberos.authGSSClientResponse, or None when
            the kerberos module is unavailable or a GSSAPI call reports a
            failure.
        """
        global kerberos, checked_kerberos
        # The import is attempted at most once per process
        if kerberos is None and not checked_kerberos:
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None

        status, context = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if status < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], status)
            return None

        status = kerberos.authGSSClientStep(context, "")
        if status < 0:
            trace.mutter('authGSSClientStep failed: %d', status)
            return None

        return kerberos.authGSSClientResponse(context)

    def build_auth_header(self, auth, request):
        """Build the Negotiate header from the stored GSSAPI response."""
        response = auth['negotiate_response']
        return "Negotiate %s" % response

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` holds a usable negotiate response."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        if auth.get('scheme', None) != 'negotiate':
            return False
        return auth.get('negotiate_response', None) is not None

1471

1472

1473

class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    # Extracts the quoted realm from the challenge parameters
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Build the Basic header from the 'user' and 'password' in auth.

        :param auth: The auth parameters, 'user' and 'password' are used.
        :param request: The request needing authentication (unused).
        :return: The header value.
        """
        raw = '%s:%s' % (auth['user'], auth['password'])
        encoded = base64.b64encode(raw.encode('utf-8')).decode('ascii')
        return 'Basic ' + encoded

    def extract_realm(self, header_value):
        """Search the challenge parameters for a realm.

        :param header_value: The part of the server header following the
            scheme, or None when the server sent the scheme alone.
        :return: A (match, realm) tuple, both None when no realm is found.
        """
        if header_value is None:
            # A bare "Basic" challenge has no parameters to search; the
            # regexp would raise TypeError if handed None.
            return None, None
        match = self.auth_regexp.search(header_value)
        realm = None
        if match:
            realm = match.group(1)
        return match, realm

    def auth_match(self, header, auth):
        """Check for a Basic challenge carrying a realm.

        On a match the scheme and realm are recorded in auth and missing
        credentials are requested through get_user_password.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
            updated.
        :returns: True if we can try to handle the authentication.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match:
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if (auth.get('user', None) is None
                    or auth.get('password', None) is None):
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` results from a previous basic exchange."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'

1515

1516

1517

def get_digest_algorithm_impls(algorithm):
    """Return the (H, KD) functions implementing a digest algorithm.

    :param algorithm: The algorithm name, 'MD5' and 'SHA' are known.
    :return: A (H, KD) tuple. H hashes its argument; KD(secret, data) applies
        H to the utf-8 encoding of "secret:data". Both are None for an
        unknown algorithm.
    """
    if algorithm == 'MD5':
        def hash_hex(x):
            return osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        hash_hex = osutils.sha_string
    else:
        # Unsupported algorithm: nothing to offer
        return None, None

    def keyed_digest(secret, data):
        joined = "%s:%s" % (secret, data)
        return hash_hex(joined.encode('utf-8'))

    return hash_hex, keyed_digest

1528

1529

1530

def get_new_cnonce(nonce, nonce_count):
    """Build a fresh client nonce.

    The server nonce, the nonce count, the current time and 8 characters
    from osutils.rand_chars are combined and fed to osutils.sha_string.

    :return: The first 16 characters of that result.
    """
    timestamp = time.ctime()
    salt = osutils.rand_chars(8)
    raw = '%s:%d:%s:%s' % (nonce, nonce_count, timestamp, salt)
    digest = osutils.sha_string(raw.encode('utf-8'))
    return digest[:16]

1534

1535

1536

class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` results from a previous digest exchange."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        """Check for a Digest challenge we are able to answer.

        Only the 'auth' quality of protection is implemented; it is selected
        whenever the server lists it among its qop options. The algorithm
        must be one known to get_digest_algorithm_impls.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
            updated.
        :returns: True if we can try to handle the authentication.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        if raw_auth is None:
            # A bare "Digest" challenge carries no parameter at all (so no
            # nonce either): we can't answer it.
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib_request.parse_keqv_list(
            urllib_request.parse_http_list(raw_auth))

        # Check that we can handle that authentication. The server sends a
        # comma-separated list of the qop values it supports; we only do
        # 'auth' (no auth-int so far).
        offered_qops = [option.strip()
                        for option in req_auth.get('qop', '').split(',')]
        if 'auth' not in offered_qops:
            return False
        qop = 'auth'

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if auth.get('user', None) is None or auth.get('password', None) is None:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        algorithm = req_auth.get('algorithm', None)
        if algorithm is not None:
            self.update_auth(auth, 'algorithm', algorithm)
        try:
            nonce = req_auth['nonce']
        except KeyError:
            # Some required field is not there
            return False
        if auth.get('nonce', None) != nonce:
            # A new nonce, never used
            self.update_auth(auth, 'nonce_count', 0)
        self.update_auth(auth, 'nonce', nonce)
        self.update_auth(auth, 'qop', qop)
        # opaque is not part of the authentication key: changing it
        # doesn't mark auth as modified.
        auth['opaque'] = req_auth.get('opaque', None)

        return True

    def build_auth_header(self, auth, request):
        """Build the Digest header for ``request``.

        The nonce count stored in ``auth`` is incremented as a side effect.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.
        :return: The header value.
        """
        if PY3:
            selector = request.selector
        else:
            selector = request.get_selector()
        # Keep only the path part of the selector for the digest uri
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header

1634

1635

1636

class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        auth = request.auth
        return auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        """Return the password prompt for ``auth``."""
        prompt = self._build_password_prompt(auth)
        return prompt

    def build_username_prompt(self, auth):
        """Return the username prompt for ``auth``."""
        prompt = self._build_username_prompt(auth)
        return prompt

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 response by attempting to authenticate."""
        response = self.auth_required(req, headers)
        return response

1663

1664

1665

class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 407 error and keep the relevant info in
    the proxy_auth request attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        auth = request.proxy_auth
        return auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        """Return the password prompt for ``auth``, flagged as proxy."""
        return u'Proxy ' + self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Return the username prompt for ``auth``, flagged as proxy."""
        return u'Proxy ' + self._build_username_prompt(auth)

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 response by attempting to authenticate."""
        response = self.auth_required(req, headers)
        return response

1699

1700

1701

class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler.

    Combines BasicAuthHandler with the origin-server (401) plumbing of
    HTTPAuthHandler; adds nothing of its own.
    """

1703

1704

1705

class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler.

    Combines BasicAuthHandler with the proxy (407) plumbing of
    ProxyAuthHandler; adds nothing of its own.
    """

1707

1708

1709

class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler.

    Combines DigestAuthHandler with the origin-server (401) plumbing of
    HTTPAuthHandler; adds nothing of its own.
    """

1711

1712

1713

class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler.

    Combines DigestAuthHandler with the proxy (407) plumbing of
    ProxyAuthHandler; adds nothing of its own.
    """

1715

1716

1717

class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Custom http negotiate authentication handler.

    Combines NegotiateAuthHandler with the origin-server (401) plumbing of
    HTTPAuthHandler; adds nothing of its own.
    """

1719

1720

1721

class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Custom proxy negotiate authentication handler.

    Combines NegotiateAuthHandler with the proxy (407) plumbing of
    ProxyAuthHandler; adds nothing of its own.
    """

1723

1724

1725

class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors, quite the contrary
    instead, we leave our Transport handle them.
    """

    accepted_errors = [200,  # Ok
                       201,
                       202,
                       204,
                       206,  # Partial content
                       400,
                       403,
                       404,  # Not found
                       405,  # Method not allowed
                       406,  # Not Acceptable
                       409,  # Conflict
                       416,  # Range not satisfiable
                       422,  # Unprocessable entity
                       501,  # Not implemented
                       ]
    """The error codes the caller will handle.

    This can be specialized in the request on a case-by case basis, but the
    common cases are covered here.
    """

    def http_response(self, request, response):
        """Pass accepted statuses through, route the others to the opener.

        :param request: The request that produced ``response``.
        :param response: The response; its ``code``, ``msg`` and ``info()``
            are read.
        :return: ``response`` unchanged when its code is listed in
            ``accepted_errors``, otherwise whatever
            ``self.parent.error('http', ...)`` returns.
        """
        code, msg, hdrs = response.code, response.msg, response.info()

        if code not in self.accepted_errors:
            response = self.parent.error('http', request, response,
                                         code, msg, hdrs)
        return response

    # https responses get exactly the same treatment.
    https_response = http_response

1762

1763

1764

class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise a Breezy exception for an otherwise unhandled HTTP error.

        This method never returns.

        :param req: The failed request; only its full URL is used.
        :param code: The HTTP status code.
        :param msg: The status message, embedded in the error text.
        :raises errors.TransportError: when ``code`` is 403.
        :raises errors.UnexpectedHttpStatus: for every other code.
        """
        if code == 403:
            raise errors.TransportError(
                'Server refuses to fulfill the request (403 Forbidden)'
                ' for %s' % req.get_full_url())
        raise errors.UnexpectedHttpStatus(
            req.get_full_url(), code,
            'Unable to handle http code: %s' % msg)

1776

1777

1778

class Opener(object):
    """A wrapper around urllib_request.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        """Build the urllib opener and expose its ``open`` method.

        :param connection: Connection handler class; instantiated here with
            ``report_activity`` and ``ca_certs``.
        :param redirect: Redirect handler, handed to build_opener as given.
        :param error: Error processor, handed to build_opener as given.
        :param report_activity: Forwarded to the connection handler.
        :param ca_certs: Forwarded to the connection handler.
        """
        self._opener = urllib_request.build_opener(
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect, error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            )

        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)

1813

1814

1815

class HttpTransport(ConnectedTransport):
    """HTTP Client implementations.

    The protocol can be given as e.g. http+urllib://host/ to use a particular
    implementation.
    """

    # _unqualified_scheme: "http" or "https"
    # _scheme: may have "+pycurl", etc

    # In order to debug we have to issue our traces in sync with
    # httplib, which use print :(
    _debuglevel = 0

    def __init__(self, base, _from_transport=None, ca_certs=None):
        """Set the base path where files will be stored.

        :param base: An ``http[s]://`` URL, optionally qualified with an
            implementation suffix (``http+xxx://``).
        :param _from_transport: Transport this one is cloned from, if any;
            its range hint and opener are reused.
        :param ca_certs: Forwarded to the Opener built for a new transport.
        :raises AssertionError: if ``base`` is not an http(s) URL.
        """
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        self._unqualified_scheme = proto_match.group(1)
        super(HttpTransport, self).__init__(
            base, _from_transport=_from_transport)
        self._medium = None
        # range hint is handled dynamically throughout the life
        # of the transport object. We start by trying multi-range
        # requests and if the server returns bogus results, we
        # retry with single range requests and, finally, we
        # forget about range if the server really can't
        # understand. Once acquired, this piece of info is
        # propagated to clones.
        if _from_transport is not None:
            self._range_hint = _from_transport._range_hint
            self._opener = _from_transport._opener
        else:
            self._range_hint = 'multi'
            self._opener = Opener(
                report_activity=self._report_activity, ca_certs=ca_certs)

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """Issue an HTTP request through the opener of this transport.

        :param method: HTTP method name, handed to Request.
        :param url: URL to request.
        :param fields: Optional form fields; they are urlencoded and sent as
            the request body. Mutually exclusive with ``body``.
        :param headers: Optional dict of request headers.
        :param urlopen_kw: Only ``body`` (the request body) and ``retries``
            (redirections are followed when > 0) are understood.
        :return: A urllib3-like wrapper around the response, exposing
            ``status``, ``reason``, ``data``, ``text``, ``getheader()``,
            ``getheaders()`` and the ``read*`` methods.
        :raises ValueError: if both ``body`` and ``fields`` are given.
        :raises NotImplementedError: for any other keyword argument.
        :raises errors.RedirectRequested: if redirections are not followed
            and the server answers 301, 302, 303, 307 or 308.
        """
        body = urlopen_kw.pop('body', None)
        if fields is not None:
            data = urlencode(fields).encode()
            if body is not None:
                raise ValueError(
                    'body and fields are mutually exclusive')
        else:
            data = body
        if headers is None:
            headers = {}
        request = Request(method, url, data, headers)
        request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
        if urlopen_kw:
            raise NotImplementedError(
                'unknown arguments: %r' % urlopen_kw.keys())
        connection = self._get_connection()
        if connection is not None:
            # Give back shared info
            request.connection = connection
            (auth, proxy_auth) = self._get_credentials()
            # Clean the httplib.HTTPConnection pipeline in case the previous
            # request couldn't do it
            connection.cleanup_pipe()
        else:
            # First request, initialize credentials.
            # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
            auth = self._create_auth()
            # Proxy initialization will be done by the first proxied request
            proxy_auth = dict()
        # Ensure authentication info is provided
        request.auth = auth
        request.proxy_auth = proxy_auth

        if self._debuglevel > 0:
            print('perform: %s base: %s, url: %s' % (request.method, self.base,
                                                     request.get_full_url()))
        response = self._opener.open(request)
        if self._get_connection() is not request.connection:
            # First connection or reconnection
            self._set_connection(request.connection,
                                 (request.auth, request.proxy_auth))
        else:
            # http may change the credentials while keeping the
            # connection opened
            self._update_credentials((request.auth, request.proxy_auth))

        code = response.code
        if (request.follow_redirections is False
                and code in (301, 302, 303, 307, 308)):
            raise errors.RedirectRequested(request.get_full_url(),
                                           request.redirected_to,
                                           is_permanent=(code in (301, 308)))

        if request.redirected_to is not None:
            trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                         request.redirected_to))

        class Urllib3LikeResponse(object):
            """Expose the urllib response through a urllib3-like interface."""

            def __init__(self, actual):
                self._actual = actual
                # Body cache, filled by the first access to ``data``.
                self._data = None

            def getheader(self, name, default=None):
                """Return header ``name``, or ``default`` if it is absent."""
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                if PY3:
                    return self._actual.headers.get(name, default)
                else:
                    return self._actual.headers.getheader(name, default)

            def getheaders(self):
                """Return all headers as a list of (name, value) pairs."""
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                return list(self._actual.headers.items())

            @property
            def status(self):
                return self._actual.code

            @property
            def reason(self):
                return self._actual.reason

            @property
            def data(self):
                """The whole body, read once and cached."""
                if self._data is None:
                    self._data = self._actual.read()
                return self._data

            @property
            def text(self):
                """The decoded body, or None for a 204 response.

                The charset announced in Content-Type is used when there is
                one, the default encoding of ``bytes.decode()`` otherwise.
                """
                if self.status == 204:
                    return None
                # A response without Content-Type must not crash the
                # header parser, so fall back to an empty value.
                content_type = self.getheader('Content-Type') or ''
                charset = cgi.parse_header(content_type)[1].get('charset')
                if charset:
                    return self.data.decode(charset)
                else:
                    return self.data.decode()

            def read(self, amt=None):
                return self._actual.read(amt)

            def readlines(self):
                return self._actual.readlines()

            def readline(self, size=-1):
                return self._actual.readline(size)

        return Urllib3LikeResponse(response)

    def disconnect(self):
        """Close the current connection, if there is one."""
        connection = self._get_connection()
        if connection is not None:
            connection.close()

    def has(self, relpath):
        """Does the target location exist?

        :return: True if a HEAD request on ``relpath`` answers 200.
        """
        response = self._head(relpath)
        return response.status == 200

    def get(self, relpath):
        """Get the file at the given relative path.

        :param relpath: The relative path to the file
        """
        code, response_file = self._get(relpath, None)
        return response_file

    def _get(self, relpath, offsets, tail_amount=0):
        """Get a file, or part of a file.

        :param relpath: Path relative to transport base URL
        :param offsets: None to get the whole file;
            or  a list of _CoalescedOffset to fetch parts of a file.
        :param tail_amount: The amount to get from the end of the file.

        :returns: (http_code, result_file)
        :raises errors.NoSuchFile: on a 404 answer.
        :raises errors.InvalidHttpRange: on a 416 answer, or on a 400 answer
            to a request carrying a Range header.
        :raises errors.BadHttpRequest: on a 400 answer to a request without
            a Range header.
        :raises errors.UnexpectedHttpStatus: on any other status than 200
            or 206.
        """
        abspath = self._remote_path(relpath)
        headers = {}
        if offsets or tail_amount:
            range_header = self._attempted_range_header(offsets, tail_amount)
            if range_header is not None:
                headers['Range'] = 'bytes=' + range_header
        else:
            range_header = None

        response = self.request('GET', abspath, headers=headers)

        if response.status == 404:  # not found
            raise errors.NoSuchFile(abspath)
        elif response.status == 416:
            # We don't know which, but one of the ranges we specified was
            # wrong.
            raise errors.InvalidHttpRange(abspath, range_header,
                                          'Server return code %d' % response.status)
        elif response.status == 400:
            if range_header:
                # We don't know which, but one of the ranges we specified was
                # wrong.
                raise errors.InvalidHttpRange(
                    abspath, range_header,
                    'Server return code %d' % response.status)
            else:
                raise errors.BadHttpRequest(abspath, response.reason)
        elif response.status not in (200, 206):
            raise errors.UnexpectedHttpStatus(abspath, response.status)

        data = handle_response(
            abspath, response.status, response.getheader, response)
        return response.status, data

    def _remote_path(self, relpath):
        """See ConnectedTransport._remote_path.

        user and passwords are not embedded in the path provided to the server.
        """
        url = self._parsed_url.clone(relpath)
        url.user = url.quoted_user = None
        url.password = url.quoted_password = None
        url.scheme = self._unqualified_scheme
        return str(url)

    def _create_auth(self):
        """Returns a dict containing the credentials provided at build time."""
        auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,
                    user=self._parsed_url.user, password=self._parsed_url.password,
                    protocol=self._unqualified_scheme,
                    path=self._parsed_url.path)
        return auth

    def get_smart_medium(self):
        """See Transport.get_smart_medium."""
        if self._medium is None:
            # Since medium holds some state (smart server probing at least), we
            # need to keep it around. Note that this is needed because medium
            # has the same 'base' attribute as the transport so it can't be
            # shared between transports having different bases.
            self._medium = SmartClientHTTPMedium(self)
        return self._medium

    def _degrade_range_hint(self, relpath, ranges):
        """Step the range hint down: 'multi' -> 'single' -> None.

        :param relpath: Only used in the trace message.
        :param ranges: Unused.
        :return: True if the hint was degraded, False if it was already
            None (the caller must then reraise).
        """
        if self._range_hint == 'multi':
            self._range_hint = 'single'
            mutter('Retry "%s" with single range request' % relpath)
        elif self._range_hint == 'single':
            self._range_hint = None
            mutter('Retry "%s" without ranges' % relpath)
        else:
            # We tried all the tricks, but nothing worked, caller must reraise.
            return False
        return True

    # _coalesce_offsets is a helper for readv, it tries to combine ranges
    # without degrading readv performances. _bytes_to_read_before_seek is the
    # value used for the limit parameter and has been tuned for other
    # transports. For HTTP, the name is inappropriate but the parameter is
    # still useful and helps reduce the number of chunks in the response. The
    # overhead for a chunk (headers, length, footer around the data itself) is
    # variable but around 50 bytes. We use 128 to reduce the range specifiers
    # that appear in the header, some servers (notably Apache) enforce a
    # maximum length for a header and issue a '400: Bad request' error when
    # too many ranges are specified.
    _bytes_to_read_before_seek = 128
    # No limit on the offset number that get combined into one, we are trying
    # to avoid downloading the whole file.
    _max_readv_combine = 0
    # By default Apache has a limit of ~400 ranges before replying with a 400
    # Bad Request. So we go underneath that amount to be safe.
    _max_get_ranges = 200
    # We impose no limit on the range size. But see _pycurl.py for a different
    # use.
    _get_max_size = 0

    def _readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param offsets: A list of (offset, size) tuples.
        :return: A generator of (offset, data) tuples, yielded in the order
            of ``offsets``.
        """
        # offsets may be a generator, we will iterate it several times, so
        # build a list
        offsets = list(offsets)

        try_again = True
        retried_offset = None
        while try_again:
            try_again = False

            # Coalesce the offsets to minimize the GET requests issued
            sorted_offsets = sorted(offsets)
            coalesced = self._coalesce_offsets(
                sorted_offsets, limit=self._max_readv_combine,
                fudge_factor=self._bytes_to_read_before_seek,
                max_size=self._get_max_size)

            # Turn it into a list, we will iterate it several times
            coalesced = list(coalesced)
            if 'http' in debug.debug_flags:
                mutter('http readv of %s offsets => %s collapsed %s',
                       relpath, len(offsets), len(coalesced))

            # Cache the data read, but only until it's been used
            data_map = {}
            # We will iterate on the data received from the GET requests and
            # serve the corresponding offsets respecting the initial order. We
            # need an offset iterator for that.
            iter_offsets = iter(offsets)
            try:
                cur_offset_and_size = next(iter_offsets)
            except StopIteration:
                return

            try:
                for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
                    # Split the received chunk
                    for offset, size in cur_coal.ranges:
                        start = cur_coal.start + offset
                        rfile.seek(start, os.SEEK_SET)
                        data = rfile.read(size)
                        data_len = len(data)
                        if data_len != size:
                            raise errors.ShortReadvError(relpath, start, size,
                                                         actual=data_len)
                        if (start, size) == cur_offset_and_size:
                            # The offset requested are sorted as the coalesced
                            # ones, no need to cache. Win !
                            yield cur_offset_and_size[0], data
                            try:
                                cur_offset_and_size = next(iter_offsets)
                            except StopIteration:
                                return
                        else:
                            # Different sorting. We need to cache.
                            data_map[(start, size)] = data

                    # Yield everything we can
                    while cur_offset_and_size in data_map:
                        # Clean the cached data since we use it
                        # XXX: will break if offsets contains duplicates --
                        # vila20071129
                        this_data = data_map.pop(cur_offset_and_size)
                        yield cur_offset_and_size[0], this_data
                        try:
                            cur_offset_and_size = next(iter_offsets)
                        except StopIteration:
                            return

            except (errors.ShortReadvError, errors.InvalidRange,
                    errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
                mutter('Exception %r: %s during http._readv', e, e)
                if (not isinstance(e, errors.ShortReadvError)
                        or retried_offset == cur_offset_and_size):
                    # We don't degrade the range hint for ShortReadvError since
                    # they do not indicate a problem with the server ability to
                    # handle ranges. Except when we fail to get back a required
                    # offset twice in a row. In that case, falling back to
                    # single range or whole file should help.
                    if not self._degrade_range_hint(relpath, coalesced):
                        raise
                # Some offsets may have been already processed, so we retry
                # only the unsuccessful ones.
                offsets = [cur_offset_and_size] + list(iter_offsets)
                retried_offset = cur_offset_and_size
                try_again = True

    def _coalesce_readv(self, relpath, coalesced):
        """Issue several GET requests to satisfy the coalesced offsets

        :param relpath: Path relative to the transport base.
        :param coalesced: A list of coalesced offsets.
        :return: A generator of (coalesced offset, response file) pairs.
        """

        def get_and_yield(relpath, coalesced):
            if coalesced:
                # Note that the _get below may raise
                # errors.InvalidHttpRange. It's the caller's responsibility to
                # decide how to retry since it may provide different coalesced
                # offsets.
                code, rfile = self._get(relpath, coalesced)
                for coal in coalesced:
                    yield coal, rfile

        if self._range_hint is None:
            # Download whole file
            for c, rfile in get_and_yield(relpath, coalesced):
                yield c, rfile
        else:
            total = len(coalesced)
            if self._range_hint == 'multi':
                max_ranges = self._max_get_ranges
            elif self._range_hint == 'single':
                max_ranges = total
            else:
                raise AssertionError("Unknown _range_hint %r"
                                     % (self._range_hint,))
            # TODO: Some web servers may ignore the range requests and return
            # the whole file, we may want to detect that and avoid further
            # requests.
            # Hint: test_readv_multiple_get_requests will fail once we do that
            cumul = 0
            ranges = []
            for coal in coalesced:
                if ((self._get_max_size > 0
                     and cumul + coal.length > self._get_max_size) or
                        len(ranges) >= max_ranges):
                    # Get that much and yield
                    for c, rfile in get_and_yield(relpath, ranges):
                        yield c, rfile
                    # Restart with the current offset
                    ranges = [coal]
                    cumul = coal.length
                else:
                    ranges.append(coal)
                    cumul += coal.length
            # Get the rest and yield
            for c, rfile in get_and_yield(relpath, ranges):
                yield c, rfile

    def recommended_page_size(self):
        """See Transport.recommended_page_size().

        For HTTP we suggest a large page size to reduce the overhead
        introduced by latency.
        """
        return 64 * 1024

    def _post(self, body_bytes):
        """POST body_bytes to .bzr/smart on this transport.

        :returns: (response code, response body file-like object).
        :raises errors.UnexpectedHttpStatus: if the status is neither 200
            nor 403.
        """
        # TODO: Requiring all the body_bytes to be available at the beginning of
        # the POST may require large client buffers.  It would be nice to have
        # an interface that allows streaming via POST when possible (and
        # degrades to a local buffer when not).
        abspath = self._remote_path('.bzr/smart')
        response = self.request(
            'POST', abspath, body=body_bytes,
            headers={'Content-Type': 'application/octet-stream'})
        if response.status not in (200, 403):
            raise errors.UnexpectedHttpStatus(abspath, response.status)
        code = response.status
        data = handle_response(
            abspath, code, response.getheader, response)
        return code, data

    def _head(self, relpath):
        """Request the HEAD of a file.

        Performs the request and leaves callers handle the results.

        :return: The response, whose status is either 200 or 404.
        :raises errors.UnexpectedHttpStatus: for any other status.
        """
        abspath = self._remote_path(relpath)
        response = self.request('HEAD', abspath)
        if response.status not in (200, 404):
            raise errors.UnexpectedHttpStatus(abspath, response.status)

        return response

    def put_file(self, relpath, f, mode=None):
        """Copy the file-like object into the location.

        :param relpath: Location to put the contents, relative to base.
        :param f:       File-like object.
        """
        raise errors.TransportNotPossible('http PUT not supported')

    def mkdir(self, relpath, mode=None):
        """Create a directory at the given path."""
        raise errors.TransportNotPossible('http does not support mkdir()')

    def rmdir(self, relpath):
        """See Transport.rmdir."""
        raise errors.TransportNotPossible('http does not support rmdir()')

    def append_file(self, relpath, f, mode=None):
        """Append the text in the file-like object into the final
        location.
        """
        raise errors.TransportNotPossible('http does not support append()')

    def copy(self, rel_from, rel_to):
        """Copy the item at rel_from to the location at rel_to"""
        raise errors.TransportNotPossible('http does not support copy()')

    def copy_to(self, relpaths, other, mode=None, pb=None):
        """Copy a set of entries from self into another Transport.

        :param relpaths: A list/generator of entries to be copied.

        TODO: if other is LocalTransport, is it possible to
              do better than put(get())?
        """
        # At this point HttpTransport might be able to check and see if
        # the remote location is the same, and rather than download, and
        # then upload, it could just issue a remote copy_this command.
        if isinstance(other, HttpTransport):
            raise errors.TransportNotPossible(
                'http cannot be the target of copy_to()')
        else:
            return super(HttpTransport, self).copy_to(
                relpaths, other, mode=mode, pb=pb)

    def move(self, rel_from, rel_to):
        """Move the item at rel_from to the location at rel_to"""
        raise errors.TransportNotPossible('http does not support move()')

    def delete(self, relpath):
        """Delete the item at relpath"""
        raise errors.TransportNotPossible('http does not support delete()')

    def external_url(self):
        """See breezy.transport.Transport.external_url."""
        # HTTP URL's are externally usable as long as they don't mention their
        # implementation qualifier
        url = self._parsed_url.clone()
        url.scheme = self._unqualified_scheme
        return str(url)

    def is_readonly(self):
        """See Transport.is_readonly."""
        return True

    def listable(self):
        """See Transport.listable."""
        return False

    def stat(self, relpath):
        """Return the stat information for a file.
        """
        raise errors.TransportNotPossible('http does not support stat()')

    def lock_read(self, relpath):
        """Lock the given file for shared (read) access.

        :return: A lock object, which should be passed to Transport.unlock()
        """
        # The old RemoteBranch ignore lock for reading, so we will
        # continue that tradition and return a bogus lock object.
        class BogusLock(object):
            def __init__(self, path):
                self.path = path

            def unlock(self):
                pass
        return BogusLock(relpath)

    def lock_write(self, relpath):
        """Lock the given file for exclusive (write) access.

        WARNING: many transports do not support this, so try to avoid using it

        :return: A lock object, which should be passed to Transport.unlock()
        """
        raise errors.TransportNotPossible('http does not support lock_write()')

    def _attempted_range_header(self, offsets, tail_amount):
        """Prepare a HTTP Range header at a level the server should accept.

        :param offsets: A list of _CoalescedOffset; may be empty or None
            when only a tail is requested.
        :param tail_amount: The amount to get from the end of the file.
        :return: the range header representing offsets/tail_amount or None if
            no header can be built.
        """

        if self._range_hint == 'multi':
            # Generate the header describing all offsets
            return self._range_header(offsets, tail_amount)
        elif self._range_hint == 'single':
            # Combine all the requested ranges into a single
            # encompassing one
            if offsets:
                if tail_amount not in (0, None):
                    # Nothing we can do here to combine ranges with tail_amount
                    # in a single range, just returns None. The whole file
                    # should be downloaded.
                    return None
                else:
                    start = offsets[0].start
                    last = offsets[-1]
                    end = last.start + last.length - 1
                    whole = self._coalesce_offsets([(start, end - start + 1)],
                                                   limit=0, fudge_factor=0)
                    return self._range_header(list(whole), 0)
            else:
                # Only tail_amount, requested, leave range_header
                # do its work
                return self._range_header(offsets, tail_amount)
        else:
            return None

    @staticmethod
    def _range_header(ranges, tail_amount):
        """Turn a list of bytes ranges into a HTTP Range header value.

        :param ranges: A list of _CoalescedOffset (None is treated as empty)
        :param tail_amount: The amount to get from the end of the file.

        :return: HTTP range header string.

        At least a non-empty ranges *or* a tail_amount must be
        provided.
        """
        strings = []
        # ``ranges`` is None when the caller only asks for a tail.
        for offset in ranges or ():
            strings.append('%d-%d' % (offset.start,
                                      offset.start + offset.length - 1))

        if tail_amount:
            strings.append('-%d' % tail_amount)

        return ','.join(strings)

    def _redirected_to(self, source, target):
        """Returns a transport suitable to re-issue a redirected request.

        :param source: The source url as returned by the server.
        :param target: The target url as returned by the server.

        The redirection can be handled only if the relpath involved is not
        renamed by the redirection.

        :returns: A transport
        :raise UnusableRedirect: when the URL can not be reinterpreted
        """
        parsed_source = self._split_url(source)
        parsed_target = self._split_url(target)
        pl = len(self._parsed_url.path)
        # determine the excess tail - the relative path that was in
        # the original request but not part of this transports' URL.
        excess_tail = parsed_source.path[pl:].strip("/")
        if not parsed_target.path.endswith(excess_tail):
            # The final part of the url has been renamed, we can't handle the
            # redirection.
            raise UnusableRedirect(
                source, target, "final part of the url was renamed")

        target_path = parsed_target.path
        if excess_tail:
            # Drop the tail that was in the redirect but not part of
            # the path of this transport.
            target_path = target_path[:-len(excess_tail)]

        if parsed_target.scheme in ('http', 'https'):
            # Same protocol family (i.e. http[s]), we will preserve the same
            # http client implementation when a redirection occurs from one to
            # the other (otherwise users may be surprised that bzr switches
            # from one implementation to the other, and devs may suffer
            # debugging it).
            if (parsed_target.scheme == self._unqualified_scheme
                and parsed_target.host == self._parsed_url.host
                and parsed_target.port == self._parsed_url.port
                and (parsed_target.user is None or
                     parsed_target.user == self._parsed_url.user)):
                # If a user is specified, it should match, we don't care about
                # passwords, wrong passwords will be rejected anyway.
                return self.clone(target_path)
            else:
                # Rebuild the url preserving the scheme qualification and the
                # credentials (if they don't apply, the redirected to server
                # will tell us, but if they do apply, we avoid prompting the
                # user)
                redir_scheme = parsed_target.scheme
                new_url = self._unsplit_url(redir_scheme,
                                            self._parsed_url.user,
                                            self._parsed_url.password,
                                            parsed_target.host, parsed_target.port,
                                            target_path)
                return transport.get_transport_from_url(new_url)
        else:
            # Redirected to a different protocol
            new_url = self._unsplit_url(parsed_target.scheme,
                                        parsed_target.user,
                                        parsed_target.password,
                                        parsed_target.host, parsed_target.port,
                                        target_path)
            return transport.get_transport_from_url(new_url)

    def _options(self, relpath):
        """Issue an OPTIONS request and return the response headers.

        :param relpath: Path relative to the transport base.
        :return: The list of (name, value) response headers.
        :raises errors.NoSuchFile: on a 404 answer.
        :raises errors.InvalidHttpResponse: on a 403 or 405 answer.
        """
        abspath = self._remote_path(relpath)
        resp = self.request('OPTIONS', abspath)
        if resp.status == 404:
            raise errors.NoSuchFile(abspath)
        if resp.status in (403, 405):
            raise errors.InvalidHttpResponse(
                abspath,
                "OPTIONS not supported or forbidden for remote URL")
        return resp.getheaders()

2505

2506

2507

# TODO: May be better located in smart/medium.py with the other

2508

# SmartMedium classes

2509

class SmartClientHTTPMedium(medium.SmartClientMedium):
    """A smart client medium sending its requests through an HTTP transport."""

    def __init__(self, http_transport):
        """Create a medium sharing the base of ``http_transport``.

        :param http_transport: The transport used to POST smart requests;
            only a weak reference to it is kept.
        """
        super(SmartClientHTTPMedium, self).__init__(http_transport.base)
        # We don't want to create a circular reference between the http
        # transport and its associated medium. Since the transport will live
        # longer than the medium, the medium keep only a weak reference to its
        # transport.
        self._http_transport_ref = weakref.ref(http_transport)

    def get_request(self):
        """Return a new SmartClientHTTPMediumRequest bound to this medium."""
        return SmartClientHTTPMediumRequest(self)

    def should_probe(self):
        """Always True for this medium."""
        return True

    def remote_path_from_transport(self, transport):
        """Return the unquoted path of ``transport`` relative to this medium.

        :param transport: A transport whose ``base`` is compared with
            ``self.base``.
        """
        # Strip the optional 'bzr+' prefix from transport so it will have the
        # same scheme as self.
        transport_base = transport.base
        if transport_base.startswith('bzr+'):
            transport_base = transport_base[4:]
        rel_url = urlutils.relative_url(self.base, transport_base)
        return urlutils.unquote(rel_url)

    def send_http_smart_request(self, bytes):
        """POST ``bytes`` through the transport and return the response body.

        :param bytes: The encoded smart request.
        :return: The file-like response body.
        :raises errors.UnexpectedHttpStatus: if the status is not 200.
        :raises errors.SmartProtocolError: if the transport raised
            InvalidHttpResponse or ConnectionReset.
        """
        try:
            # Get back the http_transport hold by the weak reference
            t = self._http_transport_ref()
            code, body_filelike = t._post(bytes)
            if code != 200:
                raise errors.UnexpectedHttpStatus(
                    t._remote_path('.bzr/smart'), code)
        except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
            raise errors.SmartProtocolError(str(e))
        return body_filelike

    def _report_activity(self, bytes, direction):
        """See SmartMedium._report_activity.

        Does nothing; the underlying plain HTTP transport will report the
        activity that this medium would report.
        """
        pass

    def disconnect(self):
        """See SmartClientMedium.disconnect()."""
        t = self._http_transport_ref()
        # The weak reference yields None once the transport is gone; there
        # is nothing left to disconnect in that case.
        if t is not None:
            t.disconnect()

2558

2559

2560

# TODO: May be better located in smart/medium.py with the other

2561

# SmartMediumRequest classes

2562

class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium.

    Outgoing bytes are accumulated in memory and handed to the medium in a
    single call once writing is finished; the response is then read from the
    file-like body the medium returns.
    """

    def __init__(self, client_medium):
        """Create a request on client_medium with an empty write buffer."""
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        # Nothing is sent until _finished_writing, so collect the bytes here.
        self._buffer = b''

    def _accept_bytes(self, bytes):
        """Append bytes to the pending request buffer."""
        self._buffer += bytes

    def _finished_writing(self):
        """Send the buffered request and keep the response body for reading."""
        self._response_body = self._medium.send_http_smart_request(
            self._buffer)

    def _read_bytes(self, count):
        """See SmartClientMediumRequest._read_bytes."""
        body = self._response_body
        return body.read(count)

    def _read_line(self):
        """Read one line from the response body.

        :raises AssertionError: If ``medium._get_line`` hands back excess
            bytes, which this request has no way to push back.
        """
        line, leftover = medium._get_line(self._response_body.read)
        if leftover == b'':
            return line
        raise AssertionError(
            '_get_line returned excess bytes, but this mediumrequest '
            'cannot handle excess. (%r)' % (leftover,))

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""

2591

2592

2593

def unhtml_roughly(maybe_html, length_limit=1000):
    """Very approximate html->text translation, for presenting error bodies.

    Each markup tag, newline and ``&nbsp;`` entity is replaced by a single
    space. Nothing else is decoded and runs of spaces are not collapsed, so
    adjacent replacements leave several spaces in a row.

    :param maybe_html: Text that may contain HTML markup.
    :param length_limit: Truncate the result to this many characters.
    :return: The translated text, at most length_limit characters long.

    >>> unhtml_roughly("<b>bad</b> things happened\\n")
    ' bad  things happened '
    >>> unhtml_roughly("a&nbsp;b")
    'a b'
    """
    # The third alternative must be the literal entity text: matching a
    # plain space and replacing it with a space would be a no-op.
    text = re.sub(r"(<[^>]*>|\n|&nbsp;)", " ", maybe_html)
    return text[:length_limit]

2602

2603

2604

def get_test_permutations():
    """Return the permutations to be used in testing.

    :return: A list of (transport class, server class) pairs. The plain HTTP
        pair is always present; an HTTPS pair is added when
        ``features.HTTPSServerFeature`` reports itself available.
    """
    from breezy.tests import features, http_server

    result = [(HttpTransport, http_server.HttpServer)]
    if not features.HTTPSServerFeature.available():
        return result

    # Only import the HTTPS test helpers once we know they can be used.
    from breezy.tests import https_server, ssl_certs

    class HTTPS_transport(HttpTransport):
        """HttpTransport variant that trusts the test suite's CA certificate."""

        def __init__(self, base, _from_transport=None):
            ca_path = ssl_certs.build_path('ca.crt')
            super(HTTPS_transport, self).__init__(
                base, _from_transport=_from_transport, ca_certs=ca_path)

    result.append((HTTPS_transport, https_server.HTTPSServer))
    return result