/brz/remove-bazaar : revision 7532

152

116

* none: Certificates ignored

153

117

* required: Certificates required and validated

154

118

""")

155

156

# NOTE(review): presumably records whether an attempt to import the optional
# kerberos module has already been made -- confirm against the code using it
# (not visible in this part of the file).
checked_kerberos = False

157

# NOTE(review): presumably holds the kerberos module once it has been
# imported, and stays None until then -- confirm against the code using it.
kerberos = None

158

159

160

class _ReportingFileSocket(object):

161

162

def __init__(self, filesock, report_activity=None):

163

self.filesock = filesock

164

self._report_activity = report_activity

165

166

def report_activity(self, size, direction):

167

if self._report_activity:

168

self._report_activity(size, direction)

169

170

def read(self, size=1):

171

s = self.filesock.read(size)

172

self.report_activity(len(s), 'read')

173

return s

174

175

def readline(self, size=-1):

176

s = self.filesock.readline(size)

177

self.report_activity(len(s), 'read')

178

return s

179

180

def readinto(self, b):

181

s = self.filesock.readinto(b)

182

self.report_activity(s, 'read')

183

return s

184

185

def __getattr__(self, name):

186

return getattr(self.filesock, name)

187

188

189

class _ReportingSocket(object):

190

191

def __init__(self, sock, report_activity=None):

192

self.sock = sock

193

self._report_activity = report_activity

194

195

def report_activity(self, size, direction):

196

if self._report_activity:

197

self._report_activity(size, direction)

198

199

def sendall(self, s, *args):

200

self.sock.sendall(s, *args)

201

self.report_activity(len(s), 'write')

202

203

def recv(self, *args):

204

s = self.sock.recv(*args)

205

self.report_activity(len(s), 'read')

206

return s

207

208

def makefile(self, mode='r', bufsize=-1):

209

# http_client creates a fileobject that doesn't do buffering, which

210

# makes fp.readline() very expensive because it only reads one byte

211

# at a time. So we wrap the socket in an object that forces

212

# sock.makefile to make a buffered file.

213

fsock = self.sock.makefile(mode, 65536)

214

# And wrap that into a reporting kind of fileobject

215

return _ReportingFileSocket(fsock, self._report_activity)

216

217

def __getattr__(self, name):

218

return getattr(self.sock, name)

219

220

221

# We define our own Response class to keep our http_client pipe clean
class Response(http_client.HTTPResponse):
    """HTTPResponse subclass that keeps a persistent connection usable.

    http_client prefers to decorate the returned objects, rather than using
    a custom object; using our own class avoids the need to decorate.
    """

    # Statuses whose body is of no interest to us.
    _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]

    # finish() may have to discard several MB in the worst case. To avoid
    # buffering that much, the data is read and thrown away in chunks of this
    # size. The underlying file is either a socket or a StringIO, so 8k
    # chunks should be fine.
    _discarded_buf_size = 8192

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        """Remember ``url`` and initialise the base response."""
        self.url = url
        super().__init__(sock, debuglevel=debuglevel, method=method, url=url)

    def begin(self):
        """Begin to read the response from the server.

        http_client assumes that some responses get no content and does not
        even attempt to read the body in that case, leaving the body in the
        socket where it blocks the next request. For the statuses listed in
        ``_body_ignored_responses`` the body is consumed here (when that is
        safe) and the response is closed.
        """
        http_client.HTTPResponse.begin(self)
        if self.status not in self._body_ignored_responses:
            if self.status == 200:
                # Whatever the request is, it went ok, so we surely don't
                # want to close the connection. Some cases are not correctly
                # detected by http_client (the CONNECT response for https
                # through a proxy is one). Note: 'will_close' refers to the
                # "true" socket between us and the server, whereas close()
                # acts on the copy of that socket created by http_client for
                # the response itself.
                self.will_close = False
            return

        if self.debuglevel >= 2:
            print("For status: [%s], will ready body, length: %s" % (
                self.status, self.length))
        # In some cases we must not even try to read the body, or we may get
        # a 104 'Connection reset by peer' error if there is no body and the
        # server closed the connection right after the response headers.
        body_can_be_read = self.length is not None and not self.will_close
        if body_can_be_read:
            consumed = self.read(self.length)
            if self.debuglevel >= 9:
                # This one can be huge and is generally not interesting
                print("Consumed body: [%s]" % consumed)
        # We are done with this response.
        self.close()

    def finish(self):
        """Finish reading the body.

        The client may have left some bytes of the body unread. On a
        persistent connection those bytes would block the next request, so
        they are read and discarded here.

        :return: the number of bytes that were left on the socket, or None
            when the response was already closed.
        """
        if self.isclosed():
            return None
        discarded = 0
        while self.length:
            # read() updates self.length
            chunk = self.read(min(self.length, self._discarded_buf_size))
            if not chunk:
                break
            discarded += len(chunk)
        if discarded:
            trace.mutter("%s bytes left on the HTTP socket", discarded)
        self.close()
        return discarded

305

306

307

# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
class AbstractHTTPConnection:
    """Shared behaviour of our HTTP(S) connections.

    Keeps track of the last response so that what is left of it can be
    drained before the connection is reused.
    """

    response_class = Response

    # A server answering range requests with the whole file deserves a
    # warning, but only when more than this many bytes had to be discarded.
    _range_warning_thresold = 1024 * 1024

    def __init__(self, report_activity=None):
        """Record the reporting callback; no response received yet."""
        self._report_activity = report_activity
        self._response = None
        # Set to True once the "whole file" warning has been emitted.
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        """Log which host (and proxied host, if any) is about to be reached."""
        target = '%s:%s' % (self.host, self.port)
        proxied = self.proxied_host
        if proxied is not None:
            target = target + '(proxy for %s)' % proxied
        trace.mutter('* About to connect() to %s' % target)

    def getresponse(self):
        """Get the response and remember it so it can be cleaned up later."""
        response = http_client.HTTPConnection.getresponse(self)
        self._response = response
        return response

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        response = self._response
        if response is not None:
            try:
                leftover = response.finish()
                # Warn the user (once)
                if (self._ranges_received_whole_file is None
                        and response.status == 200
                        and leftover
                        and leftover > self._range_warning_thresold):
                    self._ranges_received_whole_file = True
                    trace.warning(
                        'Got a 200 response when asking for multiple ranges,'
                        ' does your server at %s:%s support range requests?',
                        self.host, self.port)
            except socket.error as e:
                # It's conceivable that the socket is in a bad state here
                # (including some test cases) and in this case, it doesn't
                # need cleaning anymore, so no need to fail, we just get rid
                # of the socket and let callers reconnect
                connection_lost = e.args and e.args[0] in (
                    errno.ECONNRESET, errno.ECONNABORTED)
                if not connection_lost:
                    raise
                self.close()
            self._response = None
        # Hide the socket while http_client.HTTPConnection.close() does its
        # housekeeping, then put it back so the connection stays open.
        preserved = self.sock
        self.sock = None
        self.close()
        self.sock = preserved

    def _wrap_socket_for_reporting(self, sock):
        """Wrap the socket before anybody use it."""
        self.sock = _ReportingSocket(sock, self._report_activity)

369

370

371

class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
    """Plain HTTP connection whose socket reports its activity."""

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        """Initialise both base classes and remember the proxied host.

        ``ca_certs`` is accepted so that both connection classes can be built
        with the same arguments; it is only relevant for https and is
        ignored here.
        """
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        http_client.HTTPConnection.__init__(self, host, port=port)
        self.proxied_host = proxied_host

    def connect(self):
        """Connect, then wrap the new socket so that traffic is reported."""
        tracing = 'http' in debug.debug_flags
        if tracing:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        raw_sock = self.sock
        self._wrap_socket_for_reporting(raw_sock)

386

387

388

class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
    """HTTPS connection whose socket reports its activity.

    The TCP connection and the TLS handshake are separate steps so that,
    when going through a proxy, the CONNECT request can be issued in clear
    before connect_to_origin() establishes the encryption.
    """

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        """Initialise both base classes and remember the TLS settings.

        :param host: host to open the TCP connection to.
        :param port: port to connect to, or None for the default.
        :param key_file: optional client private key file.
        :param cert_file: optional client certificate file.
        :param proxied_host: 'host:port' of the origin server when ``host``
            is a proxy, None otherwise.
        :param report_activity: optional callable taking (size, direction).
        :param ca_certs: optional file of trusted CA certificates; when None
            the 'ssl.ca_certs' configuration option is used instead.
        """
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        # http.client.HTTPSConnection stopped accepting key_file/cert_file in
        # Python 3.12, so they are not forwarded. They are only needed by
        # connect_to_origin(), which builds its own SSL context.
        http_client.HTTPSConnection.__init__(self, host, port)
        self.key_file = key_file
        self.cert_file = cert_file
        self.proxied_host = proxied_host
        self.ca_certs = ca_certs

    def connect(self):
        """Open the TCP connection; start TLS right away unless proxied."""
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        # Deliberately the plain HTTP connect: TLS is set up separately.
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        """Wrap the current socket into TLS, talking to the origin server.

        :raises ssl.SSLError: when the TLS setup or handshake fails; a hint
            about the relevant options is noted before re-raising.
        """
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        # The TLS peer is the origin server, even when the TCP connection
        # was opened to a proxy.
        if self.proxied_host is not None:
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
            ca_certs = None
        else:
            if self.ca_certs is None:
                ca_certs = config_stack.get('ssl.ca_certs')
            else:
                ca_certs = self.ca_certs
            if ca_certs is None:
                trace.warning(
                    "No valid trusted SSL CA certificates file set. See "
                    "'brz help ssl.ca_certs' for more information on setting "
                    "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            # check_hostname has to be disabled before verify_mode can be
            # set to CERT_NONE, hence the order of the two assignments.
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            # SNI and host name verification must use the origin host: behind
            # a proxy self.host is the proxy, not the server we talk TLS to.
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=host)
        except ssl.SSLError:
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA "
                "certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody use it
        self._wrap_socket_for_reporting(ssl_sock)

449

450

451

class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication has
      been made.
    """

    def __init__(self, method, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        """Build a request with an explicit method.

        :param method: the HTTP method returned by get_method().
        :param url: the URL of the request.
        :param data: optional request body.
        :param headers: optional dict of headers; None means no headers.
        :param origin_req_host: passed through to urllib_request.Request.
        :param unverifiable: passed through to urllib_request.Request.
        :param connection: the connection to use, if one already exists.
        :param parent: the request this one was redirected from, if any.
        """
        # A fresh dict per call rather than a shared mutable default.
        if headers is None:
            headers = {}
        urllib_request.Request.__init__(
            self, url, data, headers,
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        """Return the method given at construction time."""
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        host, port = splitport(self.host)
        if port is None:
            # We need to set the default port ourselves way before it gets
            # set in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and
        # a proxy, it sets Host to the https proxy, not the host we want to
        # talk to. I'm fairly sure this is our fault, but what is the cause
        # is an open question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)

507

508

509

class _ConnectRequest(Request):
    """The CONNECT request sent to a proxy to open a tunnel to a host.

    Its selector is always the proxied 'host:port', whatever the URL is.
    """

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        :raises AssertionError: if ``request.proxied_host`` is None.
        """
        # We give a fake url and redefine selector or urllib_request will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        """The proxied 'host:port', used as the CONNECT target."""
        return self.proxied_host

    @selector.setter
    def selector(self, value):
        # urllib_request.Request assigns self.selector while parsing the URL
        # (and possibly in set_proxy). Without a setter that assignment
        # raises AttributeError and the request cannot even be built. The
        # value is deliberately discarded: the selector is the proxied host.
        pass

    def get_selector(self):
        """Return the selector (the proxied 'host:port')."""
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not
        apply here. In fact, the connection is already established with the
        proxy and we just want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)

543

544

545

class ConnectionHandler(urllib_request.BaseHandler):
    """Provides connection-sharing by pre-processing requests.

    urllib_request provides no way to access the HTTPConnection object
    internally used. But we need it in order to achieve connection sharing.
    So, we add it to the request just before it is processed, and then we
    override the do_open method for http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        """Remember what has to be handed to the connections we create."""
        self.ca_certs = ca_certs
        self._report_activity = report_activity

    def create_connection(self, request, http_connection_class):
        """Build (without connecting) a connection for ``request``.

        :param request: the request providing host and proxied_host.
        :param http_connection_class: the connection class to instantiate.
        :return: the new connection object.
        :raises urlutils.InvalidURL: if the request has no host, or if
            http_client rejects the host.
        """
        target_host = request.host
        if not target_host:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        # The connection is created here but it will not connect until the
        # first request is made.
        try:
            return http_connection_class(
                target_host,
                proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL:
            # There is only one occurrence of InvalidURL in http_client
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request has no connection: create a new one,

        - the request has a connection: this one has been used already,
          let's capture it, so that we can give it to another transport to
          be reused. We don't do that ourselves: the Transport object gets
          the connection from a first request and then propagates it, from
          request to request or to cloned transports.

        :return: the request, now carrying a connection.
        """
        if request.connection is None:
            request.connection = self.create_connection(
                request, http_connection_class)

        # All connections will pass here, propagate debug level
        request.connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        """Make sure an http request carries an HTTPConnection."""
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        """Make sure an https request carries an HTTPSConnection."""
        return self.capture_connection(request, HTTPSConnection)

610

611

612

class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Add the default headers that the request does not already set.

        :return: the same request object.
        """
        for name, value in self._default_headers.items():
            request.headers.setdefault(name, value)
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        Must be called from an ``except`` block: the exception being handled
        is obtained from sys.exc_info().

        urllib_request raises exception of application level kind, we
        just have to translate them.

        http_client can raise exceptions of transport level (badly
        formatted dialog, loss of connection or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).

        :param http_class: the connection class, passed back to do_open().
        :param request: the request that failed.
        :param first_try: True if the request has not been retried yet.
        :return: the response obtained by the retry.
        :raises errors.ConnectionError: the host cannot be resolved, or the
            retry failed for a connection related reason.
        :raises errors.InvalidHttpResponse: the retry failed on a bad status
            line or an unknown protocol.
        :raises errors.ConnectionReset: the retry failed on a reset or
            aborted connection.
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            origin_req_host = request.origin_req_host
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        if isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in incorrect state, it's a bug in
            # our implementation.
            raise exc_val

        if first_try:
            if self._debuglevel >= 2:
                print('Received exception: [%r]' % exc_val)
                print(' On connection: [%r]' % request.connection)
                method = request.get_method()
                url = request.get_full_url()
                print(' Will retry, %s %r' % (method, url))
            request.connection.close()
            return self.do_open(http_class, request, False)

        if self._debuglevel >= 2:
            print('Received second exception: [%r]' % exc_val)
            print(' On connection: [%r]' % request.connection)
        if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
            # http_client.BadStatusLine and http_client.UnknownProtocol
            # indicate that a bogus server was encountered or that a bad
            # connection (i.e. transient errors) is experienced. We have
            # already retried once for that request so we raise.
            my_exception = errors.InvalidHttpResponse(
                request.get_full_url(),
                'Bad status line received',
                orig_error=exc_val)
        elif (isinstance(exc_val, socket.error) and len(exc_val.args)
              and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
            # 10053 == WSAECONNABORTED
            # 10054 == WSAECONNRESET
            raise errors.ConnectionReset(
                "Connection lost while sending request.")
        else:
            # All other exceptions are considered connection related.

            # socket errors generally occur for reasons far outside our
            # scope, so closing the connection and retrying is the best we
            # can do.
            selector = request.selector
            my_exception = errors.ConnectionError(
                msg='while sending %s %s:' % (request.get_method(),
                                              selector),
                orig_error=exc_val)

        if self._debuglevel >= 2:
            print('On connection: [%r]' % request.connection)
            method = request.get_method()
            url = request.get_full_url()
            print(' Failed again, %s %r' % (method, url))
            print(' Will raise: [%r]' % my_exception)
        raise my_exception.with_traceback(exc_tb)

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.

        :param http_class: the connection class, only handed over to
            retry_or_raise().
        :param request: the request to send; it must carry a connection.
        :param first_try: False when this call is already the retry.
        :return: the response, with ``msg`` set to its reason phrase.
        :raises AssertionError: if the request has no connection.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly cased.
        # Since we replace urllib_request.AbstractHTTPHandler.do_open, the
        # header names are normalised with title() here.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            url = request.selector
            # encode_chunked is only passed on Python >= 3.6.
            send_kwargs = {}
            if sys.version_info[:2] >= (3, 6):
                send_kwargs['encode_chunked'] = False
            # FIXME: implements 100-continue (the body is sent right away
            # instead of waiting for the server to ask for it).
            connection._send_request(method, url, request.data, headers,
                                     **send_kwargs)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                # People are often told to paste -Dhttp output to help
                # debug. Don't compromise credentials.
                hdrs = [
                    '%s: %s' % (
                        k,
                        '<masked>'
                        if k in ('Authorization', 'Proxy-Authorization')
                        else v)
                    for k, v in headers.items()]
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help. This clause has to come first since SSL
            # errors are socket errors too.
            raise
        except (socket.gaierror, http_client.BadStatusLine,
                http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)

        response.msg = response.reason
        return response

787

788

789

class HTTPHandler(AbstractHTTPHandler):
    """Handler opening http requests through our HTTPConnection class."""

    def http_open(self, request):
        """Send ``request`` with do_open() and return its response."""
        response = self.do_open(HTTPConnection, request)
        return response

794

795

796

class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Open an https request, tunnelling through the proxy if needed.

        When the connection is not established yet and goes through a proxy,
        a CONNECT request is issued first and the TLS link to the origin
        server is set up before the request itself is sent.

        :raises errors.ConnectionError: if the proxy does not answer the
            CONNECT request with a 200 status.
        """
        connection = request.connection
        needs_tunnel = (
            connection.sock is None
            and connection.proxied_host is not None
            and request.get_method() != 'CONNECT')  # Don't loop
        if needs_tunnel:
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth
            #   around CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # The handler has no 'host' attribute of its own; the proxy
                # is the host the connection was opened to.
                raise errors.ConnectionError(
                    "Can't connect to %s via proxy %s" % (
                        connect.proxied_host, connection.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)

832

833

834

class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Handles redirect requests.

    We have to implement our own scheme because we use a specific
    Request object and because we want to implement a specific
    policy.
    """
    _debuglevel = DEBUG
    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But Breezy uses some
    # shortcuts to optimize against roundtrips which can lead to
    # write requests being issued before read requests of
    # containing dirs can be redirected. So we redirect write
    # requests in the same way which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request

        :return: a new Request for ``newurl``, with ``req`` as its parent.
        :raises urllib_request.HTTPError: for any code other than 301, 302,
            303, 307 and 308.
        """
        # We would have preferred to update the request instead of creating
        # a new one, but the urllib_request.Request object has a too
        # complicated creation process to provide a simple enough equivalent
        # update process. Instead, when redirecting, we only update the
        # following request in the redirect chain with a reference to the
        # parent request.

        # Some codes make no sense in our context and are treated
        # as errors:

        # 300: Multiple choices for different representations of
        #      the URI. Using that mechanism with Breezy will violate the
        #      protocol neutrality of Transport.

        # 304: Not modified (SHOULD only occur with conditional
        #      GETs which are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in
        #      our context-- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may
        # think that it is a sufficient hint that the file exists
        # and that we MAY avoid following the redirections. But
        # if we want to be sure, we MUST follow them.

        origin_req_host = req.origin_req_host

        if code not in (301, 302, 303, 307, 308):
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

        # TODO: It will be nice to be able to detect virtual hosts sharing
        # the same IP address, that will allow us to share the same
        # connection (hence connection=None for now).
        return Request(req.get_method(), newurl,
                       headers=req.headers,
                       origin_req_host=origin_req_host,
                       unverifiable=True,
                       connection=None,
                       parent=req,
                       )

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected to URI.

        Copied from urllib_request to be able to clean the pipe of the
        associated connection, *before* issuing the redirected request but
        *after* having eventually raised an error.

        :return: None when the response names no target; ``fp`` when the
            request does not follow redirections (the target is then stored
            in ``req.redirected_to``); otherwise the response to the
            redirected request.
        :raises urllib_request.HTTPError: when a redirection loop is
            detected or the code cannot be redirected.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        for header_name in ('location', 'uri'):
            if header_name in headers:
                newurl = headers.get(header_name)
                break
        else:
            return

        newurl = urljoin(req.get_full_url(), newurl)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            req.redirected_to = newurl
            return fp

        # This call succeeds or raises an error. urllib_request returns
        # if redirect_request returns None, but our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = req.redirect_dict
            redirected_req.redirect_dict = visited
            too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        else:
            # First redirection: both requests share the same fresh dict.
            visited = {}
            redirected_req.redirect_dict = visited
            req.redirect_dict = visited
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302

954

955

956

class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request
    during the request pre-processing instead of modifying it at _open time.
    As we capture (or create) the connection object during request processing,
    _open time was too late.

    The main task is to modify the request so that the connection is done to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may be
    against an https server proxied through an http proxy. So, https_request
    will be called, but later it's really http_open that will be called. This
    explains why we don't have to call self.parent.open as the urllib_request
    did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        """Set up the handler and rebind the per-scheme hooks.

        :param proxies: forwarded unchanged to
            ``urllib_request.ProxyHandler.__init__``.
        """
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of urllib_request implementation: drop every
        # ``<scheme>_open`` attribute matching an entry of self.proxies.
        for proxy_type, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (proxy_type, proxy))
            delattr(self, '%s_open' % proxy_type)

        def bind_scheme_request(proxy, scheme):
            # Install ``<scheme>_request`` only when a proxy is configured.
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))
        # We are interested only by the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        bind_scheme_request(http_proxy, 'http')
        https_proxy = self.get_proxy_env_var('https')
        bind_scheme_request(https_proxy, 'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.

        :param name: key looked up (lower-cased) in ``self.proxies``.
        :param default_to: alternate key tried when ``name`` is missing, or
            None to disable the fallback.
        :returns: the proxy value found, or None.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            if default_to is not None:
                # Try to get the alternate environment variable
                try:
                    return self.proxies[default_to]
                except KeyError:
                    pass
            return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib_request.proxy_bypass(host)
        else:
            return bypass

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if hport == dport or dport is None:
                # Escape *every* regex metacharacter first so that entries
                # such as '[::1]' or 'a+b' are taken literally, then turn the
                # two supported glob wildcards back into regex form.
                pattern = re.escape(dhost)
                pattern = pattern.replace(r'\*', '.*').replace(r'\?', '.')
                # re.match anchors at the start only, so an entry still
                # matches any host it is a prefix of (historic behaviour).
                if re.match(pattern, hhost, re.IGNORECASE):
                    return True
        # Nothing explicitly avoid the host
        return None

    def set_proxy(self, request, type):
        """Redirect the request's connection to the configured proxy.

        :param request: the request being pre-processed; returned untouched
            when ``proxy_bypass`` says its host must not be proxied.
        :param type: the scheme ('http' or 'https') whose proxy is used.
        :returns: the (possibly modified) request.
        :raises urlutils.InvalidURL: if the proxy value has no host part.
        """
        host = request.host
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port

        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameter are available, we are handling the first
            # proxied request, initialize. scheme (the authentication scheme)
            # and realm will be set by the AuthHandler
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request

1100

1101

1102

class AbstractAuthHandler(urllib_request.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate and
    digest authentications.

    This provides an unified interface for all authentication handlers
    (urllib_request provides far too many with different policies).

    The interaction between this handler and the urllib_request
    framework is not obvious, it works as follow:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication have been
      successful and the request authentication parameters have been updated.
    """

    scheme = None
    """The scheme as it appears in the server header (lower cased)"""

    _max_retry = 3
    """We don't want to retry authenticating endlessly"""

    requires_username = True
    """Whether the auth mechanism requires a username."""

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header:  the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # We want to know when we enter into an try/fail cycle of
        # authentications so we initialize to None to indicate that we aren't
        # in such a cycle by default.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Parse the authentication header.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder) scheme being the first word in the
            given header (lower cased), remainder may be None.
        """
        try:
            scheme, remainder = server_header.split(None, 1)
        except ValueError:
            # Single word: the scheme without any parameter
            scheme = server_header
            remainder = None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        old_value = auth.get(key, None)
        if old_value != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # The retry being recursive calls, None identify the first retry
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None
        server_headers = headers.get_all(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            trace.mutter('%s not found', self.auth_required_header)
            return None

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            self.update_auth(auth, 'protocol', parsed_url.scheme)
            self.update_auth(auth, 'host', parsed_url.host)
            self.update_auth(auth, 'port', parsed_url.port)
            self.update_auth(auth, 'path', parsed_url.path)
        # FIXME: the auth handler should be selected at a single place instead
        # of letting all handlers try to match all headers, but the current
        # design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match each
            # one in turn
            matching_handler = self.auth_match(server_header, auth)
            if matching_handler:
                # auth_match may have modified auth (by adding the
                # password or changing the realm, for example)
                if (request.get_header(self.auth_header, None) is not None
                        and not auth['modified']):
                    # We already tried that, give up
                    return None

                # Only the most secure scheme proposed by the server should be
                # used, since the handlers use 'handler_order' to describe that
                # property, the first handler tried takes precedence, the
                # others should not attempt to authenticate if the best one
                # failed.
                best_scheme = auth.get('best_scheme', None)
                if best_scheme is None:
                    # At that point, if current handler doesn't succeed
                    # the credentials are wrong (or incomplete), but we know
                    # that the associated scheme should be used.
                    best_scheme = auth['best_scheme'] = self.scheme
                if best_scheme != self.scheme:
                    continue

                if self.requires_username and auth.get('user', None) is None:
                    # Without a known user, we can't authenticate
                    return None

                # Housekeeping
                request.connection.cleanup_pipe()
                # Retry the request with an authentication header added
                response = self.parent.open(request)
                if response:
                    self.auth_successful(request, response)
                return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them change the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
             updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional infos may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial url
            and then during dialog with the server).
        :return: A (user, password) tuple; either may still be None.
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(auth['protocol'], auth['host'],
                                      port=port, path=auth['path'],
                                      realm=realm, ask=True,
                                      prompt=self.build_username_prompt(auth))
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implements a public
        build_password_prompt using this method.

        :param auth: dict providing at least 'protocol' and 'realm'.
        :return: A %-format template still holding the ``%(user)s`` and
            ``%(host)s`` placeholders.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s'
        realm = auth['realm']
        if realm is not None:
            # The result is a %-format template, so a literal '%' in the
            # server-supplied realm must be doubled to survive formatting.
            prompt += u", Realm: '%s'" % realm.replace(u'%', u'%%')
        prompt += u' password'
        return prompt

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implements a public
        build_username_prompt using this method.

        :param auth: dict providing at least 'protocol' and 'realm'.
        :return: A %-format template still holding the ``%(host)s``
            placeholder.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(host)s'
        realm = auth['realm']
        if realm is not None:
            # Same escaping as for the password prompt: keep the template
            # valid whatever the server put in the realm.
            prompt += u", Realm: '%s'" % realm.replace(u'%', u'%%')
        prompt += u' username'
        return prompt

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            self.add_auth_header(
                request, self.build_auth_header(auth, request))
        return request

    https_request = http_request  # FIXME: Need test

1381

1382

1383

class NegotiateAuthHandler(AbstractAuthHandler):
    """Authentication handler for ``WWW-Authenticate: Negotiate``.

    Only Kerberos is supported for the time being; NTLM support may be
    added in the future.
    """

    scheme = 'negotiate'
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        """Tell whether the server challenge can be answered with Kerberos.

        On success the 'scheme' and 'negotiate_response' entries of ``auth``
        are refreshed through update_auth().

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known.
        :returns: True if a Kerberos response could be produced.
        """
        offered_scheme, _ = self._parse_auth_header(header)
        if offered_scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', offered_scheme)
        token = self._auth_match_kerberos(auth)
        if token is None:
            return False
        # Optionally should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', token)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host.

        :param auth: The auth parameters; 'host' is used to build the
            service name.
        :returns: The client response, or None when the kerberos module is
            missing or one of the GSSAPI calls reports a failure.
        """
        global kerberos, checked_kerberos
        if kerberos is None and not checked_kerberos:
            # The import is attempted a single time; the outcome is
            # remembered in the module level globals.
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None
        status, context = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if status < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], status)
            return None
        status = kerberos.authGSSClientStep(context, "")
        if status < 0:
            trace.mutter('authGSSClientStep failed: %d', status)
            return None
        return kerberos.authGSSClientResponse(context)

    def build_auth_header(self, auth, request):
        """Return the ``Negotiate`` header value built from the stored response."""
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` already holds a usable negotiate response."""
        # A known scheme means a previous authentication went through, so
        # only the presence of the response remains to be checked.
        if auth.get('scheme', None) != 'negotiate':
            return False
        return auth.get('negotiate_response', None) is not None

1437

1438

1439

class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Build the ``Basic`` header value from auth['user'] and auth['password'].

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication (unused here).
        :return: The header value.
        """
        raw = '%s:%s' % (auth['user'], auth['password'])
        auth_header = 'Basic ' + \
            base64.b64encode(raw.encode('utf-8')).decode('ascii')
        return auth_header

    def extract_realm(self, header_value):
        """Extract the realm from the parameters of a challenge.

        :param header_value: The part of the server header following the
            scheme; None when the server sent the scheme alone.
        :return: A (match, realm) tuple, both None when no realm is found.
        """
        if header_value is None:
            # A bare "Basic" challenge has no parameters to search; report
            # "no realm" instead of letting the regexp raise TypeError.
            return None, None
        match = self.auth_regexp.search(header_value)
        realm = None
        if match:
            realm = match.group(1)
        return match, realm

    def auth_match(self, header, auth):
        """Check that the challenge is a basic one carrying a realm.

        When it is, 'scheme' and 'realm' are stored in ``auth`` and missing
        credentials are requested through get_user_password().

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be updated.
        :returns: True if we can try to handle the authentication.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match:
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if (auth.get('user', None) is None
                    or auth.get('password', None) is None):
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` can be used to authenticate preventively."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'

1481

1482

1483

def get_digest_algorithm_impls(algorithm):
    """Return the (H, KD) callables for a digest algorithm name.

    :param algorithm: 'MD5' or 'SHA'; the comparison is case sensitive.
    :return: A (H, KD) tuple. H takes the bytes to hash, KD takes
        (secret, data) and applies H to the utf-8 encoded ``secret:data``
        string. (None, None) is returned for any other algorithm.
    """
    if algorithm == 'MD5':
        def digest(payload):
            return osutils.md5(payload).hexdigest()
    elif algorithm == 'SHA':
        digest = osutils.sha_string
    else:
        # Unsupported algorithm: no implementation at all
        return None, None

    def keyed_digest(secret, data):
        joined = "%s:%s" % (secret, data)
        return digest(joined.encode('utf-8'))

    return digest, keyed_digest

1494

1495

1496

def get_new_cnonce(nonce, nonce_count):
    """Build a fresh client nonce.

    The seed mixes the server nonce, the nonce count, the current
    ``time.ctime()`` and 8 characters from ``osutils.rand_chars``.

    :param nonce: The nonce sent by the server.
    :param nonce_count: The number of times the nonce has been used
        (formatted with ``%d``).
    :return: The first 16 characters of ``osutils.sha_string`` applied to
        the utf-8 encoded seed.
    """
    seed = '%s:%d:%s:%s' % (
        nonce, nonce_count, time.ctime(), osutils.rand_chars(8))
    digest = osutils.sha_string(seed.encode('utf-8'))
    return digest[:16]

1500

1501

1502

class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` can be used to authenticate preventively."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        """Check that the digest challenge can be answered.

        Only the 'auth' quality of protection and the algorithms known to
        get_digest_algorithm_impls() are supported. On success the digest
        parameters are stored in ``auth`` and missing credentials are
        requested through get_user_password().

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be updated.
        :returns: True if we can try to handle the authentication.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        if raw_auth is None:
            # A bare "Digest" challenge has no parameter (hence no nonce):
            # decline instead of crashing in parse_http_list(None).
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib_request.parse_keqv_list(
            urllib_request.parse_http_list(raw_auth))

        # Check that we can handle that authentication. The server sends a
        # comma-separated list of the qop values it accepts (for instance
        # "auth,auth-int"); we only implement 'auth'.
        qop_options = req_auth.get('qop', None)
        if qop_options is None:
            return False
        if 'auth' not in [opt.strip() for opt in qop_options.split(',')]:
            # No auth-int so far
            return False
        qop = 'auth'

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if auth.get('user', None) is None or auth.get('password', None) is None:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            # opaque is stored directly: it is not part of the
            # authentication key, so it must not mark auth as modified.
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        """Build the ``Digest`` header value for the request.

        The nonce count stored in ``auth`` is incremented as a side effect.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication; its selector and
            method take part in the digest.
        :return: The header value.
        """
        selector = request.selector
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header

1597

1598

1599

class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    The authentication is sent preventively, sparing the roundtrip
    associated with the 401 error; the relevant info is kept in the
    ``auth`` attribute of the request.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Return the auth params stored on the request."""
        return request.auth

    def set_auth(self, request, auth):
        """Store the auth params on the request."""
        request.auth = auth

    def build_password_prompt(self, auth):
        """Return the password prompt for ``auth``."""
        return self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Return the username prompt for ``auth``."""
        return self._build_username_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 by delegating to auth_required()."""
        return self.auth_required(req, headers)

1626

1627

1628

class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    The authentication is sent preventively, sparing the roundtrip
    associated with the 407 error; the relevant info is kept in the
    ``proxy_auth`` attribute of the request.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Return the proxy auth params stored on the request."""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Store the proxy auth params on the request."""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        """Return the password prompt for ``auth``, prefixed with 'Proxy '."""
        return u'Proxy ' + self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Return the username prompt for ``auth``, prefixed with 'Proxy '."""
        return u'Proxy ' + self._build_username_prompt(auth)

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 by delegating to auth_required()."""
        return self.auth_required(req, headers)

1662

1663

1664

class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Basic authentication against the http server itself."""

1666

1667

1668

class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Basic authentication against a proxy."""

1670

1671

1672

class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler"""

1674

1675

1676

class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler"""

1678

1679

1680

class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Negotiate authentication against the http server itself."""

1682

1683

1684

class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Negotiate authentication against a proxy."""

1686

1687

1688

class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    Most responses are not processed here at all: any status listed in
    ``accepted_errors`` is handed back untouched so that our Transport can
    deal with it.
    """

    accepted_errors = [
        200,  # Ok
        201,
        202,
        204,
        206,  # Partial content
        400,
        403,
        404,  # Not found
        405,  # Method not allowed
        406,  # Not Acceptable
        409,  # Conflict
        416,  # Range not satisfiable
        422,  # Unprocessible entity
        501,  # Not implemented
    ]
    """The error codes the caller will handle.

    This can be specialized in the request on a case-by case basis, but the
    common cases are covered here.
    """

    def http_response(self, request, response):
        """Return accepted responses as is, route the others to the parent.

        :param request: The request that produced the response.
        :param response: The response received; its ``code``, ``msg`` and
            ``info()`` are read.
        :return: ``response`` itself when its code is accepted, otherwise
            whatever ``self.parent.error`` returns.
        """
        status = response.code
        reason = response.msg
        headers = response.info()
        if status in self.accepted_errors:
            return response
        return self.parent.error('http', request, response,
                                 status, reason, headers)

    https_response = http_response

1725

1726

1727

class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Turn otherwise unhandled http errors into Breezy exceptions."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise the Breezy exception matching the http status.

        :param req: The failing request; only its full url is used.
        :param code: The http status code.
        :param msg: The message sent along with the status.
        :raises errors.TransportError: for a 403.
        :raises errors.UnexpectedHttpStatus: for any other code.
        """
        if code != 403:
            raise errors.UnexpectedHttpStatus(
                req.get_full_url(), code,
                'Unable to handle http code: %s' % msg)
        raise errors.TransportError(
            'Server refuses to fulfill the request (403 Forbidden) for %s'
            % req.get_full_url())

1739

1740

1741

class Opener(object):
    """Thin wrapper building the opener used by the http transport.

    Daughter classes can override to build their own specific opener.
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        """Assemble the handlers and build the opener.

        :param connection: Callable returning the connection handler; it
            receives ``report_activity`` and ``ca_certs`` as keywords.
        :param redirect: The redirection handler given to build_opener.
        :param error: The error processor given to build_opener.
        :param report_activity: Forwarded to ``connection``.
        :param ca_certs: Forwarded to ``connection``.
        """
        # The handlers are listed (and instantiated) in the order they are
        # handed over to build_opener.
        handlers = [
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect,
            error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
        ]
        self._opener = urllib_request.build_opener(*handlers)

        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)

1776

1777

1778

class HttpTransport(ConnectedTransport):

1779

"""HTTP Client implementations.

1780

1781

The protocol can be given as e.g. http+urllib://host/ to use a particular

1782

implementation.

1783

"""

1784

1785

# _unqualified_scheme: "http" or "https"

1786

# _scheme: may have "+pycurl", etc

1787

1788

# In order to debug we have to issue our traces in sync with

1789

# httplib, which use print :(

1790

_debuglevel = 0

1791

1792

def __init__(self, base, _from_transport=None, ca_certs=None):

1793

"""Set the base path where files will be stored."""

1794

proto_match = re.match(r'^(https?)(\+\w+)?://', base)

1795

if not proto_match:

1796

raise AssertionError("not a http url: %r" % base)

1797

self._unqualified_scheme = proto_match.group(1)

1798

super(HttpTransport, self).__init__(

1799

base, _from_transport=_from_transport)

1800

self._medium = None

1801

# range hint is handled dynamically throughout the life

1802

# of the transport object. We start by trying multi-range

1803

# requests and if the server returns bogus results, we

1804

# retry with single range requests and, finally, we

1805

# forget about range if the server really can't

1806

# understand. Once acquired, this piece of info is

1807

# propagated to clones.

1808

if _from_transport is not None:

1809

self._range_hint = _from_transport._range_hint

1810

self._opener = _from_transport._opener

1811

else:

1812

self._range_hint = 'multi'

1813

self._opener = Opener(

1814

report_activity=self._report_activity, ca_certs=ca_certs)

1815

1816

def request(self, method, url, fields=None, headers=None, **urlopen_kw):
    """Issue an HTTP request through the opener and wrap the response.

    :param method: HTTP method name handed to ``Request``.
    :param url: URL handed to ``Request``.
    :param fields: Optional form fields; when given they are url-encoded
        and sent as the request data.
    :param headers: Optional dict of headers (defaults to an empty dict).
    :param urlopen_kw: Only ``body`` (raw request data) and ``retries``
        (redirections are followed when > 0) are understood.
    :returns: A ``Urllib3LikeResponse`` wrapping the opener's response.
    :raises ValueError: if both ``body`` and ``fields`` are supplied.
    :raises NotImplementedError: if unknown keyword arguments remain.
    :raises errors.RedirectRequested: if redirections are not followed
        and the response code is 301, 302, 303, 307 or 308.
    """
    body = urlopen_kw.pop('body', None)
    if fields is not None:
        data = urlencode(fields).encode()
        if body is not None:
            raise ValueError(
                'body and fields are mutually exclusive')
    else:
        data = body
    if headers is None:
        headers = {}
    request = Request(method, url, data, headers)
    request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
    # Everything understood has been popped; leftovers are unsupported.
    if urlopen_kw:
        raise NotImplementedError(
            'unknown arguments: %r' % urlopen_kw.keys())
    connection = self._get_connection()
    if connection is not None:
        # Give back shared info
        request.connection = connection
        (auth, proxy_auth) = self._get_credentials()
        # Clean the httplib.HTTPConnection pipeline in case the previous
        # request couldn't do it
        connection.cleanup_pipe()
    else:
        # First request, initialize credentials.
        # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
        auth = self._create_auth()
        # Proxy initialization will be done by the first proxied request
        proxy_auth = dict()
    # Ensure authentication info is provided
    request.auth = auth
    request.proxy_auth = proxy_auth

    if self._debuglevel > 0:
        print('perform: %s base: %s, url: %s' % (request.method, self.base,
                                                 request.get_full_url()))
    response = self._opener.open(request)
    if self._get_connection() is not request.connection:
        # First connection or reconnection
        self._set_connection(request.connection,
                             (request.auth, request.proxy_auth))
    else:
        # http may change the credentials while keeping the
        # connection opened
        self._update_credentials((request.auth, request.proxy_auth))

    code = response.code
    if (request.follow_redirections is False
            and code in (301, 302, 303, 307, 308)):
        raise errors.RedirectRequested(request.get_full_url(),
                                       request.redirected_to,
                                       is_permanent=(code in (301, 308)))

    if request.redirected_to is not None:
        trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                     request.redirected_to))

    class Urllib3LikeResponse(object):
        """Adapter exposing the opener's response through ``status``,
        ``reason``, ``data``, ``text``, ``getheader(s)`` and ``read*``."""

        def __init__(self, actual):
            self._actual = actual
            # Body bytes, read lazily and cached by the ``data`` property.
            self._data = None

        def getheader(self, name, default=None):
            if self._actual.headers is None:
                raise http_client.ResponseNotReady()
            return self._actual.headers.get(name, default)

        def getheaders(self):
            if self._actual.headers is None:
                raise http_client.ResponseNotReady()
            return list(self._actual.headers.items())

        @property
        def status(self):
            return self._actual.code

        @property
        def reason(self):
            return self._actual.reason

        @property
        def data(self):
            # Read the whole body once and keep it for later accesses.
            if self._data is None:
                self._data = self._actual.read()
            return self._data

        @property
        def text(self):
            # A 204 response yields None instead of decoded text.
            if self.status == 204:
                return None
            # NOTE(review): presumably fails when the response carries no
            # Content-Type header -- confirm against the headers type.
            charset = cgi.parse_header(
                self._actual.headers['Content-Type'])[1].get('charset')
            if charset:
                return self.data.decode(charset)
            else:
                return self.data.decode()

        def read(self, amt=None):
            return self._actual.read(amt)

        def readlines(self):
            return self._actual.readlines()

        def readline(self, size=-1):
            return self._actual.readline(size)

    return Urllib3LikeResponse(response)

1925

1926

def disconnect(self):
    """Close the current connection, if there is one."""
    conn = self._get_connection()
    if conn is None:
        return
    conn.close()

1930

1931

def has(self, relpath):
    """Tell whether the target location exists.

    :param relpath: Path handed to ``_head``.
    :returns: True when the HEAD response status is 200, False otherwise.
    """
    return self._head(relpath).status == 200

1941

1942

def get(self, relpath):
    """Fetch the whole file found at ``relpath``.

    :param relpath: The relative path to the file
    :returns: The file-like object produced by ``_get``.
    """
    _status, body_file = self._get(relpath, None)
    return body_file

1949

1950

def _get(self, relpath, offsets, tail_amount=0):
    """Fetch a file, or some parts of it, with a GET request.

    :param relpath: Path relative to transport base URL
    :param offsets: None to get the whole file;
        or a list of _CoalescedOffset to fetch parts of a file.
    :param tail_amount: The amount to get from the end of the file.

    :returns: (http_code, result_file)
    :raises errors.NoSuchFile: on a 404 status.
    :raises errors.InvalidHttpRange: on a 416 status, or on a 400 status
        when a Range header was sent.
    :raises errors.BadHttpRequest: on a 400 status without Range header.
    :raises errors.UnexpectedHttpStatus: on any status other than 200/206.
    """
    abspath = self._remote_path(relpath)
    range_header = None
    headers = {}
    if offsets or tail_amount:
        range_header = self._attempted_range_header(offsets, tail_amount)
        if range_header is not None:
            headers = {'Range': 'bytes=' + range_header}

    response = self.request('GET', abspath, headers=headers)
    status = response.status

    if status == 404:  # not found
        raise errors.NoSuchFile(abspath)
    if status == 416 or (status == 400 and range_header):
        # We don't know which, but one of the ranges we specified was
        # wrong.
        raise errors.InvalidHttpRange(abspath, range_header,
                                      'Server return code %d' % status)
    if status == 400:
        raise errors.BadHttpRequest(abspath, response.reason)
    if status not in (200, 206):
        raise errors.UnexpectedHttpStatus(abspath, status)

    data = handle_response(abspath, status, response.getheader, response)
    return status, data

1994

1995

def _remote_path(self, relpath):
    """See ConnectedTransport._remote_path.

    The returned URL carries neither user nor password, and uses the
    unqualified scheme.
    """
    remote = self._parsed_url.clone(relpath)
    # Credentials are never embedded in the path sent to the server.
    remote.user = None
    remote.quoted_user = None
    remote.password = None
    remote.quoted_password = None
    remote.scheme = self._unqualified_scheme
    return str(remote)

2005

2006

def _create_auth(self):
    """Build the credentials dict from the URL given at build time.

    :returns: dict with the keys host, port, user, password, protocol
        and path.
    """
    parsed = self._parsed_url
    return {
        'host': parsed.host,
        'port': parsed.port,
        'user': parsed.user,
        'password': parsed.password,
        'protocol': self._unqualified_scheme,
        'path': parsed.path,
    }

2013

2014

def get_smart_medium(self):
    """See Transport.get_smart_medium."""
    if self._medium is not None:
        return self._medium
    # The medium holds some state (smart server probing at least), so it
    # is created once and kept around. It has the same 'base' attribute
    # as the transport, hence it can't be shared between transports
    # having different bases.
    self._medium = SmartClientHTTPMedium(self)
    return self._medium

2023

2024

def _degrade_range_hint(self, relpath, ranges):
    """Step the range hint down: 'multi' -> 'single' -> None.

    :param relpath: Only used in the trace message.
    :param ranges: Not used by this implementation.
    :returns: True when the hint was degraded, False when it was already
        at its lowest level (the caller must then reraise).
    """
    current = self._range_hint
    if current == 'multi':
        self._range_hint = 'single'
        mutter('Retry "%s" with single range request' % relpath)
        return True
    if current == 'single':
        self._range_hint = None
        mutter('Retry "%s" without ranges' % relpath)
        return True
    # We tried all the tricks, but nothing worked, caller must reraise.
    return False

2035

2036

# _coalesce_offsets is a helper for readv, it tries to combine ranges without

2037

# degrading readv performances. _bytes_to_read_before_seek is the value

2038

# used for the limit parameter and has been tuned for other transports. For

2039

# HTTP, the name is inappropriate but the parameter is still useful and

2040

# helps reduce the number of chunks in the response. The overhead for a

2041

# chunk (headers, length, footer around the data itself) is variable but

2042

# around 50 bytes. We use 128 to reduce the range specifiers that appear in

2043

# the header, some servers (notably Apache) enforce a maximum length for a

2044

# header and issue a '400: Bad request' error when too many ranges are

2045

# specified.

2046

_bytes_to_read_before_seek = 128

2047

# No limit on the offset number that get combined into one, we are trying

2048

# to avoid downloading the whole file.

2049

_max_readv_combine = 0

2050

# By default Apache has a limit of ~400 ranges before replying with a 400

2051

# Bad Request. So we go underneath that amount to be safe.

2052

_max_get_ranges = 200

2053

# We impose no limit on the range size. But see _pycurl.py for a different

2054

# use.

2055

_get_max_size = 0

2056

2057

def _readv(self, relpath, offsets):
    """Get parts of the file at the given relative path.

    This is a generator. When a range-related error occurs, the
    not-yet-served offsets are retried, degrading the range hint via
    ``_degrade_range_hint`` when appropriate.

    :param relpath: Path handed to ``_coalesce_readv``.
    :param offsets: A list of (offset, size) tuples.
    :return: A generator of (offset, data) tuples, yielded in the order of
        the requested offsets.
    :raises errors.ShortReadvError: (and the other range errors caught
        below) once the range hint cannot be degraded any further.
    """
    # offsets may be a generator, we will iterate it several times, so
    # build a list
    offsets = list(offsets)

    try_again = True
    # First pending offset at the time of the previous failure, used to
    # detect the same offset failing twice in a row.
    retried_offset = None
    while try_again:
        try_again = False

        # Coalesce the offsets to minimize the GET requests issued
        sorted_offsets = sorted(offsets)
        coalesced = self._coalesce_offsets(
            sorted_offsets, limit=self._max_readv_combine,
            fudge_factor=self._bytes_to_read_before_seek,
            max_size=self._get_max_size)

        # Turn it into a list, we will iterate it several times
        coalesced = list(coalesced)
        if 'http' in debug.debug_flags:
            mutter('http readv of %s offsets => %s collapsed %s',
                   relpath, len(offsets), len(coalesced))

        # Cache the data read, but only until it's been used
        data_map = {}
        # We will iterate on the data received from the GET requests and
        # serve the corresponding offsets respecting the initial order. We
        # need an offset iterator for that.
        iter_offsets = iter(offsets)
        try:
            cur_offset_and_size = next(iter_offsets)
        except StopIteration:
            # Nothing was requested.
            return

        try:
            for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
                # Split the received chunk
                for offset, size in cur_coal.ranges:
                    # Ranges are relative to the start of the coalesced
                    # chunk.
                    start = cur_coal.start + offset
                    rfile.seek(start, os.SEEK_SET)
                    data = rfile.read(size)
                    data_len = len(data)
                    if data_len != size:
                        raise errors.ShortReadvError(relpath, start, size,
                                                     actual=data_len)
                    if (start, size) == cur_offset_and_size:
                        # The offset requested are sorted as the coalesced
                        # ones, no need to cache. Win !
                        yield cur_offset_and_size[0], data
                        try:
                            cur_offset_and_size = next(iter_offsets)
                        except StopIteration:
                            # Every requested offset has been served.
                            return
                    else:
                        # Different sorting. We need to cache.
                        data_map[(start, size)] = data

                # Yield everything we can
                while cur_offset_and_size in data_map:
                    # Clean the cached data since we use it
                    # XXX: will break if offsets contains duplicates --
                    # vila20071129
                    this_data = data_map.pop(cur_offset_and_size)
                    yield cur_offset_and_size[0], this_data
                    try:
                        cur_offset_and_size = next(iter_offsets)
                    except StopIteration:
                        return

        except (errors.ShortReadvError, errors.InvalidRange,
                errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
            mutter('Exception %r: %s during http._readv', e, e)
            if (not isinstance(e, errors.ShortReadvError)
                    or retried_offset == cur_offset_and_size):
                # We don't degrade the range hint for ShortReadvError since
                # they do not indicate a problem with the server ability to
                # handle ranges. Except when we fail to get back a required
                # offset twice in a row. In that case, falling back to
                # single range or whole file should help.
                if not self._degrade_range_hint(relpath, coalesced):
                    raise
            # Some offsets may have been already processed, so we retry
            # only the unsuccessful ones.
            offsets = [cur_offset_and_size] + [o for o in iter_offsets]
            retried_offset = cur_offset_and_size
            try_again = True

2148

2149

def _coalesce_readv(self, relpath, coalesced):
    """Issue several GET requests to satisfy the coalesced offsets.

    This is a generator yielding ``(coalesced_offset, file)`` pairs, where
    ``file`` is the object returned by the ``_get`` call covering that
    offset.

    :param relpath: Path handed to ``_get``.
    :param coalesced: The coalesced offsets to fetch (each exposes a
        ``length`` attribute).
    :raises AssertionError: if ``_range_hint`` is neither None, 'multi'
        nor 'single'.
    """

    def fetch(batch):
        # Note that the _get below may raise errors.InvalidHttpRange. It's
        # the caller's responsibility to decide how to retry since it may
        # provide different coalesced offsets.
        if not batch:
            return
        _status, rfile = self._get(relpath, batch)
        for item in batch:
            yield item, rfile

    if self._range_hint is None:
        # Download whole file
        for pair in fetch(coalesced):
            yield pair
        return

    total = len(coalesced)
    if self._range_hint == 'multi':
        max_ranges = self._max_get_ranges
    elif self._range_hint == 'single':
        max_ranges = total
    else:
        raise AssertionError("Unknown _range_hint %r"
                             % (self._range_hint,))
    # TODO: Some web servers may ignore the range requests and return
    # the whole file, we may want to detect that and avoid further
    # requests.
    # Hint: test_readv_multiple_get_requests will fail once we do that
    batch = []
    batch_bytes = 0
    for coal in coalesced:
        over_size = (self._get_max_size > 0
                     and batch_bytes + coal.length > self._get_max_size)
        if over_size or len(batch) >= max_ranges:
            # Flush what has been accumulated so far...
            for pair in fetch(batch):
                yield pair
            # ...and restart with the current offset
            batch = [coal]
            batch_bytes = coal.length
        else:
            batch.append(coal)
            batch_bytes += coal.length
    # Get the rest and yield
    for pair in fetch(batch):
        yield pair

2197

2198

def recommended_page_size(self):
    """See Transport.recommended_page_size().

    For HTTP we suggest a large page size to reduce the overhead
    introduced by latency.

    :returns: 65536 (64 KiB).
    """
    return 64 * 1024

2205

2206

def _post(self, body_bytes):
    """POST body_bytes to .bzr/smart on this transport.

    :param body_bytes: The request body, sent as
        ``application/octet-stream``.
    :returns: (response code, response body file-like object).
    :raises errors.UnexpectedHttpStatus: if the status is neither 200
        nor 403.
    """
    # TODO: Requiring all the body_bytes to be available at the beginning of
    # the POST may require large client buffers.  It would be nice to have
    # an interface that allows streaming via POST when possible (and
    # degrades to a local buffer when not).
    smart_url = self._remote_path('.bzr/smart')
    content_headers = {'Content-Type': 'application/octet-stream'}
    response = self.request(
        'POST', smart_url, body=body_bytes, headers=content_headers)
    status = response.status
    if status not in (200, 403):
        raise errors.UnexpectedHttpStatus(smart_url, status)
    body_file = handle_response(
        smart_url, status, response.getheader, response)
    return status, body_file

2225

2226

def _head(self, relpath):
    """Request the HEAD of a file.

    Performs the request and lets callers handle the result.

    :param relpath: Path converted to a URL with ``_remote_path``.
    :returns: The response object, whose status is either 200 or 404.
    :raises errors.UnexpectedHttpStatus: for any other status.
    """
    abspath = self._remote_path(relpath)
    response = self.request('HEAD', abspath)
    if response.status not in (200, 404):
        raise errors.UnexpectedHttpStatus(abspath, response.status)

    # The unreachable ``raise NotImplementedError(self._post)`` that used
    # to follow this return has been dropped.
    return response

2239

2240

def put_file(self, relpath, f, mode=None):
    """Copy the file-like object into the location.

    Not supported by this transport: the call always raises.

    :param relpath: Location to put the contents, relative to base.
    :param f: File-like object.
    :param mode: Unused.
    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http PUT not supported')

2247

2248

def mkdir(self, relpath, mode=None):
    """Create a directory at the given path.

    Not supported by this transport: the call always raises.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support mkdir()')

2251

2252

def rmdir(self, relpath):
    """See Transport.rmdir.

    Not supported by this transport: the call always raises.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support rmdir()')

2255

2256

def append_file(self, relpath, f, mode=None):
    """Append the text in the file-like object into the final
    location.

    Not supported by this transport: the call always raises.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support append()')

2261

2262

def copy(self, rel_from, rel_to):
    """Copy the item at rel_from to the location at rel_to.

    Not supported by this transport: the call always raises.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support copy()')

2265

2266

def copy_to(self, relpaths, other, mode=None, pb=None):
    """Copy a set of entries from self into another Transport.

    :param relpaths: A list/generator of entries to be copied.
    :param other: The target transport; it may not be an HttpTransport.
    :param mode: Forwarded to the base implementation.
    :param pb: Forwarded to the base implementation.
    :raises errors.TransportNotPossible: if ``other`` is an HttpTransport.

    TODO: if other is LocalTransport, is it possible to
          do better than put(get())?
    """
    # At this point HttpTransport might be able to check and see if
    # the remote location is the same, and rather than download, and
    # then upload, it could just issue a remote copy_this command.
    if isinstance(other, HttpTransport):
        raise errors.TransportNotPossible(
            'http cannot be the target of copy_to()')
    base_impl = super(HttpTransport, self)
    return base_impl.copy_to(relpaths, other, mode=mode, pb=pb)

2283

2284

def move(self, rel_from, rel_to):
    """Move the item at rel_from to the location at rel_to.

    Not supported by this transport: the call always raises.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support move()')

2287

2288

def delete(self, relpath):
    """Delete the item at relpath.

    Not supported by this transport: the call always raises.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support delete()')

2291

2292

def external_url(self):
    """See breezy.transport.Transport.external_url."""
    # HTTP URL's are externally usable as long as they don't mention their
    # implementation qualifier, so a copy carrying the unqualified scheme
    # is rendered.
    public_url = self._parsed_url.clone()
    public_url.scheme = self._unqualified_scheme
    return str(public_url)

2299

2300

def is_readonly(self):
    """See Transport.is_readonly.

    :returns: Always True.
    """
    return True

2303

2304

def listable(self):
    """See Transport.listable.

    :returns: Always False.
    """
    return False

2307

2308

def stat(self, relpath):
    """Return the stat information for a file.

    Not supported by this transport: the call always raises.

    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support stat()')

2312

2313

def lock_read(self, relpath):
    """Lock the given file for shared (read) access.

    No real locking happens: a placeholder object is handed back.

    :return: A lock object, which should be passed to Transport.unlock()
    """
    # The old RemoteBranch ignore lock for reading, so we will
    # continue that tradition and return a bogus lock object.
    class BogusLock(object):
        """Lock stand-in that only remembers the path it was built for."""

        def __init__(self, path):
            self.path = path

        def unlock(self):
            """Nothing to release."""
            return None

    return BogusLock(relpath)

2326

2327

def lock_write(self, relpath):
    """Lock the given file for exclusive (write) access.

    WARNING: many transports do not support this, so try to avoid using it.
    This transport does not: the call always raises.

    :return: A lock object, which should be passed to Transport.unlock()
    :raises errors.TransportNotPossible: always.
    """
    raise errors.TransportNotPossible('http does not support lock_write()')

2334

2335

def _attempted_range_header(self, offsets, tail_amount):
    """Prepare a HTTP Range header at a level the server should accept.

    The level is driven by ``_range_hint``: 'multi' describes every
    offset, 'single' describes one range spanning all of them, anything
    else produces no header.

    :param offsets: The coalesced offsets (each exposes ``start`` and
        ``length``).
    :param tail_amount: The amount to get from the end of the file.
    :return: the range header representing offsets/tail_amount or None if
        no header can be built.
    """
    hint = self._range_hint
    if hint == 'multi':
        # Generate the header describing all offsets
        return self._range_header(offsets, tail_amount)
    if hint != 'single':
        return None

    if len(offsets) == 0:
        # Only tail_amount requested, leave range_header do its work
        return self._range_header(offsets, tail_amount)
    if tail_amount not in (0, None):
        # Nothing we can do here to combine ranges with tail_amount
        # in a single range, just returns None. The whole file
        # should be downloaded.
        return None

    # Combine all the requested ranges into a single encompassing one
    first_byte = offsets[0].start
    final = offsets[-1]
    last_byte = final.start + final.length - 1
    spanning = self._coalesce_offsets(
        [(first_byte, last_byte - first_byte + 1)],
        limit=0, fudge_factor=0)
    return self._range_header(list(spanning), 0)

2367

2368

@staticmethod

2369

def _range_header(ranges, tail_amount):
    """Turn a list of bytes ranges into a HTTP Range header value.

    Each range becomes ``first-last`` (both inclusive) and a truthy
    ``tail_amount`` adds a trailing ``-N`` item; items are comma-joined.

    :param ranges: A list of _CoalescedOffset
    :param tail_amount: The amount to get from the end of the file.

    :return: HTTP range header string.

    At least a non-empty ranges *or* a tail_amount must be
    provided.
    """
    specs = ['%d-%d' % (rng.start, rng.start + rng.length - 1)
             for rng in ranges]
    if tail_amount:
        specs.append('-%d' % tail_amount)
    return ','.join(specs)

2389

2390

def _redirected_to(self, source, target):
    """Returns a transport suitable to re-issue a redirected request.

    :param source: The source url as returned by the server.
    :param target: The target url as returned by the server.

    The redirection can be handled only if the relpath involved is not
    renamed by the redirection.

    :returns: A transport
    :raise UnusableRedirect: when the URL can not be reinterpreted
    """
    src = self._split_url(source)
    dst = self._split_url(target)
    base_len = len(self._parsed_url.path)
    # determine the excess tail - the relative path that was in
    # the original request but not part of this transports' URL.
    excess_tail = src.path[base_len:].strip("/")
    if not dst.path.endswith(excess_tail):
        # The final part of the url has been renamed, we can't handle the
        # redirection.
        raise UnusableRedirect(
            source, target, "final part of the url was renamed")

    target_path = dst.path
    if excess_tail:
        # Drop the tail that was in the redirect but not part of
        # the path of this transport.
        target_path = target_path[:-len(excess_tail)]

    if dst.scheme not in ('http', 'https'):
        # Redirected to a different protocol: the credentials found in
        # the target URL are used.
        user, password = dst.user, dst.password
    else:
        # Same protocol family (i.e. http[s]), we will preserve the same
        # http client implementation when a redirection occurs from one to
        # the other (otherwise users may be surprised that bzr switches
        # from one implementation to the other, and devs may suffer
        # debugging it).
        here = self._parsed_url
        same_server = (
            dst.scheme == self._unqualified_scheme
            and dst.host == here.host
            and dst.port == here.port
            and (dst.user is None or dst.user == here.user))
        if same_server:
            # If a user is specified, it should match, we don't care about
            # passwords, wrong passwords will be rejected anyway.
            return self.clone(target_path)
        # Rebuild the url preserving the scheme qualification and the
        # credentials (if they don't apply, the redirected to server
        # will tell us, but if they do apply, we avoid prompting the
        # user)
        user, password = here.user, here.password

    new_url = self._unsplit_url(dst.scheme, user, password,
                                dst.host, dst.port, target_path)
    return transport.get_transport_from_url(new_url)

2454

2455

def _options(self, relpath):
    """Send an OPTIONS request for ``relpath`` and return its headers.

    :returns: The value of the response's ``getheaders()``.
    :raises errors.NoSuchFile: on a 404 status.
    :raises errors.InvalidHttpResponse: on a 403 or 405 status.
    """
    abspath = self._remote_path(relpath)
    response = self.request('OPTIONS', abspath)
    status = response.status
    if status == 404:
        raise errors.NoSuchFile(abspath)
    if status in (403, 405):
        raise errors.InvalidHttpResponse(
            abspath,
            "OPTIONS not supported or forbidden for remote URL")
    return response.getheaders()

2465

2466

2467

# TODO: May be better located in smart/medium.py with the other

2468

# SmartMedium classes

2469

class SmartClientHTTPMedium(medium.SmartClientMedium):
    """Smart client medium sending its requests through an HTTP transport."""

    def __init__(self, http_transport):
        super(SmartClientHTTPMedium, self).__init__(http_transport.base)
        # Only a weak reference to the transport is kept, to avoid a
        # circular reference between the http transport and its medium
        # (the transport will live longer than the medium).
        self._http_transport_ref = weakref.ref(http_transport)

    def get_request(self):
        """Return a new request object bound to this medium."""
        return SmartClientHTTPMediumRequest(self)

    def should_probe(self):
        """Always True for this medium."""
        return True

    def remote_path_from_transport(self, transport):
        """Return the unquoted path of ``transport`` relative to this base."""
        # Strip the optional 'bzr+' prefix from transport so it will have the
        # same scheme as self.
        other_base = transport.base
        if other_base.startswith('bzr+'):
            other_base = other_base[len('bzr+'):]
        return urlutils.unquote(urlutils.relative_url(self.base, other_base))

    def send_http_smart_request(self, bytes):
        """POST ``bytes`` through the transport and return the body file.

        :raises errors.SmartProtocolError: when an InvalidHttpResponse or
            a ConnectionReset is raised while posting.
        """
        try:
            # Get back the http_transport held by the weak reference
            http_transport = self._http_transport_ref()
            status, body_filelike = http_transport._post(bytes)
            # Kept inside the ``try`` on purpose: presumably the error
            # raised here can be one of the types caught below.
            if status != 200:
                raise errors.UnexpectedHttpStatus(
                    http_transport._remote_path('.bzr/smart'), status)
        except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
            raise errors.SmartProtocolError(str(e))
        return body_filelike

    def _report_activity(self, bytes, direction):
        """See SmartMedium._report_activity.

        Does nothing; the underlying plain HTTP transport will report the
        activity that this medium would report.
        """
        return None

    def disconnect(self):
        """See SmartClientMedium.disconnect()."""
        http_transport = self._http_transport_ref()
        http_transport.disconnect()

2518

2519

2520

# TODO: May be better located in smart/medium.py with the other

2521

# SmartMediumRequest classes

2522

class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
    """A SmartClientMediumRequest that works with an HTTP medium.

    Written bytes are buffered and sent in one go when writing is
    finished; the response body is then read from the returned file.
    """

    def __init__(self, client_medium):
        medium.SmartClientMediumRequest.__init__(self, client_medium)
        # Everything accepted so far, not yet sent.
        self._buffer = b''

    def _accept_bytes(self, bytes):
        """Queue ``bytes`` for the request sent by ``_finished_writing``."""
        self._buffer += bytes

    def _finished_writing(self):
        """Send the buffered bytes and keep the response body around."""
        self._response_body = self._medium.send_http_smart_request(
            self._buffer)

    def _read_bytes(self, count):
        """See SmartClientMediumRequest._read_bytes."""
        return self._response_body.read(count)

    def _read_line(self):
        """Read one line from the response body.

        :raises AssertionError: if ``medium._get_line`` hands back excess
            bytes, which this request cannot handle.
        """
        line, excess = medium._get_line(self._response_body.read)
        if excess == b'':
            return line
        raise AssertionError(
            '_get_line returned excess bytes, but this mediumrequest '
            'cannot handle excess. (%r)' % (excess,))

    def _finished_reading(self):
        """See SmartClientMediumRequest._finished_reading."""
        return None

2551

2552

2553

def unhtml_roughly(maybe_html, length_limit=1000):
    """Very approximate html->text translation, for presenting error bodies.

    Tags, newlines and non-breaking spaces (the ``&nbsp;`` entity or the
    literal U+00A0 character) are each replaced by a single space; nothing
    else is interpreted.

    :param maybe_html: The text to translate.
    :param length_limit: Truncate the result to this many characters.
    :returns: The translated, truncated text.

    >>> unhtml_roughly("<b>bad</b> things happened\\n")
    ' bad  things happened '
    >>> unhtml_roughly("not&nbsp;found")
    'not found'
    """
    # The third alternative used to be a plain space, i.e. a no-op
    # substitution; matching non-breaking spaces is what makes it useful.
    text = re.sub(r"(<[^>]*>|\n|&nbsp;|\xa0)", " ", maybe_html)
    return text[:length_limit]

2562

2563

2564

def get_test_permutations():
    """Return the permutations to be used in testing.

    :returns: A list of (transport class, server class) tuples; an HTTPS
        pair is included when ``features.HTTPSServerFeature`` is available.
    """
    from breezy.tests import (
        features,
        http_server,
        )
    permutations = [(HttpTransport, http_server.HttpServer)]
    if not features.HTTPSServerFeature.available():
        return permutations

    from breezy.tests import (
        https_server,
        ssl_certs,
        )

    class HTTPS_transport(HttpTransport):
        """HttpTransport preconfigured with the test CA certificate."""

        def __init__(self, base, _from_transport=None):
            super(HTTPS_transport, self).__init__(
                base, _from_transport=_from_transport,
                ca_certs=ssl_certs.build_path('ca.crt'))

    permutations.append((HTTPS_transport, https_server.HTTPSServer))
    return permutations