/brz/remove-bazaar : revision 2692.1.17

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/__init__.py

Committer: Andrew Bennetts
Date: 2008-01-04 03:12:11 UTC
mfrom: (3164 +trunk)
mto: This revision was merged to the branch mainline in revision 3320.
Revision ID: andrew.bennetts@canonical.com-20080104031211-wy4uxo2j4elvip1j

Merge from bzr.dev.

files added:
bzrlib/help_topics

bzrlib/help_topics/en

bzrlib/tests/test_http_implementations.py

bzrlib/version_info_formats/format_custom.py

doc/developers/inventory.txt

doc/developers/lca-merge.txt

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/adv_merging.txt

doc/en/user-reference/readme.txt

index.txt

files removed:
doc/en/user-reference/index.txt

files renamed:
bzrlib/help_topics.py => bzrlib/help_topics/__init__.py

doc/en/user-guide/authentication_conf.txt => bzrlib/help_topics/en/authentication.txt

doc/en/user-guide/configuration.txt => bzrlib/help_topics/en/configuration.txt

doc/en/user-guide/conflicts.txt => bzrlib/help_topics/en/conflicts.txt

doc/en/user-reference/hooks.txt => bzrlib/help_topics/en/hooks.txt

bzrlib/tests/HttpServer.py => bzrlib/tests/http_server.py

bzrlib/tests/HTTPTestUtil.py => bzrlib/tests/http_utils.py

files modified:
.bzrignore

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/_patiencediff_c.c

bzrlib/branch.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/graph.py

bzrlib/index.py

bzrlib/inventory.py

bzrlib/lockable_files.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/smart/medium.py

bzrlib/smart/vfs.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/remote.py

bzrlib/tree.py

bzrlib/urlutils.py

bzrlib/version_info_formats/__init__.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/bundles.txt

doc/developers/index.txt

doc/developers/packrepo.txt

doc/en/mini-tutorial/index.txt

doc/en/quick-reference/quick-start-summary.svg

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/working_offline_central.txt

doc/index.txt

setup.py

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/__init__.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

return url

def _extract_headers(header_text, url):

"""Extract the mapping for an rfc2822 header

This is a helper function for the test suite and for _pycurl.

(urllib already parses the headers for us)

In the case that there are multiple headers inside the file,

the last one is returned.

:param header_text: A string of header information.

This expects that the first line of a header will always be HTTP ...

:param url: The url we are parsing, so we can raise nice errors

:return: mimetools.Message object, which basically acts like a case

insensitive dictionary.

"""

first_header = True

remaining = header_text

if not remaining:

100

raise errors.InvalidHttpResponse(url, 'Empty headers')

101

102

while remaining:

103

header_file = StringIO(remaining)

104

first_line = header_file.readline()

105

if not first_line.startswith('HTTP'):

106

if first_header: # The first header *must* start with HTTP

107

raise errors.InvalidHttpResponse(url,

108

'Opening header line did not start with HTTP: %s'

109

% (first_line,))

110

else:

111

break # We are done parsing

112

first_header = False

113

m = mimetools.Message(header_file)

114

115

# mimetools.Message parses the first header up to a blank line

116

# So while there is remaining data, it probably means there is

117

# another header to be parsed.

118

# Get rid of any preceding whitespace, which if it is all whitespace

119

# will get rid of everything.

120

remaining = header_file.read().lstrip()

121

return m

122

123

124

class HttpTransportBase(ConnectedTransport, medium.SmartClientMedium):

125

"""Base class for http implementations.

126

157

114

else:

158

115

self._range_hint = 'multi'

159

116

160

def _remote_path(self, relpath):

161

"""Produce absolute path, adjusting protocol."""

162

relative = urlutils.unescape(relpath).encode('utf-8')

163

path = self._combine_paths(self._path, relative)

164

return self._unsplit_url(self._unqualified_scheme,

165

self._user, self._password,

166

self._host, self._port,

167

path)

168

169

117

def has(self, relpath):

170

118

raise NotImplementedError("has() is abstract on %r" % self)

171

119

175

123

:param relpath: The relative path to the file

176

124

"""

177

125

code, response_file = self._get(relpath, None)

178

return response_file

126

# FIXME: some callers want an iterable... One step forward, three steps

127

# backwards :-/ And not only an iterable, but an iterable that can be

128

# seeked backwards, so we will never be able to do that. One such

129

# known client is bzrlib.bundle.serializer.v4.get_bundle_reader. At the

130

# time of this writing it's even the only known client -- vila20071203

131

return StringIO(response_file.read())

179

132

180

133

def _get(self, relpath, ranges, tail_amount=0):

181

134

"""Get a file, or part of a file.

189

142

"""

190

143

raise NotImplementedError(self._get)

191

144

145

def _remote_path(self, relpath):

146

"""See ConnectedTransport._remote_path.

147

148

user and passwords are not embedded in the path provided to the server.

149

"""

150

relative = urlutils.unescape(relpath).encode('utf-8')

151

path = self._combine_paths(self._path, relative)

152

return self._unsplit_url(self._unqualified_scheme,

153

None, None, self._host, self._port, path)

154

155

def _create_auth(self):

156

"""Returns a dict returning the credentials provided at build time."""

157

auth = dict(host=self._host, port=self._port,

158

user=self._user, password=self._password,

159

protocol=self._unqualified_scheme,

160

path=self._path)

161

return auth

162

192

163

def get_request(self):

193

164

return SmartClientHTTPMediumRequest(self)

194

165

213

184

# further tries were unsuccessful

214

185

raise exc_info[0], exc_info[1], exc_info[2]

215

186

216

def _get_ranges_hinted(self, relpath, ranges):

217

"""Issue a ranged GET request taking server capabilities into account.

218

219

Depending of the errors returned by the server, we try several GET

220

requests, trying to minimize the data transferred.

221

222

:param relpath: Path relative to transport base URL

223

:param ranges: None to get the whole file;

224

or a list of _CoalescedOffset to fetch parts of a file.

225

:returns: A file handle containing at least the requested ranges.

226

"""

227

exc_info = None

228

try_again = True

229

while try_again:

230

try_again = False

231

try:

232

code, f = self._get(relpath, ranges)

233

except errors.InvalidRange, e:

234

if exc_info is None:

235

exc_info = sys.exc_info()

236

self._degrade_range_hint(relpath, ranges, exc_info)

237

try_again = True

238

return f

239

240

187

# _coalesce_offsets is a helper for readv, it tries to combine ranges without

241

188

# degrading readv performances. _bytes_to_read_before_seek is the value

242

189

# used for the limit parameter and has been tuned for other transports. For

254

201

# By default Apache has a limit of ~400 ranges before replying with a 400

255

202

# Bad Request. So we go underneath that amount to be safe.

256

203

_max_get_ranges = 200

204

# We impose no limit on the range size. But see _pycurl.py for a different

205

# use.

206

_get_max_size = 0

257

207

258

208

def _readv(self, relpath, offsets):

259

209

"""Get parts of the file at the given relative path.

262

212

:param return: A list or generator of (offset, data) tuples

263

213

"""

264

214

265

# offsets may be a genarator, we will iterate it several times, so

215

# offsets may be a generator, we will iterate it several times, so

266

216

# build a list

267

217

offsets = list(offsets)

268

218

269

219

try_again = True

220

retried_offset = None

270

221

while try_again:

271

222

try_again = False

272

223

274

225

sorted_offsets = sorted(offsets)

275

226

coalesced = self._coalesce_offsets(

276

227

sorted_offsets, limit=self._max_readv_combine,

277

fudge_factor=self._bytes_to_read_before_seek)

228

fudge_factor=self._bytes_to_read_before_seek,

229

max_size=self._get_max_size)

278

230

279

231

# Turn it into a list, we will iterate it several times

280

232

coalesced = list(coalesced)

284

236

# Cache the data read, but only until it's been used

285

237

data_map = {}

286

238

# We will iterate on the data received from the GET requests and

287

# serve the corresponding offsets repecting the initial order. We

239

# serve the corresponding offsets respecting the initial order. We

288

240

# need an offset iterator for that.

289

241

iter_offsets = iter(offsets)

290

242

cur_offset_and_size = iter_offsets.next()

291

243

292

244

try:

293

for cur_coal, file in self._coalesce_readv(relpath, coalesced):

245

for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):

294

246

# Split the received chunk

295

247

for offset, size in cur_coal.ranges:

296

248

start = cur_coal.start + offset

297

file.seek(start, 0)

298

data = file.read(size)

249

rfile.seek(start, 0)

250

data = rfile.read(size)

299

251

data_len = len(data)

300

252

if data_len != size:

301

253

raise errors.ShortReadvError(relpath, start, size,

302

254

actual=data_len)

303

data_map[(start, size)] = data

255

if (start, size) == cur_offset_and_size:

256

# The offset requested are sorted as the coalesced

257

# ones, no need to cache. Win !

258

yield cur_offset_and_size[0], data

259

cur_offset_and_size = iter_offsets.next()

260

else:

261

# Different sorting. We need to cache.

262

data_map[(start, size)] = data

304

263

305

264

# Yield everything we can

306

265

while cur_offset_and_size in data_map:

311

270

yield cur_offset_and_size[0], this_data

312

271

cur_offset_and_size = iter_offsets.next()

313

272

314

except (errors.ShortReadvError,errors.InvalidRange), e:

315

self._degrade_range_hint(relpath, coalesced, sys.exc_info())

273

except (errors.ShortReadvError, errors.InvalidRange,

274

errors.InvalidHttpRange), e:

275

mutter('Exception %r: %s during http._readv',e, e)

276

if (not isinstance(e, errors.ShortReadvError)

277

or retried_offset == cur_offset_and_size):

278

# We don't degrade the range hint for ShortReadvError since

279

# they do not indicate a problem with the server ability to

280

# handle ranges. Except when we fail to get back a required

281

# offset twice in a row. In that case, falling back to

282

# single range or whole file should help or end up in a

283

# fatal exception.

284

self._degrade_range_hint(relpath, coalesced, sys.exc_info())

316

285

# Some offsets may have been already processed, so we retry

317

286

# only the unsuccessful ones.

318

287

offsets = [cur_offset_and_size] + [o for o in iter_offsets]

288

retried_offset = cur_offset_and_size

319

289

try_again = True

320

290

321

291

def _coalesce_readv(self, relpath, coalesced):

322

292

"""Issue several GET requests to satisfy the coalesced offsets"""

323

total = len(coalesced)

324

if self._range_hint == 'multi':

325

max_ranges = self._max_get_ranges

326

elif self._range_hint == 'single':

327

max_ranges = total

293

294

def get_and_yield(relpath, coalesced):

295

if coalesced:

296

# Note that the _get below may raise

297

# errors.InvalidHttpRange. It's the caller's responsibility to

298

# decide how to retry since it may provide different coalesced

299

# offsets.

300

code, rfile = self._get(relpath, coalesced)

301

for coal in coalesced:

302

yield coal, rfile

303

304

if self._range_hint is None:

305

# Download whole file

306

for c, rfile in get_and_yield(relpath, coalesced):

307

yield c, rfile

328

308

else:

329

# The whole file will be downloaded anyway

330

max_ranges = total

331

# TODO: Some web servers may ignore the range requests and return the

332

# whole file, we may want to detect that and avoid further requests.

333

# Hint: test_readv_multiple_get_requests will fail in that case .

334

for group in xrange(0, len(coalesced), max_ranges):

335

ranges = coalesced[group:group+max_ranges]

336

# Note that the following may raise errors.InvalidRange. It's the

337

# caller responsability to decide how to retry since it may provide

338

# different coalesced offsets.

339

code, file = self._get(relpath, ranges)

340

for range in ranges:

341

yield range, file

309

total = len(coalesced)

310

if self._range_hint == 'multi':

311

max_ranges = self._max_get_ranges

312

elif self._range_hint == 'single':

313

max_ranges = total

314

else:

315

raise AssertionError("Unknown _range_hint %r"

316

% (self._range_hint,))

317

# TODO: Some web servers may ignore the range requests and return

318

# the whole file, we may want to detect that and avoid further

319

# requests.

320

# Hint: test_readv_multiple_get_requests will fail once we do that

321

cumul = 0

322

ranges = []

323

for coal in coalesced:

324

if ((self._get_max_size > 0

325

and cumul + coal.length > self._get_max_size)

326

or len(ranges) >= max_ranges):

327

# Get that much and yield

328

for c, rfile in get_and_yield(relpath, ranges):

329

yield c, rfile

330

# Restart with the current offset

331

ranges = [coal]

332

cumul = coal.length

333

else:

334

ranges.append(coal)

335

cumul += coal.length

336

# Get the rest and yield

337

for c, rfile in get_and_yield(relpath, ranges):

338

yield c, rfile

342

339

343

340

def recommended_page_size(self):

344

341

"""See Transport.recommended_page_size().

348

345

"""

349

346

return 64 * 1024

350

347

351

@staticmethod

352

@deprecated_method(zero_seventeen)

353

def offsets_to_ranges(offsets):

354

"""Turn a list of offsets and sizes into a list of byte ranges.

355

356

:param offsets: A list of tuples of (start, size). An empty list

357

is not accepted.

358

:return: a list of inclusive byte ranges (start, end)

359

Adjacent ranges will be combined.

360

"""

361

# Make sure we process sorted offsets

362

offsets = sorted(offsets)

363

364

prev_end = None

365

combined = []

366

367

for start, size in offsets:

368

end = start + size - 1

369

if prev_end is None:

370

combined.append([start, end])

371

elif start <= prev_end + 1:

372

combined[-1][1] = end

373

else:

374

combined.append([start, end])

375

prev_end = end

376

377

return combined

378

379

348

def _post(self, body_bytes):

380

349

"""POST body_bytes to .bzr/smart on this transport.

381

350

490

459

return self.__class__(self.abspath(offset), self)

491

460

492

461

def _attempted_range_header(self, offsets, tail_amount):

493

"""Prepare a HTTP Range header at a level the server should accept"""

462

"""Prepare a HTTP Range header at a level the server should accept.

463

464

:return: the range header representing offsets/tail_amount or None if

465

no header can be built.

466

"""

494

467

495

468

if self._range_hint == 'multi':

496

469

# Generate the header describing all offsets

Older »