/brz/remove-bazaar : revision 3062.2.11

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/__init__.py

Committer: Aaron Bentley
Date: 2007-12-20 00:56:46 UTC
mfrom: (3131 +trunk)
mto: This revision was merged to the branch mainline in revision 3133.
Revision ID: aaron.bentley@utoronto.ca-20071220005646-cfebcxoxqtpsk3uo

Merge bzr.dev

files added:
bzrlib/help_topics

bzrlib/help_topics/en

bzrlib/version_info_formats/format_custom.py

doc/en/user-guide/adv_merging.txt

doc/en/user-reference/readme.txt

index.txt

files removed:
doc/en/user-reference/index.txt

files renamed:
bzrlib/help_topics.py => bzrlib/help_topics/__init__.py

doc/en/user-guide/authentication_conf.txt => bzrlib/help_topics/en/authentication.txt

doc/en/user-guide/configuration.txt => bzrlib/help_topics/en/configuration.txt

doc/en/user-guide/conflicts.txt => bzrlib/help_topics/en/conflicts.txt

doc/en/user-reference/hooks.txt => bzrlib/help_topics/en/hooks.txt

files modified:
.bzrignore

Makefile

NEWS

bzrlib/__init__.py

bzrlib/_patiencediff_c.c

bzrlib/branch.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cmd_version_info.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/graph.py

bzrlib/index.py

bzrlib/inventory.py

bzrlib/lockable_files.py

bzrlib/merge_directive.py

bzrlib/osutils.py

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/smart/medium.py

bzrlib/symbol_versioning.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_version_info.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/version_info_formats/__init__.py

bzrlib/workingtree.py

doc/en/mini-tutorial/index.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/version_info.txt

setup.py

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

Show diffs side-by-side

added added

removed removed

bzrlib/transport/http/__init__.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

return url

def _extract_headers(header_text, url):

"""Extract the mapping for an rfc2822 header

This is a helper function for the test suite and for _pycurl.

(urllib already parses the headers for us)

In the case that there are multiple headers inside the file,

the last one is returned.

:param header_text: A string of header information.

This expects that the first line of a header will always be HTTP ...

:param url: The url we are parsing, so we can raise nice errors

:return: mimetools.Message object, which basically acts like a case

insensitive dictionary.

"""

first_header = True

remaining = header_text

if not remaining:

100

raise errors.InvalidHttpResponse(url, 'Empty headers')

101

102

while remaining:

103

header_file = StringIO(remaining)

104

first_line = header_file.readline()

105

if not first_line.startswith('HTTP'):

106

if first_header: # The first header *must* start with HTTP

107

raise errors.InvalidHttpResponse(url,

108

'Opening header line did not start with HTTP: %s'

109

% (first_line,))

110

else:

111

break # We are done parsing

112

first_header = False

113

m = mimetools.Message(header_file)

114

115

# mimetools.Message parses the first header up to a blank line

116

# So while there is remaining data, it probably means there is

117

# another header to be parsed.

118

# Get rid of any preceeding whitespace, which if it is all whitespace

119

# will get rid of everything.

120

remaining = header_file.read().lstrip()

121

return m

122

123

124

class HttpTransportBase(ConnectedTransport, medium.SmartClientMedium):

125

"""Base class for http implementations.

126

175

132

:param relpath: The relative path to the file

176

133

"""

177

134

code, response_file = self._get(relpath, None)

178

return response_file

135

# FIXME: some callers want an iterable... One step forward, three steps

136

# backwards :-/ And not only an iterable, but an iterable that can be

137

# seeked backwards, so we will never be able to do that. One such

138

# known client is bzrlib.bundle.serializer.v4.get_bundle_reader. At the

139

# time of this writing it's even the only known client -- vila20071203

140

return StringIO(response_file.read())

179

141

180

142

def _get(self, relpath, ranges, tail_amount=0):

181

143

"""Get a file, or part of a file.

213

175

# further tries were unsuccessful

214

176

raise exc_info[0], exc_info[1], exc_info[2]

215

177

216

def _get_ranges_hinted(self, relpath, ranges):

217

"""Issue a ranged GET request taking server capabilities into account.

218

219

Depending of the errors returned by the server, we try several GET

220

requests, trying to minimize the data transferred.

221

222

:param relpath: Path relative to transport base URL

223

:param ranges: None to get the whole file;

224

or a list of _CoalescedOffset to fetch parts of a file.

225

:returns: A file handle containing at least the requested ranges.

226

"""

227

exc_info = None

228

try_again = True

229

while try_again:

230

try_again = False

231

try:

232

code, f = self._get(relpath, ranges)

233

except errors.InvalidRange, e:

234

if exc_info is None:

235

exc_info = sys.exc_info()

236

self._degrade_range_hint(relpath, ranges, exc_info)

237

try_again = True

238

return f

239

240

178

# _coalesce_offsets is a helper for readv, it try to combine ranges without

241

179

# degrading readv performances. _bytes_to_read_before_seek is the value

242

180

# used for the limit parameter and has been tuned for other transports. For

254

192

# By default Apache has a limit of ~400 ranges before replying with a 400

255

193

# Bad Request. So we go underneath that amount to be safe.

256

194

_max_get_ranges = 200

195

# We impose no limit on the range size. But see _pycurl.py for a different

196

# use.

197

_get_max_size = 0

257

198

258

199

def _readv(self, relpath, offsets):

259

200

"""Get parts of the file at the given relative path.

262

203

:param return: A list or generator of (offset, data) tuples

263

204

"""

264

205

265

# offsets may be a genarator, we will iterate it several times, so

206

# offsets may be a generator, we will iterate it several times, so

266

207

# build a list

267

208

offsets = list(offsets)

268

209

274

215

sorted_offsets = sorted(offsets)

275

216

coalesced = self._coalesce_offsets(

276

217

sorted_offsets, limit=self._max_readv_combine,

277

fudge_factor=self._bytes_to_read_before_seek)

218

fudge_factor=self._bytes_to_read_before_seek,

219

max_size=self._get_max_size)

278

220

279

221

# Turn it into a list, we will iterate it several times

280

222

coalesced = list(coalesced)

284

226

# Cache the data read, but only until it's been used

285

227

data_map = {}

286

228

# We will iterate on the data received from the GET requests and

287

# serve the corresponding offsets repecting the initial order. We

229

# serve the corresponding offsets respecting the initial order. We

288

230

# need an offset iterator for that.

289

231

iter_offsets = iter(offsets)

290

232

cur_offset_and_size = iter_offsets.next()

291

233

292

234

try:

293

for cur_coal, file in self._coalesce_readv(relpath, coalesced):

235

for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):

294

236

# Split the received chunk

295

237

for offset, size in cur_coal.ranges:

296

238

start = cur_coal.start + offset

297

file.seek(start, 0)

298

data = file.read(size)

239

rfile.seek(start, 0)

240

data = rfile.read(size)

299

241

data_len = len(data)

300

242

if data_len != size:

301

243

raise errors.ShortReadvError(relpath, start, size,

302

244

actual=data_len)

303

data_map[(start, size)] = data

245

if (start, size) == cur_offset_and_size:

246

# The offset requested are sorted as the coalesced

247

# ones, no need to cache. Win !

248

yield cur_offset_and_size[0], data

249

cur_offset_and_size = iter_offsets.next()

250

else:

251

# Different sorting. We need to cache.

252

data_map[(start, size)] = data

304

253

305

254

# Yield everything we can

306

255

while cur_offset_and_size in data_map:

311

260

yield cur_offset_and_size[0], this_data

312

261

cur_offset_and_size = iter_offsets.next()

313

262

314

except (errors.ShortReadvError,errors.InvalidRange), e:

263

except (errors.ShortReadvError, errors.InvalidRange,

264

errors.InvalidHttpRange), e:

315

265

self._degrade_range_hint(relpath, coalesced, sys.exc_info())

316

266

# Some offsets may have been already processed, so we retry

317

267

# only the unsuccessful ones.

320

270

321

271

def _coalesce_readv(self, relpath, coalesced):

322

272

"""Issue several GET requests to satisfy the coalesced offsets"""

323

total = len(coalesced)

324

if self._range_hint == 'multi':

325

max_ranges = self._max_get_ranges

326

elif self._range_hint == 'single':

327

max_ranges = total

273

274

def get_and_yield(relpath, coalesced):

275

if coalesced:

276

# Note that the _get below may raise

277

# errors.InvalidHttpRange. It's the caller's responsibility to

278

# decide how to retry since it may provide different coalesced

279

# offsets.

280

code, rfile = self._get(relpath, coalesced)

281

for coal in coalesced:

282

yield coal, rfile

283

284

if self._range_hint is None:

285

# Download whole file

286

for c, rfile in get_and_yield(relpath, coalesced):

287

yield c, rfile

328

288

else:

329

# The whole file will be downloaded anyway

330

max_ranges = total

331

# TODO: Some web servers may ignore the range requests and return the

332

# whole file, we may want to detect that and avoid further requests.

333

# Hint: test_readv_multiple_get_requests will fail in that case .

334

for group in xrange(0, len(coalesced), max_ranges):

335

ranges = coalesced[group:group+max_ranges]

336

# Note that the following may raise errors.InvalidRange. It's the

337

# caller responsability to decide how to retry since it may provide

338

# different coalesced offsets.

339

code, file = self._get(relpath, ranges)

340

for range in ranges:

341

yield range, file

289

total = len(coalesced)

290

if self._range_hint == 'multi':

291

max_ranges = self._max_get_ranges

292

elif self._range_hint == 'single':

293

max_ranges = total

294

else:

295

raise AssertionError("Unknown _range_hint %r"

296

% (self._range_hint,))

297

# TODO: Some web servers may ignore the range requests and return

298

# the whole file, we may want to detect that and avoid further

299

# requests.

300

# Hint: test_readv_multiple_get_requests will fail once we do that

301

cumul = 0

302

ranges = []

303

for coal in coalesced:

304

if ((self._get_max_size > 0

305

and cumul + coal.length > self._get_max_size)

306

or len(ranges) >= max_ranges):

307

# Get that much and yield

308

for c, rfile in get_and_yield(relpath, ranges):

309

yield c, rfile

310

# Restart with the current offset

311

ranges = [coal]

312

cumul = coal.length

313

else:

314

ranges.append(coal)

315

cumul += coal.length

316

# Get the rest and yield

317

for c, rfile in get_and_yield(relpath, ranges):

318

yield c, rfile

342

319

343

320

def recommended_page_size(self):

344

321

"""See Transport.recommended_page_size().

348

325

"""

349

326

return 64 * 1024

350

327

351

@staticmethod

352

@deprecated_method(zero_seventeen)

353

def offsets_to_ranges(offsets):

354

"""Turn a list of offsets and sizes into a list of byte ranges.

355

356

:param offsets: A list of tuples of (start, size). An empty list

357

is not accepted.

358

:return: a list of inclusive byte ranges (start, end)

359

Adjacent ranges will be combined.

360

"""

361

# Make sure we process sorted offsets

362

offsets = sorted(offsets)

363

364

prev_end = None

365

combined = []

366

367

for start, size in offsets:

368

end = start + size - 1

369

if prev_end is None:

370

combined.append([start, end])

371

elif start <= prev_end + 1:

372

combined[-1][1] = end

373

else:

374

combined.append([start, end])

375

prev_end = end

376

377

return combined

378

379

328

def _post(self, body_bytes):

380

329

"""POST body_bytes to .bzr/smart on this transport.

381

330

490

439

return self.__class__(self.abspath(offset), self)

491

440

492

441

def _attempted_range_header(self, offsets, tail_amount):

493

"""Prepare a HTTP Range header at a level the server should accept"""

442

"""Prepare a HTTP Range header at a level the server should accept.

443

444

:return: the range header representing offsets/tail_amount or None if

445

no header can be built.

446

"""

494

447

495

448

if self._range_hint == 'multi':

496

449

# Generate the header describing all offsets

Older »