/brz/remove-bazaar : revision 0.200.44

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to git_repository.py

Committer: David Allouche
Date: 2007-12-29 22:02:30 UTC
mto: (0.312.1 master) (6883.23.1 bundle-git)
mto: This revision was merged to the branch mainline in revision 6960.
Revision ID: ddaa@canonical.com-20071229220230-ejqu4l2mb1efqu37

Remove some experimental cruft.

files modified:
git_repository.py

model.py

Show diffs side-by-side

added added

removed removed

git_repository.py

self._revision_cache = {}

self._blob_cache = {}

self._entry_revision_cache = {}

self._inventory_cache = {}

def _ancestor_revisions(self, revision_ids):

if revision_ids is not None:

321

320

# print "fetched file revision", line[:-1], path

322

321

return result

323

322

324

# The various versions of _get_entry_revision can be tested by pulling from

325

# the git repo of git itself. First pull up to r700, then r702 to

326

# reproduce the RevisionNotPresent errors.

327

328

def _set_entry_revision_unoptimized(self, entry, revid, path, git_id):

329

# This is unusably slow and will lead to recording a few unnecessary

330

# duplicated file texts. But it seems to be consistent enough to let

331

# pulls resume without causing RevisionNotPresent errors.

332

entry.revision = self._get_file_revision(revid, path)

333

334

def _set_entry_revision_optimized1(self, entry, revid, path, git_id):

335

# This is much faster, produces fewer unique file texts, but will

336

# cause RevisionNotPresent errors when resuming pull.

337

338

# Oops, this does not account for changes in executable bit. That is

339

# probably why it produces fewer unique texts.

340

cached = self._entry_revision_cache.get((revid, path, git_id))

341

if cached is not None:

342

entry.revision = cached

343

return

344

revision = self.get_revision(revid)

345

for parent_id in revision.parent_ids:

346

entry_rev = self._entry_revision_cache.get((parent_id, path, git_id))

347

if entry_rev is not None:

348

break

349

else:

350

entry_rev = self._get_file_revision(revid, path)

351

self._entry_revision_cache[(revid, path, git_id)] = entry_rev

352

entry.revision = entry_rev

353

354

def _set_entry_revision_optimized2(self, entry, revid, path, git_id):

355

# This is slower than the previous one, and does not appear to have a

356

# substantially different effect. Same number of unique texts, same

357

# RevisionNotPresent error.

358

359

# Oops, this does not account for changes in executable bit. That is

360

# probably why it produces fewer unique texts.

361

cached = self._entry_revision_cache.get((revid, path, git_id))

362

if cached is not None:

363

entry.revision = cached

364

return

365

revision = self.get_revision(revid)

366

parent_hits = []

367

for parent_id in revision.parent_ids:

368

entry_rev = self._entry_revision_cache.get((parent_id, path, git_id))

369

if entry_rev is not None:

370

parent_hits.append(entry_rev)

371

if len(parent_hits) == len(revision.parent_ids) and len(set(parent_hits)) == 1:

372

entry_rev = parent_hits[0]

373

else:

374

entry_rev = self._get_file_revision(revid, path)

375

self._entry_revision_cache[(revid, path, git_id)] = entry_rev

376

entry.revision = entry_rev

377

378

_original_get_inventory = get_inventory

379

def _get_inventory_caching(self, revid):

380

if revid in self._inventory_cache:

381

return self._inventory_cache[revid]

382

inv = self._original_get_inventory(revid)

383

self._inventory_cache[revid] = inv

384

return inv

385

386

def _set_entry_revision_optimized3(self, entry, revid, path, git_id):

387

# Depends on _get_inventory_caching.

388

389

# Set the revision of directories to the current revision. It's not

390

# accurate, but we cannot compare directory contents from here.

391

if entry.kind == 'directory':

392

entry.revision = revid

393

return

394

# Build ancestral inventories by walking parents depth first. Ideally

395

# this should be done in an inter-repository, where already imported

396

# data can be used as reference.

397

current_revid = revid

398

revision = self.get_revision(revid)

399

pending_revids = list(reversed(revision.parent_ids))

400

while pending_revids:

401

revid = pending_revids.pop()

402

if revid in self._inventory_cache:

403

continue

404

# Not in cache, ensure parents are in cache first.

405

pending_revids.append(revid)

406

revision = self.get_revision(revid)

407

for parent_id in reversed(revision.parent_ids):

408

if parent_id not in self._inventory_cache:

409

pending_revids.extend(reversed(revision.parent_ids))

410

break

411

else:

412

# All parents are in cache, we can now build this inventory.

413

revid = pending_revids.pop()

414

self.get_inventory(revid) # populate cache

415

# We now have all ancestral inventories in the cache. Get entries by

416

# the same file_id in parent inventories, and use the revision of the

417

# first one that has the same text_sha1 and executable bit.

418

revision = self.get_revision(current_revid)

419

for revid in revision.parent_ids:

420

inventory = self.get_inventory(revid)

421

if entry.file_id in inventory:

422

parent_entry = inventory[entry.file_id]

423

if (parent_entry.text_sha1 == entry.text_sha1

424

and parent_entry.executable == entry.executable):

425

entry.revision = parent_entry.revision

426

return

427

# If we get here, that means we found no matching parent entry, use

428

# the current revision.

429

entry.revision = current_revid

430

431

def _set_entry_revision_optimized4(self, entry, revid, path, git_id):

432

# Same as optimized1, but uses the executable bit in the cache index.

433

# That appears to have the same behaviour as the unoptimized version.

434

cached = self._entry_revision_cache.get(

435

(revid, path, git_id, entry.executable))

436

if cached is not None:

437

entry.revision = cached

438

return

439

revision = self.get_revision(revid)

440

for parent_id in revision.parent_ids:

441

entry_rev = self._entry_revision_cache.get(

442

(parent_id, path, git_id, entry.executable))

443

if entry_rev is not None:

444

break

445

else:

446

entry_rev = self._get_file_revision(revid, path)

447

self._entry_revision_cache[

448

(revid, path, git_id, entry.executable)] = entry_rev

449

entry.revision = entry_rev

450

451

def _set_entry_revision_optimized5(self, entry, revid, path, git_id):

452

# Same as optimized4, but makes get_inventory non-reentrant, and uses

453

# a more structured cache.

454

455

# cache[revision][path, git_id, executable] -> revision

456

323

def _set_entry_revision(self, entry, revid, path, git_id):

457

324

# If a revision is in the cache, we assume it contains entries for the

458

325

# whole inventory. So if all parent revisions are in the cache, but no

459

326

# parent entry is present, then the entry revision is the current

460

# revision. That amortizes the number of git calls for large pulls to

461

# zero.

327

# revision. That amortizes the number of _get_file_revision calls for

328

# large pulls to a "small number".

462

329

cached = self._entry_revision_cache.get(revid, {}).get(

463

330

(path, git_id, entry.executable))

464

331

if cached is not None:

483

350

revid, {})[(path, git_id, entry.executable)] = entry_rev

484

351

entry.revision = entry_rev

485

352

486

_set_entry_revision = _set_entry_revision_optimized5

487

#get_inventory = _get_inventory_caching

488

489

353

490

354

def escape_file_id(file_id):

491

355

return file_id.replace('_', '__').replace(' ', '_s')

Older »