321
320
# print "fetched file revision", line[:-1], path
324
# The various versions of _get_entry_revision can be tested by pulling from
325
# the git repo of git itself. First pull up to r700, then r702 to
326
# reproduce the RevisionNotPresent errors.
328
def _set_entry_revision_unoptimized(self, entry, revid, path, git_id):
    """Set ``entry.revision`` by asking ``_get_file_revision`` every time.

    No caching is done; ``git_id`` is accepted only so that all the
    ``_set_entry_revision_*`` variants share one signature.

    :param entry: inventory entry whose ``revision`` attribute is assigned.
    :param revid: revision id the entry belongs to.
    :param path: path of the entry inside the tree.
    :param git_id: unused by this variant.
    """
    # This is unusably slow and will lead to recording a few unnecessary
    # duplicated file texts. But it seems to be consistent enough to let
    # pulls resume without causing RevisionNotPresent errors.
    entry.revision = self._get_file_revision(revid, path)
334
def _set_entry_revision_optimized1(self, entry, revid, path, git_id):
    """Set ``entry.revision`` using a flat ``(revid, path, git_id)`` cache.

    Lookup order:

    1. the cache entry for ``revid`` itself;
    2. the first parent of ``revid`` that has a cache entry for the same
       ``(path, git_id)``;
    3. ``self._get_file_revision(revid, path)`` when no parent is cached.

    The result of steps 2/3 is stored in the cache under ``revid``.

    :param entry: inventory entry whose ``revision`` attribute is assigned.
    :param revid: revision id the entry belongs to.
    :param path: path of the entry inside the tree.
    :param git_id: git object id of the entry, used as part of the key.
    """
    # This is much faster, produces fewer unique file texts, but will
    # cause RevisionNotPresent errors when resuming pull.
    #
    # Oops, this does not account for changes in executable bit. That is
    # probably why it produces fewer unique texts.
    cache = self._entry_revision_cache
    key = (revid, path, git_id)
    cached = cache.get(key)
    if cached is not None:
        entry.revision = cached
        return
    revision = self.get_revision(revid)
    for parent_id in revision.parent_ids:
        entry_rev = cache.get((parent_id, path, git_id))
        if entry_rev is not None:
            # Same path and git object in a parent: inherit its revision.
            break
    else:
        # No parent knows this (path, git_id); fall back to the slow lookup.
        entry_rev = self._get_file_revision(revid, path)
    cache[key] = entry_rev
    entry.revision = entry_rev
354
def _set_entry_revision_optimized2(self, entry, revid, path, git_id):
    """Set ``entry.revision``, inheriting only when all parents agree.

    Like the ``optimized1`` variant, but a parent's cached revision is
    reused only if *every* parent of ``revid`` has a cache entry for
    ``(path, git_id)`` and all of those entries are the same revision.
    Otherwise ``self._get_file_revision(revid, path)`` is used. The result
    is stored in the cache under ``revid``.

    :param entry: inventory entry whose ``revision`` attribute is assigned.
    :param revid: revision id the entry belongs to.
    :param path: path of the entry inside the tree.
    :param git_id: git object id of the entry, used as part of the key.
    """
    # This is slower than the previous one, and does not appear to have a
    # substantially different effect. Same number of unique texts, same
    # RevisionNotPresent error.
    #
    # Oops, this does not account for changes in executable bit. That is
    # probably why it produces fewer unique texts.
    cache = self._entry_revision_cache
    key = (revid, path, git_id)
    cached = cache.get(key)
    if cached is not None:
        entry.revision = cached
        return
    revision = self.get_revision(revid)
    parent_hits = []
    for parent_id in revision.parent_ids:
        parent_rev = cache.get((parent_id, path, git_id))
        if parent_rev is not None:
            parent_hits.append(parent_rev)
    all_parents_cached = len(parent_hits) == len(revision.parent_ids)
    # A revision without parents yields an empty set, so it never matches.
    if all_parents_cached and len(set(parent_hits)) == 1:
        entry_rev = parent_hits[0]
    else:
        entry_rev = self._get_file_revision(revid, path)
    cache[key] = entry_rev
    entry.revision = entry_rev
378
# Keep a second name for the get_inventory already bound in this scope;
# _get_inventory_caching delegates to it on a cache miss.
_original_get_inventory = get_inventory
379
def _get_inventory_caching(self, revid):
    """Return the inventory for ``revid``, memoised in ``_inventory_cache``.

    On a miss the inventory is built with ``_original_get_inventory``,
    stored in the cache and returned.

    :param revid: revision id whose inventory is wanted.
    :return: the cached or freshly built inventory.
    """
    if revid in self._inventory_cache:
        return self._inventory_cache[revid]
    inv = self._original_get_inventory(revid)
    self._inventory_cache[revid] = inv
    # The freshly built inventory must be handed back as well as cached.
    return inv
386
def _set_entry_revision_optimized3(self, entry, revid, path, git_id):
    """Set ``entry.revision`` by comparing against parent inventories.

    Directories get ``revid`` directly. For other entries, the inventories
    of all not-yet-cached ancestors of ``revid`` are built first (parents
    before children), then the entry inherits the revision of the first
    parent inventory entry with the same ``file_id``, ``text_sha1`` and
    ``executable`` bit. If no parent matches, ``revid`` is used.

    ``path`` and ``git_id`` are unused; they are accepted so that all the
    ``_set_entry_revision_*`` variants share one signature.

    :param entry: inventory entry whose ``revision`` attribute is assigned.
    :param revid: revision id the entry belongs to.
    """
    # Depends on _get_inventory_caching: the ancestor walk below only
    # terminates if get_inventory adds its result to _inventory_cache.

    # Set the revision of directories to the current revision. It's not
    # accurate, but we cannot compare directory contents from here.
    if entry.kind == 'directory':
        entry.revision = revid
        return

    # Build ancestral inventories by walking parents depth first. Ideally
    # this should be done in an inter-repository, where already imported
    # data can be used as reference.
    inventory_cache = self._inventory_cache
    parent_ids = self.get_revision(revid).parent_ids
    pending_revids = list(reversed(parent_ids))
    while pending_revids:
        ancestor_id = pending_revids[-1]
        if ancestor_id in inventory_cache:
            pending_revids.pop()
            continue
        # Not in cache, ensure parents are in cache first. The ancestor
        # stays on the stack and is revisited once they are built.
        missing = [parent_id
                   for parent_id in self.get_revision(ancestor_id).parent_ids
                   if parent_id not in inventory_cache]
        if missing:
            pending_revids.extend(reversed(missing))
        else:
            # All parents are in cache, we can now build this inventory.
            pending_revids.pop()
            self.get_inventory(ancestor_id)  # populate cache

    # We now have all ancestral inventories in the cache. Get entries by
    # the same file_id in parent inventories, and use the revision of the
    # first one that has the same text_sha1 and executable bit.
    for parent_id in parent_ids:
        inventory = self.get_inventory(parent_id)
        if entry.file_id not in inventory:
            continue
        parent_entry = inventory[entry.file_id]
        if (parent_entry.text_sha1 == entry.text_sha1
                and parent_entry.executable == entry.executable):
            entry.revision = parent_entry.revision
            return

    # If we get here, that means we found no matching parent entry, use
    # the current revision.
    entry.revision = revid
431
def _set_entry_revision_optimized4(self, entry, revid, path, git_id):
    """Set ``entry.revision`` using a cache keyed on the executable bit too.

    The cache key is ``(revid, path, git_id, entry.executable)``. Lookup
    order:

    1. the cache entry for ``revid`` itself;
    2. the first parent of ``revid`` with a cache entry for the same
       ``(path, git_id, executable)``;
    3. ``self._get_file_revision(revid, path)`` when no parent is cached.

    The result of steps 2/3 is stored in the cache under ``revid``.

    :param entry: inventory entry; its ``executable`` attribute is read and
        its ``revision`` attribute is assigned.
    :param revid: revision id the entry belongs to.
    :param path: path of the entry inside the tree.
    :param git_id: git object id of the entry, used as part of the key.
    """
    # Same as optimized1, but uses the executable bit in the cache index.
    # That appears to have the same behaviour as the unoptimized version.
    cache = self._entry_revision_cache
    executable = entry.executable
    key = (revid, path, git_id, executable)
    cached = cache.get(key)
    if cached is not None:
        entry.revision = cached
        return
    revision = self.get_revision(revid)
    for parent_id in revision.parent_ids:
        entry_rev = cache.get((parent_id, path, git_id, executable))
        if entry_rev is not None:
            # Same path, git object and mode in a parent: inherit.
            break
    else:
        # No parent has a matching entry; fall back to the slow lookup.
        entry_rev = self._get_file_revision(revid, path)
    cache[key] = entry_rev
    entry.revision = entry_rev
451
def _set_entry_revision_optimized5(self, entry, revid, path, git_id):
452
# Same as optimized4, but makes get_inventory non-reentrant, and uses
453
# a more structured cache.
455
# cache[revision][path, git_id, executable] -> revision
323
def _set_entry_revision(self, entry, revid, path, git_id):
457
324
# If a revision is in the cache, we assume it contains entries for the
458
325
# whole inventory. So if all parent revisions are in the cache, but no
459
326
# parent entry is present, then the entry revision is the current
460
# revision. That amortizes the number of git calls for large pulls to
327
# revision. That amortizes the number of _get_file_revision calls for
328
# large pulls to a "small number".
462
329
cached = self._entry_revision_cache.get(revid, {}).get(
463
330
(path, git_id, entry.executable))
464
331
if cached is not None: