445
445
unescape_revid_cache = {}
446
446
unescape_fileid_cache = {}
448
# jam 20061218 In a big fetch, this handles hundreds of thousands
449
# of lines, so it has had a lot of inlining and optimizing done.
450
# Sorry that it is a little bit messy.
448
451
# Move several functions to be local variables, since this is a
# long-running loop.
450
453
search = self._file_ids_altered_regex.search
451
unescape = _unescape_xml_cached
454
unescape = _unescape_xml
452
455
setdefault = result.setdefault
453
456
pb = ui.ui_factory.nested_progress_bar()
457
460
match = search(line)
458
461
if match is None:
463
# One call to match.group() returning multiple items is quite a
464
# bit faster than 2 calls to match.group() each returning 1
460
465
file_id, revision_id = match.group('file_id', 'revision_id')
461
revision_id = unescape(revision_id, unescape_revid_cache)
467
# Inlining the cache lookups helps a lot when you make 170,000
468
# lines and 350k ids, versus 8.4k unique ids.
469
# Using a cache helps in 2 ways:
470
# 1) Avoids unnecessary decoding calls
471
# 2) Re-uses cached strings, which helps in future set and
#    equality checks.
473
# (2) is enough that removing encoding entirely along with
474
# the cache (so we are using plain strings) results in no
475
# performance improvement.
477
revision_id = unescape_revid_cache[revision_id]
479
unescaped = unescape(revision_id)
480
unescape_revid_cache[revision_id] = unescaped
481
revision_id = unescaped
462
483
if revision_id in selected_revision_ids:
463
file_id = unescape(file_id, unescape_fileid_cache)
485
file_id = unescape_fileid_cache[file_id]
487
unescaped = unescape(file_id)
488
unescape_fileid_cache[file_id] = unescaped
464
490
setdefault(file_id, set()).add(revision_id)
2576
2602
if _unescape_re is None:
2577
2603
_unescape_re = re.compile('\&([^;]*);')
2578
2604
return _unescape_re.sub(_unescaper, data)
2581
def _unescape_xml_cached(data, cache):
2585
unescaped = _unescape_xml(data)
2586
cache[data] = unescaped