gh-142939: difflib.get_close_matches performance (#142940)

This commit is contained in:
dgpb 2025-12-30 09:15:59 +02:00 committed by GitHub
parent ef834dee89
commit 23ad9c5d01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 10 additions and 7 deletions

View file

@@ -638,15 +638,15 @@ def quick_ratio(self):
 # avail[x] is the number of times x appears in 'b' less the
 # number of times we've seen it in 'a' so far ... kinda
 avail = {}
-availhas, matches = avail.__contains__, 0
+matches = 0
 for elt in self.a:
-if availhas(elt):
+if elt in avail:
 numb = avail[elt]
 else:
 numb = fullbcount.get(elt, 0)
 avail[elt] = numb - 1
 if numb > 0:
-matches = matches + 1
+matches += 1
 return _calculate_ratio(matches, len(self.a) + len(self.b))
 def real_quick_ratio(self):
@@ -702,10 +702,12 @@ def get_close_matches(word, possibilities, n=3, cutoff=0.6):
 s.set_seq2(word)
 for x in possibilities:
 s.set_seq1(x)
-if s.real_quick_ratio() >= cutoff and \
-s.quick_ratio() >= cutoff and \
-s.ratio() >= cutoff:
-result.append((s.ratio(), x))
+if s.real_quick_ratio() < cutoff or s.quick_ratio() < cutoff:
+continue
+ratio = s.ratio()
+if ratio >= cutoff:
+result.append((ratio, x))
 # Move the best scorers to head of list
 result = _nlargest(n, result)

View file

@@ -0,0 +1 @@
+Performance optimisations for :func:`difflib.get_close_matches`