2026-1-6
This commit is contained in:
70
venv/Lib/site-packages/whoosh/support/levenshtein.py
Normal file
70
venv/Lib/site-packages/whoosh/support/levenshtein.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""
|
||||
Contains functions implementing edit distance algorithms.
|
||||
"""
|
||||
|
||||
from whoosh.compat import xrange
|
||||
|
||||
|
||||
def levenshtein(seq1, seq2, limit=None):
|
||||
"""Returns the Levenshtein edit distance between two strings.
|
||||
"""
|
||||
|
||||
oneago = None
|
||||
thisrow = list(range(1, len(seq2) + 1)) + [0]
|
||||
for x in xrange(len(seq1)):
|
||||
# Python lists wrap around for negative indices, so put the
|
||||
# leftmost column at the *end* of the list. This matches with
|
||||
# the zero-indexed strings and saves extra calculation.
|
||||
oneago, thisrow = thisrow, [0] * len(seq2) + [x + 1]
|
||||
for y in xrange(len(seq2)):
|
||||
delcost = oneago[y] + 1
|
||||
addcost = thisrow[y - 1] + 1
|
||||
subcost = oneago[y - 1] + (seq1[x] != seq2[y])
|
||||
thisrow[y] = min(delcost, addcost, subcost)
|
||||
|
||||
if limit and x > limit and min(thisrow) > limit:
|
||||
return limit + 1
|
||||
|
||||
return thisrow[len(seq2) - 1]
|
||||
|
||||
|
||||
def damerau_levenshtein(seq1, seq2, limit=None):
|
||||
"""Returns the Damerau-Levenshtein edit distance between two strings.
|
||||
"""
|
||||
|
||||
oneago = None
|
||||
thisrow = list(range(1, len(seq2) + 1)) + [0]
|
||||
for x in xrange(len(seq1)):
|
||||
# Python lists wrap around for negative indices, so put the
|
||||
# leftmost column at the *end* of the list. This matches with
|
||||
# the zero-indexed strings and saves extra calculation.
|
||||
twoago, oneago, thisrow = oneago, thisrow, [0] * len(seq2) + [x + 1]
|
||||
for y in xrange(len(seq2)):
|
||||
delcost = oneago[y] + 1
|
||||
addcost = thisrow[y - 1] + 1
|
||||
subcost = oneago[y - 1] + (seq1[x] != seq2[y])
|
||||
thisrow[y] = min(delcost, addcost, subcost)
|
||||
# This block deals with transpositions
|
||||
if (x > 0 and y > 0 and seq1[x] == seq2[y - 1]
|
||||
and seq1[x - 1] == seq2[y] and seq1[x] != seq2[y]):
|
||||
thisrow[y] = min(thisrow[y], twoago[y - 2] + 1)
|
||||
|
||||
if limit and x > limit and min(thisrow) > limit:
|
||||
return limit + 1
|
||||
|
||||
return thisrow[len(seq2) - 1]
|
||||
|
||||
|
||||
def relative(a, b):
|
||||
"""Returns the relative distance between two strings, in the range
|
||||
[0-1] where 1 means total equality.
|
||||
"""
|
||||
|
||||
d = distance(a, b)
|
||||
longer = float(max((len(a), len(b))))
|
||||
shorter = float(min((len(a), len(b))))
|
||||
r = ((longer - d) / longer) * (shorter / longer)
|
||||
return r
|
||||
|
||||
|
||||
distance = damerau_levenshtein
|
||||
Reference in New Issue
Block a user