Source code for cli2.notlevenshtein
"""
Approximate string matching, similar in spirit to Levenshtein distance but
implemented with difflib.

.. code-block:: python

    source_word = "apple"
    word_list = ["apply", "aple", "banana", "orange", "applet"]
    closest = cli2.closest(source_word, word_list)
    print(f"The closest word to '{source_word}' is: {closest}")
"""
import difflib
[docs]
def closest(source_token, token_list):
    """
    Pick the entry of token_list that is most similar to source_token.

    Similarity is measured with :class:`difflib.SequenceMatcher`; the distance
    of a candidate is ``1 - ratio``. When several candidates share the smallest
    distance, the one encountered first wins.

    :param source_token: The source token (string).
    :param token_list: The candidate tokens (strings).
    :return: The candidate with the smallest distance, or None if there are
             no candidates.
    """
    if not token_list:
        return None

    def _distance(candidate):
        # Distance metric: 1 - similarity ratio, so 0 means "identical".
        matcher = difflib.SequenceMatcher(None, source_token, candidate)
        return 1 - matcher.ratio()

    # min() keeps the first item among equal keys; default covers iterables
    # that are truthy yet yield nothing (e.g. an exhausted iterator).
    return min(token_list, key=_distance, default=None)
[docs]
def closest_path(path, paths):
    """
    Find the closest path from paths.

    LLM may output broken paths, this fixes them. The path is repaired one
    ``/``-separated component at a time: a component that exists among the
    remaining candidates at that depth is kept, otherwise it is replaced by
    the most similar one according to :func:`closest`. After each component
    the candidates are narrowed to those sharing the components chosen so far.

    :param path: Path to find closest (string using ``/`` separators).
    :param paths: Iterable of paths to search in; each item is converted with
                  ``str()`` before being split on ``/``.
    :return: The repaired path as a ``/``-joined string, or None when a
             component cannot be matched against any remaining candidate.
    """
    parts = path.split('/')
    # Split every candidate once. Working on whole components (instead of a
    # string prefix) keeps "ab" from also selecting a sibling such as "abc".
    candidates = [str(candidate).split('/') for candidate in paths]

    for number, part in enumerate(parts):
        # Candidates with fewer components than the current depth have
        # nothing to offer here; skip them rather than raise IndexError.
        available = {
            candidate[number]
            for candidate in candidates
            if len(candidate) > number
        }
        if not available:
            return None  # no candidate reaches this depth

        if part in available:
            path_part = part
        else:
            path_part = closest(part, available)
            if not path_part:
                return None  # not found at all

        parts[number] = path_part
        # Earlier components already match for every remaining candidate, so
        # only the component at the current depth needs to be compared.
        candidates = [
            candidate
            for candidate in candidates
            if len(candidate) > number and candidate[number] == path_part
        ]

    return '/'.join(parts)