Source code for cli2.notlevenshtein

"""
Levenshtein-like closest-match lookup, implemented with difflib.

.. code-block:: python

        source_word = "apple"
        word_list = ["apply", "aple", "banana", "orange", "applet"]

        closest = cli2.closest(source_word, word_list)
        print(f"The closest word to '{source_word}' is: {closest}")
"""

import difflib


def closest(source_token, token_list):
    """
    Pick the entry of token_list that is most similar to source_token.

    Similarity is measured with difflib.SequenceMatcher: the distance of a
    candidate is ``1 - ratio()`` and the candidate with the smallest
    distance wins. When several candidates share the smallest distance, the
    one encountered first is returned.

    :param source_token: The token to compare against (string).
    :param token_list: Candidate tokens (strings).
    :return: The best-matching candidate, or None if token_list is empty.
    """
    if not token_list:
        return None

    def _distance(candidate):
        # source_token stays the first sequence: ratio() may depend on the
        # order of its arguments.
        similarity = difflib.SequenceMatcher(
            None, source_token, candidate
        ).ratio()
        return 1 - similarity

    # min() keeps the first of several equally distant candidates.
    return min(token_list, key=_distance, default=None)
def closest_path(path, paths):
    """
    Find the closest path from paths.

    LLM may output broken paths, this fixes them.

    The path is resolved one ``/``-separated component at a time. At each
    depth the component is kept if some remaining candidate has exactly that
    component there; otherwise it is replaced by the most similar component
    found at that depth (see ``closest``). The candidates are then narrowed
    to those sharing the resolved component, so deeper components are only
    matched against paths under the prefix resolved so far.

    The result always has as many components as ``path``; it may therefore
    be a leading part of a candidate rather than a complete candidate.

    :param path: Path to find closest
    :param paths: List of paths to search in.
    :return: The corrected path as a string, or None when no candidate has
             a component at some depth of ``path`` (including when
             ``paths`` is empty).
    """
    parts = str(path).split('/')
    # Split every candidate once instead of re-splitting at every depth.
    candidates = [str(candidate).split('/') for candidate in paths]

    for number, part in enumerate(parts):
        # A candidate shorter than the current depth has no component to
        # offer here; indexing it would raise IndexError.
        candidates = [
            components
            for components in candidates
            if len(components) > number
        ]
        available = {components[number] for components in candidates}

        if part in available:
            path_part = part
        else:
            # sorted() makes tie-breaking independent of set iteration
            # order, which varies between runs for strings.
            path_part = closest(part, sorted(available))
            if path_part is None:
                return None  # not found at all

        parts[number] = path_part
        # Narrow on whole components: a plain string-prefix test would let
        # "foo/bar" keep "foo/barbaz/...".
        candidates = [
            components
            for components in candidates
            if components[number] == path_part
        ]

    return '/'.join(parts)