"""Like levenshtein with difflib... code-block:: python source_word = "apple" word_list = ["apply", "aple", "banana", "orange", "applet"] closest = cli2.closest(source_word, word_list) print(f"The closest word to '{source_word}' is: {closest}")"""importdifflib
def closest(source_token, token_list):
    """
    Pick the entry of token_list that most resembles source_token.

    Each candidate is scored with :py:class:`difflib.SequenceMatcher`; its
    distance is ``1 - ratio()`` and the candidate with the smallest distance
    wins. When several candidates are equally distant, the one met first
    while iterating token_list is returned.

    :param source_token: The reference token (string).
    :param token_list: Candidate tokens (strings) to compare against.
    :return: The nearest candidate, or None if token_list is empty.
    """
    if not token_list:
        return None

    def distance(candidate):
        # Turn the similarity ratio (1.0 == identical) into a distance
        similarity = difflib.SequenceMatcher(
            None,
            source_token,
            candidate,
        ).ratio()
        return 1 - similarity

    # min() returns the first of several equally distant candidates
    return min(token_list, key=distance, default=None)
def closest_path(path, paths):
    """
    Find the closest path from paths.

    LLM may output broken paths, this fixes them. The path is split on
    ``/`` and repaired one component at a time: a component that exists
    among the remaining candidates at the same position is kept, otherwise
    it is replaced by the result of :py:func:`closest` over those
    components. After each component, only the candidates whose component
    at that position equals the chosen one stay in the running.

    Candidates with fewer components than the position being examined are
    ignored rather than raising ``IndexError``.

    :param path: Path to find closest
    :param paths: Iterable of paths to search in; each is converted with
                  ``str()`` before being split.
    :return: The repaired path as a ``/``-joined string, or None when a
             component has no usable replacement among the candidates.
    """
    parts = str(path).split('/')

    # Work on split components so that "ab/c" is never mistaken for a match
    # of the prefix "a"; dict.fromkeys de-duplicates while keeping the order
    # of paths, which makes tie-breaking in closest() reproducible.
    candidates = list(dict.fromkeys(
        tuple(str(candidate).split('/'))
        for candidate in paths
    ))

    for number, part in enumerate(parts):
        # A candidate with no component at this position cannot match
        candidates = [
            candidate for candidate in candidates
            if len(candidate) > number
        ]
        options = list(dict.fromkeys(
            candidate[number] for candidate in candidates
        ))

        if part in options:
            chosen = part
        else:
            chosen = closest(part, options)
            if not chosen:
                return None  # not found at all

        parts[number] = chosen
        candidates = [
            candidate for candidate in candidates
            if candidate[number] == chosen
        ]

    return '/'.join(parts)