"""Effecient git-aware file finder with filtering capabilities.Uses Linux commands: find, comm and git-ignore for an efficient path walker.Example usage:.. code-block:: python # Simple usage with default root finder = cli2.Find() files = finder.files() dirs = finder.dirs() # Usage with filters and callback def callback(filepath): print(f"Found: {filepath}") finder = cli2.Find( root="/path/to/repo", glob_include=['*.py'], glob_exclude=['*test*'], file_callback=callback ) files = finder.files() dirs = finder.dirs()"""importcli2fromfnmatchimportfnmatchfrompathlibimportPathimportos
[docs]classFind:""" A class to walk through files and directories not ignored by git with optional filtering. .. py:attribute:: root Root directory for file operations .. py:attribute:: glob_include Optional list of glob patterns to include .. py:attribute:: glob_exclude Optional list of glob patterns to exclude .. py:attribute:: file_callback Optional callback function called for each file or directory """def__init__(self,root=None,glob_include=None,glob_exclude=None,file_callback=None,):""" Initialize Find with optional root directory, filters, and callback. :param root: Root directory (defaults to current working directory if not specified) :type root: str or pathlib.Path or None :param glob_include: List of glob patterns to include :type glob_include: list or None :param glob_exclude: List of glob patterns to exclude :type glob_exclude: list or None :param file_callback: Function to call for each file or directory :type file_callback: callable or None """self.root=Path(rootifrootisnotNoneelseos.getcwd()).resolve()self.glob_include=glob_includeifglob_includeisnotNoneelse[]self.glob_exclude=glob_excludeifglob_excludeisnotNoneelse[]self.file_callback=file_callbackdef_matches_filters(self,filepath):""" Check if a file or directory matches the include/exclude filters. :param filepath: Path to check against filters :type filepath: pathlib.Path :return: True if path should be included, False otherwise :rtype: bool """filepath_str=str(filepath.relative_to(self.root))ifself.glob_include:ifnotany(fnmatch(filepath_str,pattern)forpatterninself.glob_include):returnFalseifself.glob_exclude:ifany(fnmatch(filepath_str,pattern)forpatterninself.glob_exclude):returnFalsereturnTrue
[docs]deffiles(self,directory=None):""" List files not ignored by git, applying filters and callback. :param directory: Directory to start search from (defaults to root if not specified) :type directory: str or pathlib.Path or None :return: List of Path objects for files not ignored by git that match filters :rtype: list :raises RuntimeError: If the git command fails """base_path=Path(directory).resolve()ifdirectoryelseself.rootcmd=' '.join([f'comm -23 <(find {base_path} -type f | sort)',f'<(find {base_path} -type f | git check-ignore --stdin | sort)'])proc=cli2.Proc("bash","-c",cmd).wait()ifproc.rc!=0:raiseRuntimeError(f"Command failed with return code {proc.rc}: {proc.stderr}")files=[]forlineinproc.stdout.splitlines():ifnotline.strip():continuefilepath=(base_path/line.strip()).resolve()if(notself.glob_includeandnotself.glob_exclude)orself._matches_filters(filepath):files.append(filepath)ifself.file_callback:self.file_callback(filepath)returnfiles
[docs]defdirs(self,directory=None):""" List directories not ignored by git, applying filters and callback. :param directory: Directory to start search from (defaults to root if not specified) :type directory: str or pathlib.Path or None :return: List of Path objects for directories not ignored by git that match filters :rtype: list :raises RuntimeError: If the git command fails """base_path=Path(directory).resolve()ifdirectoryelseself.rootcmd=' '.join([f'comm -23 <(find {base_path} -type d | sort)',f'<(find {base_path} -type d | git check-ignore --stdin | sort)'])proc=cli2.Proc("bash","-c",cmd).wait()ifproc.rc!=0:raiseRuntimeError(f"Command failed with return code {proc.rc}: {proc.stderr}")dirs=[]forlineinproc.stdout.splitlines():ifnotline.strip():continuedirpath=(base_path/line.strip()).resolve()ifdirpath==base_path:continueif(notself.glob_includeandnotself.glob_exclude)orself._matches_filters(dirpath):dirs.append(dirpath)ifself.file_callback:self.file_callback(dirpath)returndirs