Source code for cli2.find

"""
Efficient git-aware file finder with filtering capabilities.

Uses the Linux commands find, comm and git check-ignore (run through bash) for
an efficient path walker.

Example usage:

.. code-block:: python

    # Simple usage with default root
    finder = cli2.Find(flags='-type f')

    # Usage with filters and callback
    def callback(filepath):
        print(f"Found: {filepath}")

    finder = cli2.Find(
        root="/path/to/repo",
        glob_include=['*.py'],
        glob_exclude=['*test*'],
        callback=callback
    )
    files = finder.run()
"""
from fnmatch import fnmatch
from pathlib import Path
import os
import subprocess


class Find:
    """
    A class to walk through files and directories not ignored by git with
    optional filtering.

    .. py:attribute:: root

        Root directory for file operations

    .. py:attribute:: glob_include

        Optional list of glob patterns to include

    .. py:attribute:: glob_exclude

        Optional list of glob patterns to exclude

    .. py:attribute:: callback

        Optional callback function called for each file or directory

    .. py:attribute:: flags

        Extra arguments for the find command, set this to '-type f' to limit
        find to files for example.
    """

    def __init__(
        self,
        root=None,
        glob_include=None,
        glob_exclude=None,
        callback=None,
        flags='',
    ):
        """
        Initialize Find with optional root directory, filters, and callback.

        :param root: Root directory (defaults to current working directory if
                     not specified)
        :type root: str or pathlib.Path or None
        :param glob_include: List of glob patterns to include
        :type glob_include: list or None
        :param glob_exclude: List of glob patterns to exclude
        :type glob_exclude: list or None
        :param callback: Function to call for each file or directory
        :type callback: callable or None
        :param flags: Arguments for the find command, inserted verbatim into
                      the shell script executed by :py:meth:`run`.
        :type flags: str
        """
        self.root = Path(os.getcwd() if root is None else root).resolve()
        self.glob_include = glob_include if glob_include is not None else []
        self.glob_exclude = glob_exclude if glob_exclude is not None else []
        self.callback = callback
        self.flags = flags

    def _matches_filters(self, filepath):
        """
        Check if a file or directory matches the include/exclude filters.

        Patterns are matched with :py:func:`fnmatch.fnmatch` against the path
        relative to :py:attr:`root`.

        :param filepath: Path to check against filters
        :type filepath: pathlib.Path
        :return: True if path should be included, False otherwise
        :rtype: bool
        :raises ValueError: If filepath is not located under
                            :py:attr:`root`.
        """
        filepath_str = str(filepath.relative_to(self.root))

        # With include patterns, at least one of them has to match.
        if self.glob_include and not any(
            fnmatch(filepath_str, pattern) for pattern in self.glob_include
        ):
            return False

        # A single matching exclude pattern rejects the path.
        if self.glob_exclude and any(
            fnmatch(filepath_str, pattern) for pattern in self.glob_exclude
        ):
            return False

        return True

    def run(self, directory=None, relative=True):
        """
        Actually run the find command.

        Lists everything ``find`` reports under the directory, removes what
        ``git check-ignore`` reports as ignored, then applies the glob
        filters and calls the callback for every remaining path.

        :param directory: str or Path, or None to use self.root
        :param relative: Whether to return paths relative to the directory
                         instead of absolute ones
        :return: List of paths that passed the filters
        :rtype: list of pathlib.Path
        :raises subprocess.CalledProcessError: If the shell pipeline exits
                                               with a non-zero status.
        """
        base_path = Path(directory).resolve() if directory else self.root

        # comm -23 keeps the lines that only appear in the first listing,
        # that is every found path minus the ones git reports as ignored.
        script = (
            f'comm -23 <(find . {self.flags} | sort) '
            f'<(find . {self.flags} | git check-ignore --stdin | sort)'
        )

        # The script is handed to bash as a single argument instead of being
        # wrapped in a double-quoted ``bash -c "..."`` string for an outer
        # shell, so that quotes inside flags (-name "*.py") reach find
        # untouched.  stderr is captured on its own pipe: when merged into
        # stdout, diagnostics (for instance git complaining outside of a
        # repository) were parsed as if they were paths.
        proc = subprocess.run(
            ['bash', '-c', script],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=str(base_path),
            check=True,
        )

        # Without any filter there is nothing to match; this also keeps
        # _matches_filters (which requires paths under self.root) out of the
        # way when another directory is walked unfiltered.
        unfiltered = not self.glob_include and not self.glob_exclude

        results = []
        for raw in proc.stdout.split(b'\n'):
            if not raw:
                continue

            # os.fsdecode copes with file names that are not valid UTF-8.
            # The path is deliberately not resolved: following symlinks would
            # report the link target rather than the path that was found, and
            # fail in relative_to() for links pointing outside of base_path.
            filepath = base_path / os.fsdecode(raw)

            if unfiltered or self._matches_filters(filepath):
                if relative:
                    filepath = filepath.relative_to(base_path)
                results.append(filepath)
                if self.callback:
                    self.callback(filepath)

        return results