diff --git a/README.md b/README.md index 3b1a018..e6385f1 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ - **Syntax Highlighting**: Includes syntax highlighting for known file types in the generated Markdown file. - **Structure Only Option**: The `--structure-only` flag can be used to generate the Markdown file with just the directory structure, omitting the contents of the files. - **Gitignore Support**: Automatically respects `.gitignore` patterns to exclude files and directories. +- **Include and Exclude Patterns**: Use `--include` and `--exclude` to specify patterns for files and directories to include or exclude. ## Installation @@ -35,7 +36,7 @@ pip install -r requirements.txt To use `reposnap` from the command line, run it with the following options: ```bash -reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] path +reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] [-i INCLUDE [INCLUDE ...]] [-e EXCLUDE [EXCLUDE ...]] path ``` - `path`: Path to the Git repository or subdirectory. @@ -43,6 +44,19 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] path - `-o, --output`: The name of the output Markdown file. Defaults to `output.md`. - `--structure-only`: Generate a Markdown file that includes only the project structure, without file contents. - `--debug`: Enable debug-level logging. +- `-i, --include`: File/folder patterns to include. For example, `-i "*.py"` includes only Python files. +- `-e, --exclude`: File/folder patterns to exclude. For example, `-e "*.md"` excludes all Markdown files. + +#### Pattern Matching + +- **Pattern Interpretation**: Patterns follow gitignore-style syntax, with one difference for patterns that contain no wildcard characters. + - **Patterns without Wildcards**: If a pattern does not contain any wildcard characters (`*`, `?`, or `[`), it is treated as `*pattern*`. This means it will match any file or directory containing `pattern` in its name. + - **Patterns with Wildcards**: If a pattern contains wildcard characters, it is used unchanged and matched with standard gitignore-style rules. 
+ +- **Examples**: + - `-e "gui"`: Excludes any files or directories containing `"gui"` in their names. + - `-i "*.py"`: Includes only files ending with `.py`. + - `-e "*.test.*"`: Excludes files with `.test.` in their names. #### Examples @@ -58,10 +72,22 @@ reposnap [-h] [-o OUTPUT] [--structure-only] [--debug] path reposnap my_project/ --structure-only ``` -3. **Generate a Markdown file excluding certain files and directories**: +3. **Generate a Markdown file including only Python files**: + + ```bash + reposnap my_project/ -i "*.py" + ``` + +4. **Generate a Markdown file excluding certain files and directories**: + + ```bash + reposnap my_project/ -e "tests" "*.md" + ``` + +5. **Exclude files and directories containing a substring**: ```bash - reposnap my_project/ -o output.md + reposnap my_project/ -e "gui" ``` ### Graphical User Interface diff --git a/pyproject.toml b/pyproject.toml index 54d9efc..e220c7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "reposnap" -version = "0.4.0" +version = "0.5.0" description = "Generate a Markdown file with all contents of your project" authors = [ { name = "agoloborodko" } diff --git a/src/reposnap/controllers/project_controller.py b/src/reposnap/controllers/project_controller.py index 6db28a8..a79101a 100644 --- a/src/reposnap/controllers/project_controller.py +++ b/src/reposnap/controllers/project_controller.py @@ -19,6 +19,8 @@ def __init__(self, args: Optional[object] = None): self.args: object = args self.file_tree: Optional[FileTree] = None self.gitignore_patterns: List[str] = [] + self.include_patterns: List[str] = args.include if args and hasattr(args, 'include') else [] + self.exclude_patterns: List[str] = args.exclude if args and hasattr(args, 'exclude') else [] if self.root_dir: self.gitignore_patterns = self._load_gitignore_patterns() @@ -40,6 +42,30 @@ def collect_file_tree(self) -> None: git_files: List[Path] = git_repo.get_git_files() self.logger.debug(f"Git files 
before filtering: {git_files}") + # Adjust patterns + def adjust_patterns(patterns): + adjusted = [] + for pattern in patterns: + if '*' in pattern or '?' in pattern or '[' in pattern: + adjusted.append(pattern) + else: + adjusted.append(f'*{pattern}*') + return adjusted + + # Apply include patterns + if self.include_patterns: + adjusted_include_patterns = adjust_patterns(self.include_patterns) + include_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, adjusted_include_patterns) + git_files = [f for f in git_files if include_spec.match_file(f.as_posix())] + self.logger.debug(f"Git files after include patterns: {git_files}") + + # Apply exclude patterns + if self.exclude_patterns: + adjusted_exclude_patterns = adjust_patterns(self.exclude_patterns) + exclude_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, adjusted_exclude_patterns) + git_files = [f for f in git_files if not exclude_spec.match_file(f.as_posix())] + self.logger.debug(f"Git files after exclude patterns: {git_files}") + self.logger.info("Building tree structure.") file_system: FileSystem = FileSystem(self.root_dir) tree_structure: dict = file_system.build_tree_structure(git_files) diff --git a/src/reposnap/interfaces/cli.py b/src/reposnap/interfaces/cli.py index 2be1aeb..c7aa38b 100644 --- a/src/reposnap/interfaces/cli.py +++ b/src/reposnap/interfaces/cli.py @@ -13,6 +13,12 @@ def main(): help='Only include the file structure without content.') parser.add_argument('--debug', action='store_true', help='Enable debug-level logging.') + # New arguments for include and exclude patterns + parser.add_argument('-i', '--include', nargs='*', default=[], + help='File/folder patterns to include.') + parser.add_argument('-e', '--exclude', nargs='*', default=[], + help='File/folder patterns to exclude.') + args = parser.parse_args() log_level = logging.DEBUG if args.debug else logging.INFO @@ -21,5 +27,6 @@ def main(): controller = ProjectController(args) 
controller.run() + if __name__ == "__main__": main() diff --git a/tests/reposnap/test_project_controller.py b/tests/reposnap/test_project_controller.py index 95b3189..a66c668 100644 --- a/tests/reposnap/test_project_controller.py +++ b/tests/reposnap/test_project_controller.py @@ -83,15 +83,224 @@ def test_project_controller_run(mock_git_repo, mock_file_system, mock_markdown_g mock_file_system.return_value = mock_file_system_instance mock_markdown_generator.return_value = mock_markdown_generator_instance - mock_git_repo_instance.get_git_files.return_value = ['file1.py', 'file2.py'] + # Use Path objects instead of strings + mock_git_repo_instance.get_git_files.return_value = [Path('file1.py'), Path('file2.py')] mock_file_system_instance.build_tree_structure.return_value = {'dir': {'file1.py': 'file1.py'}} args = MagicMock() args.path = 'root_dir' args.output = 'output.md' args.structure_only = False + args.include = [] + args.exclude = [] + args.debug = False # Add if necessary controller = ProjectController(args) controller.run() mock_markdown_generator_instance.generate_markdown.assert_called_once() + + +def test_include_pattern(): + with tempfile.TemporaryDirectory() as temp_dir: + structure = { + 'src': { + 'module': { + 'file1.py': 'print("File 1")', + 'file2.txt': 'File 2 content', + 'submodule': { + 'file3.py': 'print("File 3")', + 'file4.md': '# File 4', + } + } + }, + 'README.md': '# Project README', + 'setup.py': 'setup code', + 'notes.txt': 'Some notes', + } + + create_directory_structure(temp_dir, structure) + + args = type('Args', (object,), { + 'path': temp_dir, + 'output': os.path.join(temp_dir, 'output.md'), + 'structure_only': False, + 'debug': False, + 'include': ['*.py'], + 'exclude': [] + }) + + # Mock the GitRepo class + with patch('reposnap.controllers.project_controller.GitRepo') as MockGitRepo: + mock_git_repo_instance = MockGitRepo.return_value + + # Collect all files under temp_dir + all_files = [] + for root, dirs, files in 
os.walk(temp_dir): + for name in files: + file_path = Path(root) / name + rel_path = file_path.relative_to(temp_dir) + all_files.append(rel_path) + + mock_git_repo_instance.get_git_files.return_value = all_files + + controller = ProjectController(args) + controller.collect_file_tree() + + # Get the list of files included in the tree + included_files = [] + + def traverse(tree, path=''): + for name, node in tree.items(): + current_path = os.path.join(path, name) + if isinstance(node, dict): + traverse(node, current_path) + else: + included_files.append(current_path) + + traverse(controller.file_tree.structure) + + expected_files = [ + os.path.join('src', 'module', 'file1.py'), + os.path.join('src', 'module', 'submodule', 'file3.py'), + 'setup.py', + ] + + assert sorted(included_files) == sorted(expected_files) + + +def test_exclude_pattern(): + with tempfile.TemporaryDirectory() as temp_dir: + structure = { + 'src': { + 'module': { + 'file1.py': 'print("File 1")', + 'file2.txt': 'File 2 content', + 'submodule': { + 'file3.py': 'print("File 3")', + 'file4.md': '# File 4', + } + } + }, + 'README.md': '# Project README', + 'setup.py': 'setup code', + 'notes.txt': 'Some notes', + } + + create_directory_structure(temp_dir, structure) + + args = type('Args', (object,), { + 'path': temp_dir, + 'output': os.path.join(temp_dir, 'output.md'), + 'structure_only': False, + 'debug': False, + 'include': [], + 'exclude': ['*.md', '*.txt'] + }) + + with patch('reposnap.controllers.project_controller.GitRepo') as MockGitRepo: + mock_git_repo_instance = MockGitRepo.return_value + + # Collect all files under temp_dir + all_files = [] + for root, dirs, files in os.walk(temp_dir): + for name in files: + file_path = Path(root) / name + rel_path = file_path.relative_to(temp_dir) + all_files.append(rel_path) + + mock_git_repo_instance.get_git_files.return_value = all_files + + controller = ProjectController(args) + controller.collect_file_tree() + + included_files = [] + + def 
traverse(tree, path=''): + for name, node in tree.items(): + current_path = os.path.join(path, name) + if isinstance(node, dict): + traverse(node, current_path) + else: + included_files.append(current_path) + + traverse(controller.file_tree.structure) + + expected_files = [ + os.path.join('src', 'module', 'file1.py'), + os.path.join('src', 'module', 'submodule', 'file3.py'), + 'setup.py', + ] + + assert sorted(included_files) == sorted(expected_files) + + +def test_include_and_exclude_patterns(): + with tempfile.TemporaryDirectory() as temp_dir: + structure = { + 'src': { + 'foo_module': { + 'foo_file1.py': 'print("Foo File 1")', + 'file2.py': 'print("File 2")', + 'submodule': { + 'foo_file3.py': 'print("Foo File 3")', + 'file4.py': 'print("File 4")', + } + }, + 'bar_module': { + 'bar_file1.py': 'print("Bar File 1")', + } + }, + 'README.md': '# Project README', + 'setup.py': 'setup code', + 'notes.txt': 'Some notes', + } + + create_directory_structure(temp_dir, structure) + + args = type('Args', (object,), { + 'path': temp_dir, + 'output': os.path.join(temp_dir, 'output.md'), + 'structure_only': False, + 'debug': False, + 'include': ['*foo*'], + 'exclude': ['*submodule*'] + }) + + with patch('reposnap.controllers.project_controller.GitRepo') as MockGitRepo: + mock_git_repo_instance = MockGitRepo.return_value + + # Collect all files under temp_dir + all_files = [] + for root, dirs, files in os.walk(temp_dir): + for name in files: + file_path = Path(root) / name + rel_path = file_path.relative_to(temp_dir) + all_files.append(rel_path) + + mock_git_repo_instance.get_git_files.return_value = all_files + + controller = ProjectController(args) + controller.collect_file_tree() + + included_files = [] + + def traverse(tree, path=''): + for name, node in tree.items(): + current_path = os.path.join(path, name) + if isinstance(node, dict): + included_files.append(current_path) + traverse(node, current_path) + else: + included_files.append(current_path) + + 
traverse(controller.file_tree.structure) + + expected_files = [ + os.path.join('src'), + os.path.join('src', 'foo_module'), + os.path.join('src', 'foo_module', 'foo_file1.py'), + os.path.join('src', 'foo_module', 'file2.py'), # Include this file + ] + + assert sorted(included_files) == sorted(expected_files)