Skip to content

Commit

Permalink
- Fixed the issue which prevented snapping repositories without a .gitignore file
Browse files Browse the repository at this point in the history

- Added the possibility to snap a subfolder of a repo instead of the whole repo
  • Loading branch information
andrey.goloborodko committed Sep 16, 2024
1 parent 5cdf4ee commit c4e1325
Show file tree
Hide file tree
Showing 9 changed files with 152 additions and 147 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "reposnap"
version = "0.2.4"
version = "0.3.0"
description = "Generate a Markdown file with all contents of your project"
authors = [
{ name = "agoloborodko" }
Expand Down
54 changes: 44 additions & 10 deletions src/reposnap/core/collector.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,64 @@
# src/reposnap/core/collector.py

from .file_system import FileSystem
from .git_repo import GitRepo
from .markdown_generator import MarkdownGenerator
import pathspec
import logging
from pathlib import Path
import pathspec
from .git_repo import GitRepo
from .file_system import FileSystem
from .markdown_generator import MarkdownGenerator


class ProjectContentCollector:
def __init__(self, root_dir: str, output_file: str, structure_only: bool, gitignore_patterns: list):
self.logger = logging.getLogger(__name__)
self.root_dir = Path(root_dir).resolve()
self.output_file = Path(output_file).resolve()
self.structure_only = structure_only
self.gitignore_patterns = gitignore_patterns
self.spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, gitignore_patterns)
self.file_system = FileSystem(self.root_dir)

# Initialize components
self.git_repo = GitRepo(self.root_dir)
self.file_system = FileSystem(self.root_dir)
self.markdown_generator = MarkdownGenerator(
root_dir=self.root_dir,
output_file=self.output_file,
structure_only=self.structure_only
)
self.logger = logging.getLogger(__name__)

def collect_and_generate(self):
self.logger.info("Starting project content collection.")
# Collect files and build tree during initialization
self.files = self.collect_files()
self.tree_structure = self.build_tree_structure()

def collect_files(self):
"""
Collects and filters files to be included in the documentation.
"""
self.logger.info("Collecting git files.")
git_files = self.git_repo.get_git_files()
tree_structure = self.file_system.build_tree_structure(git_files)
self.markdown_generator.generate_markdown(tree_structure, git_files, self.spec)
self.logger.debug(f"Git files before filtering: {git_files}")

# Filter files based on .gitignore patterns
filtered_files = [
f for f in git_files if not self.spec.match_file(str(f))
]
self.logger.debug(f"Git files after filtering: {filtered_files}")

return filtered_files # Paths relative to root_dir

def build_tree_structure(self):
"""
Builds the tree structure from the collected files.
"""
self.logger.info("Building tree structure.")
tree = self.file_system.build_tree_structure(self.files)
self.logger.debug(f"Tree structure: {tree}")
return tree

def collect_and_generate(self):
"""
Initiates the markdown generation process.
"""
self.logger.info("Starting markdown generation.")
self.markdown_generator.generate_markdown(self.tree_structure, self.files)
self.logger.info("Markdown generation completed.")
27 changes: 17 additions & 10 deletions src/reposnap/core/file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,27 @@
from pathlib import Path

class FileSystem:
def __init__(self, root_dir: str):
self.root_dir = Path(root_dir).resolve()
def __init__(self, root_dir: Path):
self.root_dir = root_dir.resolve()
self.logger = logging.getLogger(__name__)

def build_tree_structure(self, files):
"""
Builds a hierarchical tree structure from the list of files.
Args:
files (list of Path): List of file paths relative to root_dir.
Returns:
dict: Nested dictionary representing the directory structure.
"""
tree = {}
logging.debug("\n>>> Processing Files for Tree Structure <<<")
for file in files:
file_path = (self.root_dir / file).resolve()
logging.debug(f"Processing file:\n File Path: {file_path}\n Root Dir: {self.root_dir}")
relative_path = file_path.relative_to(self.root_dir).as_posix()
parts = relative_path.split('/')
self.logger.debug("Building tree structure.")
for relative_path in files:
parts = relative_path.parts
current_level = tree
for part in parts[:-1]:
current_level = current_level.setdefault(part, {})
current_level[parts[-1]] = relative_path
logging.debug(">>> End of Processing <<<\n")
current_level[parts[-1]] = relative_path.as_posix()
self.logger.debug(f"Tree structure built: {tree}")
return tree
29 changes: 19 additions & 10 deletions src/reposnap/core/git_repo.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,30 @@
# src/reposnap/core/git_repo.py

import logging
from pathlib import Path
from git import Repo, InvalidGitRepositoryError

class GitRepo:
def __init__(self, repo_path: str):
self.repo_path = repo_path
def __init__(self, repo_path: Path):
self.repo_path = repo_path.resolve()
self.logger = logging.getLogger(__name__)

def get_git_files(self):
try:
repo = Repo(self.repo_path)
files = repo.git.ls_files().splitlines()
logging.debug(f"\n--- Retrieved Git Files from {repo.working_tree_dir} ---")
for file in files:
logging.debug(f" - {file}")
logging.debug("--- End of Git Files ---\n")
return files
repo = Repo(self.repo_path, search_parent_directories=True)
repo_root = Path(repo.working_tree_dir).resolve()
git_files = repo.git.ls_files().splitlines()
self.logger.debug(f"Git files from {repo_root}: {git_files}")
git_files_relative = []
for f in git_files:
absolute_path = (repo_root / f).resolve()
try:
relative_path = absolute_path.relative_to(self.repo_path)
git_files_relative.append(relative_path)
except ValueError:
# Skip files not under root_dir
continue
return git_files_relative
except InvalidGitRepositoryError:
logging.debug(f"Invalid Git repository at: {self.repo_path}")
self.logger.error(f"Invalid Git repository at: {self.repo_path}")
return []
72 changes: 11 additions & 61 deletions src/reposnap/core/markdown_generator.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,31 @@
# src/reposnap/core/markdown_generator.py

import logging
from pathlib import Path
from ..utils.path_utils import format_tree
import logging


class MarkdownGenerator:
def __init__(self, root_dir: Path, output_file: Path, structure_only: bool = False):
"""
Initializes the MarkdownGenerator.
Args:
root_dir (Path): The root directory of the project.
output_file (Path): The path to the output Markdown file.
structure_only (bool): If True, only the directory structure is included without file contents.
"""
self.root_dir = root_dir.resolve()
self.output_file = output_file.resolve()
self.structure_only = structure_only
self.logger = logging.getLogger(__name__)
self.logger.debug(f"Initialized MarkdownGenerator with root_dir={self.root_dir}, "
f"output_file={self.output_file}, structure_only={self.structure_only}")

def generate_markdown(self, tree_structure: dict, git_files: list, spec=None):
def generate_markdown(self, tree_structure: dict, files: list):
"""
Generates the Markdown file based on the provided tree structure and git files.
Generates the Markdown file based on the provided tree structure and files.
Args:
tree_structure (dict): The hierarchical structure of the project files.
git_files (list): List of files tracked by Git.
spec (pathspec.PathSpec, optional): PathSpec object for file exclusion based on patterns.
files (list of Path): List of file paths to include in the markdown.
"""
self.logger.info("Starting Markdown generation.")
self._write_header(tree_structure)

if not self.structure_only:
self._write_file_contents(git_files, spec)

self.logger.info(f"Markdown file generated successfully at: {self.output_file}")
self._write_file_contents(files)

def _write_header(self, tree_structure: dict):
"""
Writes the header and project structure to the Markdown file.
Args:
tree_structure (dict): The hierarchical structure of the project files.
"""
self.logger.debug("Writing Markdown header and project structure.")
try:
Expand All @@ -59,64 +40,33 @@ def _write_header(self, tree_structure: dict):
self.logger.error(f"Failed to write header to {self.output_file}: {e}")
raise

def _write_file_contents(self, git_files: list, spec):
def _write_file_contents(self, files: list):
"""
Writes the contents of each file to the Markdown file, excluding those matching the spec.
Writes the contents of each file to the Markdown file.
Args:
git_files (list): List of files tracked by Git.
spec (pathspec.PathSpec, optional): PathSpec object for file exclusion based on patterns.
files (list of Path): List of file paths relative to root_dir.
"""
self.logger.debug("Writing file contents to Markdown.")
for file in git_files:
file_path = self._resolve_file_path(file)
for relative_path in files:
file_path = self.root_dir / relative_path

if not file_path.exists():
self.logger.debug(f"File not found: {file_path}. Skipping.")
continue

relative_path = file_path.relative_to(self.root_dir).as_posix()
if spec and spec.match_file(relative_path):
self.logger.debug(f"File excluded by spec: {relative_path}. Skipping.")
continue

self.logger.debug(f"Processing file: {file_path}")
self._write_file_content(file_path, relative_path)

def _resolve_file_path(self, file: str) -> Path:
"""
Resolves the absolute path of a file relative to the root directory.
Args:
file (str): The file path relative to the root directory.
Returns:
Path: The absolute path to the file.
"""
resolved_path = self.root_dir / file
self.logger.debug(f"Resolved file path: {file} to {resolved_path}")
return resolved_path
self._write_file_content(file_path, relative_path.as_posix())

def _write_file_content(self, file_path: Path, relative_path: str):
"""
Writes the content of a single file to the Markdown file with syntax highlighting.
Args:
file_path (Path): The absolute path to the file.
relative_path (str): The file path relative to the root directory.
"""
try:
print(f"Attempting to read file: {file_path}")
with file_path.open('r', encoding='utf-8') as f:
content = f.read()
self.logger.debug(f"Read content from {file_path}")

with self.output_file.open('a', encoding='utf-8') as f:
f.write(f"## {relative_path}\n\n")
f.write("```python\n" if file_path.suffix == '.py' else "```\n")
f.write(f"{content}\n```\n\n")
self.logger.debug(f"Wrote content of {relative_path} to Markdown.")
except IOError as e:
self.logger.error(f"Error reading or writing file {file_path}: {e}")
except Exception as e:
self.logger.error(f"Unexpected error processing file {file_path}: {e}")
30 changes: 23 additions & 7 deletions src/reposnap/interfaces/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

import argparse
import logging
import os
from reposnap.core.collector import ProjectContentCollector
from pathlib import Path


def main():
parser = argparse.ArgumentParser(description='Generate a Markdown representation of a Git repository.')
parser.add_argument('path', help='Path to the Git repository.')
parser.add_argument('path', help='Path to the Git repository or subdirectory.')
parser.add_argument('-o', '--output', help='Output Markdown file', default='output.md')
parser.add_argument('--structure-only', action='store_true',
help='Only include the file structure without content.')
Expand All @@ -18,13 +20,27 @@ def main():
log_level = logging.DEBUG if args.debug else logging.INFO
logging.basicConfig(level=log_level, format='%(asctime)s - %(levelname)s - %(message)s')

with open(f"{args.path}/.gitignore", 'r') as gitignore:
patterns = gitignore.readlines()
logging.debug(f"Patterns from .gitignore in {args.path}: {patterns}")

collector = ProjectContentCollector(args.path, args.output, args.structure_only, patterns)
path = Path(args.path).resolve()
gitignore_path = path / '.gitignore'
if not gitignore_path.exists():
# Search for .gitignore in parent directories
for parent in path.parents:
gitignore_path = parent / '.gitignore'
if gitignore_path.exists():
break
else:
gitignore_path = None

if gitignore_path and gitignore_path.exists():
with gitignore_path.open('r') as gitignore:
patterns = gitignore.readlines()
logging.debug(f"Patterns from .gitignore in {gitignore_path.parent}: {patterns}")
else:
patterns = []
logging.debug(f"No .gitignore found starting from {args.path}. Proceeding without patterns.")

collector = ProjectContentCollector(str(path), args.output, args.structure_only, patterns)
collector.collect_and_generate()


if __name__ == "__main__":
main()
Loading

0 comments on commit c4e1325

Please sign in to comment.