Skip to content

Commit

Permalink
remove log files in test data
Browse files (browse the repository at this point in the history)
  • Loading branch information
HaichaoLihc authored and FranardoHuang committed Nov 21, 2024
1 parent 199ba0b commit 07e0c9c
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 13 deletions.
21 changes: 11 additions & 10 deletions rag/file_conversion_router/services/directory_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,17 @@ def process_folder(input_dir: Union[str, Path], output_dir: Union[str, Path],
# Iterate over all files with specified extensions
for input_file_path in input_dir.rglob("*"):
if input_file_path.suffix in valid_extensions and input_file_path.is_file():
file_hash = calculate_hash(input_file_path)
if is_empty_md(file_hash):
content_logger.error(f"conversion skipped: found empty markdown file {input_file_path}")
continue
cached_result = persistent_cache.get(file_hash, None)
if cached_result and cached_result == conversion_version:
content_logger.info(f"Using persistent cached result version {conversion_version} for {input_file_path}")
continue
else:
persistent_cache[file_hash] = conversion_version
if cache_dir:
file_hash = calculate_hash(input_file_path)
if is_empty_md(file_hash):
content_logger.error(f"conversion skipped: found empty markdown file {input_file_path}")
continue
cached_result = persistent_cache.get(file_hash, None)
if cached_result and cached_result == conversion_version:
content_logger.info(f"Using persistent cached result version {conversion_version} for {input_file_path}")
continue
else:
persistent_cache[file_hash] = conversion_version

# Construct the output subdirectory and file path
output_subdir = output_dir / input_file_path.relative_to(input_dir).parent
Expand Down

This file was deleted.

This file was deleted.

0 comments on commit 07e0c9c

Please sign in to comment.