Skip to content

Commit

Permalink
fix missing pkl file bug and use md_file in to_page
Browse files Browse the repository at this point in the history
  • Loading branch information
Catrunaround committed Jul 20, 2024
1 parent 343234d commit f2aef2a
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 26 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
1. (7.0 points) What Would Python Display?

Assume the following code has been executed. The Link class appears on the midterm 2 study guide (page 2, left side).

```python
def shake(it):
    if it is not Link.empty and it.rest is not Link.empty:
        if it.first + 1 < it.rest.first:
            it.rest = Link(it.rest.first-1, it.rest)
            shake(it)
        else:
            shake(it.rest)

it = Link(2, Link(5, Link(7)))
off = Link(1, it.rest)
shake(it)

def cruel(summer):
    while summer is not Link.empty:
        yield summer.first
        summer = summer.rest
        if summer is not Link.empty:
            summer = summer.rest

summer = Link(1, Link(2, Link(3, Link(4))))
```

Write the output printed for each expression below or _Error_ if an error occurs.

**(a) (2.0 pt)** `print(it)`

- `<2 5 7>`
- `<2 4 5 7>`
- `<2 4 5 6 7>`
- `<2 3 4 5 7>`
- `<2 4 3 5 7>`
- `<2 3 4 5 6 7>`
- `<2 4 3 5 6 7>`

**(b) (2.0 pt)** `print(off)`

`<1 5 6 7>`

**(c) (2.0 pt)** `print([x*x for x in cruel(summer)])`

`[1, 9]`

**(d) (1.0 pt)** What is the order of growth of the time it takes to evaluate shake(Link(1, Link(n))) in terms of n?

exponential

quadratic

linear

constant
Binary file not shown.
2 changes: 2 additions & 0 deletions rag/file_conversion_router/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ def convert_directory(input_dir: Union[str, Path], output_dir: Union[str, Path])
"""
process_folder(input_dir, output_dir)

if __name__ == "__main__":
    # Script entry point: convert the sample tree under output_tmp.
    # Both Windows-style separators are written as escaped backslashes. The
    # second path previously contained the unrecognized escape "\e", which
    # Python keeps verbatim but warns about; the resulting strings are the
    # same as before.
    convert_directory("output_tmp\\input", "output_tmp\\expected_output")
12 changes: 6 additions & 6 deletions rag/file_conversion_router/conversion/md_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,19 @@ def _to_markdown(self, input_path: Path, output_path: Path) -> Path:
def _to_page(self, input_path: Path, output_path: Path) -> Page:
"""Perform Markdown to Page conversion."""
try:
md_file_path = self._to_markdown(input_path, output_path)
md_file = self._to_markdown(input_path, output_path,)
except Exception as e:
self._logger.error(f"An error occurred during markdown conversion: {str(e)}")
raise

output_path.parent.mkdir(parents=True, exist_ok=True)

filetype = md_file_path.suffix.lstrip('.')
with open(md_file_path, "r") as input_file:
filetype = md_file.suffix.lstrip('.')
with open(md_file, "r") as input_file:
text = input_file.read()

metadata_path = md_file_path.with_name(f"{md_file_path.stem}_metadata.yaml")

metadata_path = input_path.with_name(f"{input_path.stem}_metadata.yaml")
metadata_content = self._read_metadata(metadata_path)
url = metadata_content.get("URL")
return Page(pagename=md_file_path.stem, content={'text': text}, filetype=filetype, page_url=url)

return Page(pagename = input_path.stem, content={'text': text}, filetype=filetype, page_url=url)
14 changes: 6 additions & 8 deletions rag/file_conversion_router/conversion/pdf_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,21 +182,19 @@ def _to_markdown(self, input_path: Path, output_path: Path) -> Path:
def _to_page(self, input_path: Path, output_path: Path) -> Page:
"""Perform Markdown to Page conversion."""
try:
md_file_path = self._to_markdown(input_path, output_path,)
md_file = self._to_markdown(input_path, output_path,)
except Exception as e:
self._logger.error(f"An error occurred during markdown conversion: {str(e)}")
raise

output_path.parent.mkdir(parents=True, exist_ok=True)

filetype = md_file_path.suffix.lstrip('.')
with open(md_file_path, "r") as input_file:
text = md_file_path.read()
filetype = md_file.suffix.lstrip('.')
with open(md_file, "r") as input_file:
text = input_file.read()


metadata_path = md_file_path.with_name(f"{md_file_path.stem}_metadata.yaml")
metadata_path = input_path.with_name(f"{input_path.stem}_metadata.yaml")
metadata_content = self._read_metadata(metadata_path)
url = metadata_content.get("URL")
print("PDF",url)
return Page(pagename = md_file_path.stem, content={'text': text}, filetype=filetype, page_url = url)

return Page(pagename=input_path.stem, content={'text': text}, filetype=filetype, page_url=url)
11 changes: 6 additions & 5 deletions rag/file_conversion_router/conversion/rst_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,22 @@ def _to_markdown(self, input_path: Path, output_path: Path) -> Path:
def _to_page(self, input_path: Path, output_path: Path) -> Page:
"""Perform Markdown to Page conversion."""
try:
md_file_path = self._to_markdown(input_path, output_path)
md_file = self._to_markdown(input_path, output_path,)
except Exception as e:
self._logger.error(f"An error occurred during markdown conversion: {str(e)}")
raise

output_path.parent.mkdir(parents=True, exist_ok=True)

filetype = md_file_path.suffix.lstrip('.')
with open(md_file_path, "r") as input_file:
filetype = md_file.suffix.lstrip('.')
with open(md_file, "r") as input_file:
text = input_file.read()

metadata_path = md_file_path.with_name(f"{md_file_path.stem}_metadata.yaml")

metadata_path = input_path.with_name(f"{input_path.stem}_metadata.yaml")
metadata_content = self._read_metadata(metadata_path)
url = metadata_content.get("URL")
return Page(pagename=md_file_path.stem, content={'text': text}, filetype=filetype, page_url=url)
return Page(pagename = input_path.stem, content={'text': text}, filetype=filetype, page_url=url)


# converter = RstConverter()
Expand Down
12 changes: 6 additions & 6 deletions rag/file_conversion_router/conversion/video_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,19 +170,19 @@ def _to_markdown(self, input_path, output_path):
def _to_page(self, input_path: Path, output_path: Path) -> Page:
"""Perform Markdown to Page conversion."""
try:
md_file_path = self._to_markdown(input_path, output_path)
md_file = self._to_markdown(input_path, output_path,)
except Exception as e:
self._logger.error(f"An error occurred during markdown conversion: {str(e)}")
raise

output_path.parent.mkdir(parents=True, exist_ok=True)

filetype = md_file_path.suffix.lstrip('.')
with open(md_file_path, "r") as input_file:
filetype = md_file.suffix.lstrip('.')
with open(md_file, "r") as input_file:
text = input_file.read()

metadata_path = md_file_path.with_name(f"{md_file_path.stem}_metadata.yaml")

metadata_path = input_path.with_name(f"{input_path.stem}_metadata.yaml")
metadata_content = self._read_metadata(metadata_path)
url = metadata_content.get("URL")
return Page(pagename=md_file_path.stem, content={'text': text}, filetype=filetype, page_url=url)

return Page(pagename = input_path.stem, content={'text': text}, filetype=filetype, page_url=url)
2 changes: 1 addition & 1 deletion rag/file_conversion_router/services/directory_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def process_folder(input_dir: Union[str, Path], output_dir: Union[str, Path]) ->
if input_file_path.suffix in valid_extensions and input_file_path.is_file():
# Construct the output subdirectory and file path
output_subdir = output_dir / input_file_path.relative_to(input_dir).parent
output_subdir.mkdir(parents=True, exist_ok=True)
# output_subdir.mkdir(parents=True, exist_ok=True)
output_file_path = output_subdir / input_file_path.stem

# Instantiate a new converter object for each file based on the file extension
Expand Down

0 comments on commit f2aef2a

Please sign in to comment.