Skip to content

Commit

Permalink
Deploy
Browse files (browse the repository at this point in the history)
  • Loading branch information
truskovskiyk committed Dec 1, 2024
1 parent ce22862 commit 2a88ab6
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions ai-search-demo/ai_search_demo/qdrant_inexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def get_pdf_images(pdf_path):
text = page.extract_text()
page_texts.append(text)
# Convert to PIL images
- images = convert_from_path(pdf_path)
+ images = convert_from_path(pdf_path, dpi=100, fmt="jpeg", jpegopt={"quality": 75, "progressive": True, "optimize": True})
assert len(images) == len(page_texts)
return images, page_texts

Expand Down Expand Up @@ -180,7 +180,6 @@ def pdfs_to_hf_dataset(path_to_folder):
global_index += 1
# Print memory usage after processing each image
current, peak = tracemalloc.get_traced_memory()
- print(f"IMAGE: Current memory usage is {current / 10**6}MB; Peak was {peak / 10**6}MB")

# Print memory usage after processing each PDF
current, peak = tracemalloc.get_traced_memory()
Expand Down

0 comments on commit 2a88ab6

Please sign in to comment.