Skip to content

Commit

Permalink
add images
Browse files — browse the repository at this point in the history
  • Loading branch information
truskovskiyk committed Dec 22, 2024
1 parent 168e1b7 commit ad019fd
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 11 deletions.
25 changes: 15 additions & 10 deletions no-ocr-api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,31 +304,36 @@ def ai_search(
if not search_results:
return {"message": "No results found."}

# dataset_path = os.path.join(settings.STORAGE_DIR, collection_name, settings.HF_DATASET_DIRNAME)
# if not os.path.exists(dataset_path):
# raise HTTPException(status_code=404, detail="Dataset for this collection not found.")
dataset_path = os.path.join(settings.STORAGE_DIR, collection_name, settings.HF_DATASET_DIRNAME)
if not os.path.exists(dataset_path):
raise HTTPException(status_code=404, detail="Dataset for this collection not found.")

# dataset = load_from_disk(dataset_path)
dataset = load_from_disk(dataset_path)
search_results_data = []
for result in search_results.points:
payload = result.payload
print(payload)
score = result.score
# image_data = dataset[payload['index']]['image']
# pdf_name = dataset[payload['index']]['pdf_name']
# pdf_page = dataset[payload['index']]['pdf_page']
image_data = dataset[payload['index']]['image']
pdf_name = dataset[payload['index']]['pdf_name']
pdf_page = dataset[payload['index']]['pdf_page']

# Prepare LLM interpretation
# image_obj = PIL.Image.fromarray(image_data) if not isinstance(image_data, PIL.Image.Image) else image_data
# vllm_output = call_vllm(image_obj)
pdf_name = 1
pdf_page = 1
vllm_output = "mock"

# Convert image to base64 string
buffered = BytesIO()
image_data.save(buffered, format="JPEG")
img_b64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

search_results_data.append({
"score": score,
"pdf_name": pdf_name,
"pdf_page": pdf_page,
"llm_interpretation": vllm_output
"llm_interpretation": vllm_output,
"image_base64": img_b64_str # Add image data to the response
})

return {"search_results": search_results_data}
Expand Down
9 changes: 8 additions & 1 deletion no-ocr-ui/src/components/Search.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,14 @@ export default function Search() {
{result.llm_interpretation}
</p>
<div className="mt-2 text-sm text-gray-500">
Relevance: {(result.score * 100).toFixed(1)}%
Score: {result.score.toFixed(2)}
</div>
<div className="mt-2">
<img
src={`data:image/jpeg;base64,${result.image_base64}`}
alt={`Page ${result.pdf_page} of ${result.pdf_name}`}
className="w-full h-auto rounded-md max-w-xs"
/>
</div>
</div>
))}
Expand Down

0 comments on commit ad019fd

Please sign in to comment.