diff --git a/fastchat/serve/gradio_block_arena_vision.py b/fastchat/serve/gradio_block_arena_vision.py index c56685902..09139a2ec 100644 --- a/fastchat/serve/gradio_block_arena_vision.py +++ b/fastchat/serve/gradio_block_arena_vision.py @@ -236,12 +236,15 @@ def parse_pdf(file_path): assert ( "LLAMA_CLOUD_API_KEY" in os.environ ), "Make sure to specify LlamaParse API key." - documents = LlamaParse( + document = LlamaParse( result_type="markdown", verbose=True, ).load_data(file_path) + + assert len(document) > 0 + output = document[0].text - return documents + return output def _prepare_text_with_image(state, text, images, csam_flag): @@ -255,13 +258,10 @@ def _prepare_text_with_image(state, text, images, csam_flag): return text -def _prepare_text_with_pdf(state, text, pdfs): +def _prepare_text_with_pdf(text, pdfs): if len(pdfs) > 0: - # if len(state.conv.get_pdfs()) > 0: - state.conv = get_conversation_template(state.model_name) - assert len(text) > 0 document_content = parse_pdf(pdfs[0]) text = wrap_pdfchat_query(text, document_content) return text diff --git a/fastchat/serve/gradio_block_arena_vision_anony.py b/fastchat/serve/gradio_block_arena_vision_anony.py index 31f0f343b..18361510b 100644 --- a/fastchat/serve/gradio_block_arena_vision_anony.py +++ b/fastchat/serve/gradio_block_arena_vision_anony.py @@ -375,11 +375,12 @@ def add_text( ) text = text[:BLIND_MODE_INPUT_CHAR_LEN_LIMIT] # Hard cut-off + post_processed_text = _prepare_text_with_pdf(text, pdfs) + for i in range(num_sides): post_processed_text = _prepare_text_with_image( - states[i], text, images, csam_flag=csam_flag + states[i], post_processed_text, images, csam_flag=csam_flag ) - post_processed_text = _prepare_text_with_pdf(states[i], text, pdfs) states[i].conv.append_message(states[i].conv.roles[0], post_processed_text) states[i].conv.append_message(states[i].conv.roles[1], None)