diff --git a/comps/dataprep/pinecone/data/nke-10k-2023.pdf b/comps/dataprep/pinecone/data/nke-10k-2023.pdf deleted file mode 100644 index 6ade8863e8..0000000000 Binary files a/comps/dataprep/pinecone/data/nke-10k-2023.pdf and /dev/null differ diff --git a/comps/retrievers/langchain/pinecone/ingest.py b/comps/retrievers/langchain/pinecone/ingest.py index e17b5ebf5b..15097e4419 100644 --- a/comps/retrievers/langchain/pinecone/ingest.py +++ b/comps/retrievers/langchain/pinecone/ingest.py @@ -63,7 +63,7 @@ def ingest_documents(): contains Edgar 10k filings data for Nike.""" # Load list of pdfs company_name = "Nike" - data_path = "../data/" + data_path = "data/" doc_path = [os.path.join(data_path, file) for file in os.listdir(data_path)][0] print("Parsing 10k filing doc for NIKE", doc_path) diff --git a/comps/retrievers/langchain/pinecone/retriever_pinecone.py b/comps/retrievers/langchain/pinecone/retriever_pinecone.py index 43cdbd5a20..d28aea5294 100644 --- a/comps/retrievers/langchain/pinecone/retriever_pinecone.py +++ b/comps/retrievers/langchain/pinecone/retriever_pinecone.py @@ -55,12 +55,12 @@ def retrieve(input: EmbedDoc) -> SearchedDoc: if input.distance_threshold is None: raise ValueError("distance_threshold must be provided for " + "similarity_distance_threshold retriever") docs_and_similarities = vector_db.similarity_search_by_vector_with_score( - embedding=input.embedding, k=input.k, distance_threshold=input.distance_threshold + embedding=input.embedding, k=input.k ) search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.distance_threshold] elif input.search_type == "similarity_score_threshold": docs_and_similarities = vector_db.similarity_search_by_vector_with_score( - query=input.text, k=input.k, score_threshold=input.score_threshold + query=input.text, k=input.k ) search_res = [doc for doc, similarity in docs_and_similarities if similarity > input.score_threshold] elif input.search_type == "mmr":