diff --git a/docker/hpu.dockerfile b/docker/hpu.dockerfile index a483af4b..0e5ce6d1 100644 --- a/docker/hpu.dockerfile +++ b/docker/hpu.dockerfile @@ -1,4 +1,4 @@ -FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.0:latest as hpu +FROM vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/root:/usr/lib/habanalabs/ @@ -24,4 +24,4 @@ RUN cd /GenAIEval && \ pip install --upgrade-strategy eager optimum[habana] && \ pip list -WORKDIR /GenAIEval/ \ No newline at end of file +WORKDIR /GenAIEval/ diff --git a/evals/evaluation/rag_eval/examples/eval_multihop.py b/evals/evaluation/rag_eval/examples/eval_multihop.py index a077d9f1..6bc49fbb 100644 --- a/evals/evaluation/rag_eval/examples/eval_multihop.py +++ b/evals/evaluation/rag_eval/examples/eval_multihop.py @@ -39,6 +39,22 @@ def get_document(self, data: dict): ) return document + def get_reranked_documents(self, query, docs, arguments): + data = { + "initial_query": query, + "retrieved_docs": [{"text": doc} for doc in docs], + "top_n": 10, + } + headers = {"Content-Type": "application/json"} + + response = requests.post(arguments.reranking_endpoint, data=json.dumps(data), headers=headers) + if response.ok: + reranked_documents = response.json()["documents"] + return reranked_documents + else: + print(f"Request for reranking failed due to {response.text}.") + return [] + def get_retrieved_documents(self, query, arguments): data = {"text": query} headers = {"Content-Type": "application/json"} @@ -77,6 +93,8 @@ def get_retrieval_metrics(self, all_queries, arguments): continue query = data["query"] retrieved_documents = self.get_retrieved_documents(query, arguments) + if arguments.rerank: + retrieved_documents = self.get_reranked_documents(query, retrieved_documents, arguments) golden_context = [each["fact"] for each in data["evidence_list"]] test_case = { "input": query, @@ -212,6 +230,10 @@ def args_parser():
parser.add_argument( "--retrieval_endpoint", type=str, default="http://localhost:7000/v1/retrieval", help="Service URL address." ) + parser.add_argument("--rerank", action="store_true", help="Whether to use rerank microservice.") + parser.add_argument( + "--reranking_endpoint", type=str, default="http://localhost:8000/v1/reranking", help="Service URL address." + ) parser.add_argument("--llm_endpoint", type=str, default=None, help="Service URL address.") parser.add_argument( "--show_progress_bar", action="store", default=True, type=bool, help="Whether to show a progress bar" diff --git a/evals/metrics/ragas/ragas.py b/evals/metrics/ragas/ragas.py index 15c11f84..4069a62c 100644 --- a/evals/metrics/ragas/ragas.py +++ b/evals/metrics/ragas/ragas.py @@ -31,7 +31,7 @@ def __init__( self.model = model self.embeddings = embeddings self.metrics = metrics - self.validated_list = ["answer_relevancy", "faithfulness"] + self.validated_list = ["answer_relevancy", "faithfulness", "answer_correctness"] async def a_measure(self, test_case: Dict): return self.measure(test_case)