From f27dd6990dff42676f4443c4f0180c94f136d408 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Thu, 26 Sep 2024 17:43:02 +0200
Subject: [PATCH] add reranking test

---
 examples/server/tests/features/rerank.feature | 49 ++++++++++---------
 examples/server/tests/features/steps/steps.py | 40 +++++++++++++++
 2 files changed, 66 insertions(+), 23 deletions(-)

diff --git a/examples/server/tests/features/rerank.feature b/examples/server/tests/features/rerank.feature
index e6981673d2d2c..72616d6c5d4c4 100644
--- a/examples/server/tests/features/rerank.feature
+++ b/examples/server/tests/features/rerank.feature
@@ -9,31 +9,34 @@ Feature: llama.cpp server
     And   a model alias jina-reranker-v1-tiny-en
     And   42 as server seed
     And   2 slots
-    And   128 as batch size
-    And   128 as ubatch size
+    And   512 as batch size
+    And   512 as ubatch size
     And   512 KV cache size
     And   embeddings extraction
     Then  the server is starting
     Then  the server is healthy
 
-# TODO: implement some tests
-#       https://github.com/ggerganov/llama.cpp/pull/9510
-#  Scenario: Rerank
-#    Given a prompt:
-#      """
-#      What is panda?
-#      """
-#    And a prompt:
-#      """
-#      Hi.
-#      """
-#    And a prompt:
-#      """
-#      It's a bear.
-#      """
-#    And a prompt:
-#      """
-#      The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.
-#      """
-#    When reranking request
-#    Then reranking results are returned
+  Scenario: Rerank
+    Given a rerank query:
+      """
+      Organic skincare products for sensitive skin
+      """
+    And   a rerank document:
+      """
+      Organic skincare for sensitive skin with aloe vera and chamomile: Imagine the soothing embrace of nature with our organic skincare range, crafted specifically for sensitive skin. Infused with the calming properties of aloe vera and chamomile, each product provides gentle nourishment and protection. Say goodbye to irritation and hello to a glowing, healthy complexion.
+      """
+    And   a rerank document:
+      """
+      New makeup trends focus on bold colors and innovative techniques: Step into the world of cutting-edge beauty with this seasons makeup trends. Bold, vibrant colors and groundbreaking techniques are redefining the art of makeup. From neon eyeliners to holographic highlighters, unleash your creativity and make a statement with every look.
+      """
+    And   a rerank document:
+      """
+      Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras: Entra en el fascinante mundo del maquillaje con las tendencias más actuales. Colores vivos y técnicas innovadoras están revolucionando el arte del maquillaje. Desde delineadores neón hasta iluminadores holográficos, desata tu creatividad y destaca en cada look.
+      """
+    And   a rerank document:
+      """
+      新的化妆趋势注重鲜艳的颜色和创新的技巧：进入化妆艺术的新纪元，本季的化妆趋势以大胆的颜色和创新的技巧为主。无论是霓虹眼线还是全息高光，每一款妆容都能让您脱颖而出，展现独特魅力。
+      """
+    When  reranking request
+    Then  reranking results are returned
+    Then  reranking highest score is index 2
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 0fea0fe87b799..9ae2ce67bafc9 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -83,6 +83,10 @@ def step_server_config(context, server_fqdn: str, server_port: str):
     context.concurrent_tasks = []
     context.prompts = []
 
+    context.reranking_query = None
+    context.reranking_documents = []
+    context.reranking_results = None
+
 
 @step('a model file {hf_file} from HF repo {hf_repo}')
 def step_download_hf_model(context, hf_file: str, hf_repo: str):
@@ -452,6 +456,21 @@ def step_impl(context, n_ga_w):
 def step_prompt_passkey(context):
     context.prompt_passkey = context_text(context)
 
+
+@step('a rerank query')
+def step_set_rerank_query(context):
+    """Store context_text(context) as the rerank query and reset the document list."""
+    context.reranking_query = context_text(context)
+    # A new query starts with an empty document list.
+    context.reranking_documents = []
+
+
+@step('a rerank document')
+def step_set_rerank_document(context):
+    """Append context_text(context) to the documents of the pending rerank request."""
+    documents = context.reranking_documents
+    documents.append(context_text(context))
+
 
 @step('{n_prompts:d} fixed prompts')
 def step_fixed_prompts(context, n_prompts):
@@ -619,6 +631,30 @@ async def step_compute_embedding(context):
     context.embeddings = await request_embedding(context_text(context), None, base_url=context.base_url)
 
 
+@step('reranking request')
+@async_run_until_complete
+async def step_compute_reranking(context):
+    """POST the stored rerank query and documents to the server's /reranking endpoint.
+
+    On HTTP 200 the 'results' entry of the JSON body is stored in
+    context.reranking_results; on any other status the integer status code is
+    stored there instead.
+    """
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
+        url = f'{context.base_url}/reranking'
+        payload = {
+            "query": context.reranking_query,
+            "documents": context.reranking_documents,
+        }
+        async with session.post(url, json=payload) as response:
+            if response.status != 200:
+                # No results to parse: record the status code in their place.
+                context.reranking_results = response.status
+                return
+            body = await response.json()
+            context.reranking_results = body['results']
+
+
 @step('all embeddings are the same')
 @async_run_until_complete
 async def step_all_embeddings_are_the_same(context):
@@ -704,6 +732,26 @@ async def all_embeddings_are_generated(context):
     for i in range(n_embedding_requests):
         assert_embeddings(context.tasks_result.pop().pop())
 
+
+@step('reranking results are returned')
+def reranking_results_are_returned(context):
+    """Check that the rerank request produced one result per submitted document."""
+    results = context.reranking_results
+    # The request step stores the HTTP status code (an int) here when the call fails.
+    assert isinstance(results, list), f"reranking request failed: {results}"
+    assert len(results) == len(context.reranking_documents)
+
+
+@step('reranking highest score is index {idx:d}')
+def reranking_highest_score_is_index(context, idx: int):
+    """Check that the result with the highest 'relevance_score' has 'index' == idx."""
+    results = context.reranking_results
+    assert isinstance(results, list) and results, f"no reranking results: {results}"
+    # Compare the scores against each other rather than against a seed of 0, so the
+    # check stays correct when every score is negative. Ties keep the first result.
+    best = max(results, key=lambda res: res['relevance_score'])
+    assert best['index'] == idx, f"highest score is at index {best['index']}, expected {idx}"
+
 
 @step('adding special tokens')
 def step_tokenize_set_add_special(context):