From d76751ad8184831421b2bebbc347c5cce5de581b Mon Sep 17 00:00:00 2001 From: "Dong, Bo" <472452063@qq.com> Date: Tue, 10 Sep 2024 12:08:42 +0800 Subject: [PATCH] optimize rerank with backend ref (#579) * add rerank with neural speed Signed-off-by: Dong, Bo1 * add the code Signed-off-by: Dong, Bo1 * add the code Signed-off-by: Dong, Bo1 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Dong, Bo1 * fix mismatched response format w/wo streaming guardrails (#568) * fix mismatched response format w/wo streaming guardrails * fix & debug * fix & rm debug * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Dong, Bo1 * Fix guardrails out handle logics for space linebreak and quote (#571) * fix mismatched response format w/wo streaming guardrails * fix & debug * fix & rm debug * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * debug * debug * debug * fix pre-space and linebreak * fix pre-space and linebreak * fix single/double quote * fix single/double quote * remove debug * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Dong, Bo1 * BUG FIX: LVM security fix (#572) * add url validator Signed-off-by: BaoHuiling * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add validation for video_url Signed-off-by: BaoHuiling --------- Signed-off-by: BaoHuiling Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Dong, Bo1 * Modify output messages. (#569) * Reduced output. 
Signed-off-by: zepan * Output the location where the modified Dockerfile file is referenced. Signed-off-by: zepan * for test Signed-off-by: zepan * Restore test file. Signed-off-by: zepan --------- Signed-off-by: zepan Signed-off-by: Dong, Bo1 * refine logging code. (#559) * add ut and refine logging code. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update microservice port. --------- Co-authored-by: root Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Dong, Bo1 * adding lancedb to langchain vectorstores (#291) * adding lancedb to langchain vectorstores Signed-off-by: sharanshirodkar7 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: sharanshirodkar7 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: lvliang-intel Signed-off-by: Dong, Bo1 * Refine Dataprep Milvus MS (#570) Signed-off-by: letonghan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Dong, Bo1 * final version Signed-off-by: Dong, Bo1 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Dong, Bo1 * update the readme Signed-off-by: Dong, Bo1 * add the sign Signed-off-by: Dong, Bo1 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Dong, Bo1 * fix error for pre ci Signed-off-by: Dong, Bo1 * add the ut Signed-off-by: Dong, Bo1 * update docker file Signed-off-by: Dong, Bo1 * update CI test log achieve (#577) Signed-off-by: chensuyue Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Dong, Bo1 * Multimodal dataprep (#575) * multimodal embedding for MM RAG for videos Signed-off-by: Tiep Le * [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci * develop data prep first commit Signed-off-by: Tiep Le * develop dataprep microservice for multimodal data Signed-off-by: Tiep Le * multimodal langchain for dataprep Signed-off-by: Tiep Le * update README Signed-off-by: Tiep Le * update README Signed-off-by: Tiep Le * update README Signed-off-by: Tiep Le * update README Signed-off-by: Tiep Le * cosmetic Signed-off-by: Tiep Le * test for multimodal dataprep Signed-off-by: Tiep Le * update test Signed-off-by: Tiep Le * update test Signed-off-by: Tiep Le * update test Signed-off-by: Tiep Le * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * cosmetic update Signed-off-by: Tiep Le * remove langsmith Signed-off-by: Tiep Le * update API to remove /dataprep from API names and remove langsmith Signed-off-by: Tiep Le * update test Signed-off-by: Tiep Le * update the error message per PR reviewer Signed-off-by: Tiep Le --------- Signed-off-by: Tiep Le Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Dong, Bo1 * add: Pathway vector store and retriever as LangChain component (#342) * nb Signed-off-by: Berke * init changes Signed-off-by: Berke * docker Signed-off-by: Berke * example data Signed-off-by: Berke * docs(readme): update, add commands Signed-off-by: Berke * fix: formatting, data sources Signed-off-by: Berke * docs(readme): update instructions, add comments Signed-off-by: Berke * fix: rm unused parts Signed-off-by: Berke * fix: image name, compose env vars Signed-off-by: Berke * fix: rm unused part Signed-off-by: Berke * fix: logging name Signed-off-by: Berke * fix: env var Signed-off-by: Berke * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Berke * fix: rename pw docker Signed-off-by: Berke * docs(readme): update input sources Signed-off-by: Berke * nb Signed-off-by: Berke 
* init changes Signed-off-by: Berke * fix: formatting, data sources Signed-off-by: Berke * docs(readme): update instructions, add comments Signed-off-by: Berke * fix: rm unused part Signed-off-by: Berke * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Berke * fix: rename pw docker Signed-off-by: Berke * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Berke * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * feat: mv vector store, naming, clarify instructions, improve ingestion components Signed-off-by: Berke * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tests: add pw retriever test fix: update docker to include libmagic Signed-off-by: Berke * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * implement suggestions from review, entrypoint, reqs, comments, https_proxy. 
Signed-off-by: Berke * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix: update docker tags in test and readme Signed-off-by: Berke * tests: add separate pathway vectorstore test Signed-off-by: Berke --------- Signed-off-by: Berke Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Sihan Chen <39623753+Spycsh@users.noreply.github.com> Signed-off-by: Dong, Bo1 * Add local Rerank microservice for VideoRAGQnA (#496) * initial commit Signed-off-by: BaoHuiling * save Signed-off-by: BaoHuiling * add readme, test script, fix bug Signed-off-by: BaoHuiling * update video URL Signed-off-by: BaoHuiling * use default Signed-off-by: BaoHuiling * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update core dependency Signed-off-by: BaoHuiling * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * use p 5000 Signed-off-by: BaoHuiling * use 5037 Signed-off-by: BaoHuiling * update ctnr name Signed-off-by: BaoHuiling * remove langsmith Signed-off-by: BaoHuiling * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add rerank algo desc in readme Signed-off-by: BaoHuiling * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: BaoHuiling Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: chen, suyue Signed-off-by: Dong, Bo1 * Add Scan Container. 
(#560) Signed-off-by: zepan Signed-off-by: Dong, Bo1 * fix SearchedMultimodalDoc in docarray (#583) Signed-off-by: BaoHuiling Signed-off-by: Dong, Bo1 * update image build yaml (#529) Signed-off-by: chensuyue Signed-off-by: zepan Signed-off-by: Dong, Bo1 * add microservice for intent detection (#131) * add microservice for intent detection Signed-off-by: Liangyx2 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update license copyright Signed-off-by: Liangyx2 * add ut Signed-off-by: Liangyx2 * refine Signed-off-by: Liangyx2 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update folder Signed-off-by: Liangyx2 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix test Signed-off-by: Liangyx2 --------- Signed-off-by: Liangyx2 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Dong, Bo1 * Make the scanning method optional. (#580) Signed-off-by: zepan Signed-off-by: Dong, Bo1 * add code owners (#586) Signed-off-by: Dong, Bo1 * remove revision for tei (#584) Signed-off-by: letonghan Signed-off-by: Dong, Bo1 * Bug fix (#591) * Check if the document exists. Signed-off-by: zepan * Add flag output. Signed-off-by: zepan * Modify nginx readme. 
Signed-off-by: zepan * Modify document detection logic Signed-off-by: zepan --------- Signed-off-by: zepan Signed-off-by: Dong, Bo1 * fix ut issue Signed-off-by: Dong, Bo1 * merge the main Signed-off-by: Dong, Bo1 * align with new pipeline Signed-off-by: Dong, Bo1 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * align with newest pipeline Signed-off-by: Dong, Bo1 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * upload code Signed-off-by: Dong, Bo1 * update the ut Signed-off-by: Dong, Bo1 * add docker path Signed-off-by: Dong, Bo1 * add the docker path Signed-off-by: Dong, Bo1 --------- Signed-off-by: Dong, Bo1 Signed-off-by: BaoHuiling Signed-off-by: zepan Signed-off-by: sharanshirodkar7 Signed-off-by: letonghan Signed-off-by: chensuyue Signed-off-by: Tiep Le Signed-off-by: Berke Signed-off-by: Liangyx2 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Sihan Chen <39623753+Spycsh@users.noreply.github.com> Co-authored-by: Huiling Bao Co-authored-by: ZePan110 Co-authored-by: lkk <33276950+lkk12014402@users.noreply.github.com> Co-authored-by: root Co-authored-by: Sharan Shirodkar <91109427+sharanshirodkar7@users.noreply.github.com> Co-authored-by: lvliang-intel Co-authored-by: Letong Han <106566639+letonghan@users.noreply.github.com> Co-authored-by: chen, suyue Co-authored-by: Tiep Le <97980157+tileintel@users.noreply.github.com> Co-authored-by: berkecanrizai <63911408+berkecanrizai@users.noreply.github.com> Co-authored-by: Liangyx2 Co-authored-by: kevinintel --- .../docker/compose/reranks-compose-cd.yaml | 8 ++ comps/reranks/neural-speed/README.md | 32 +++++++ comps/reranks/neural-speed/__init__.py | 2 + comps/reranks/neural-speed/docker/Dockerfile | 31 +++++++ .../docker/docker_compose_embedding.yaml | 22 +++++ .../neuralspeed-docker/Dockerfile | 27 ++++++ .../neural-speed/neuralspeed-docker/client.py 
| 35 +++++++ .../neuralspeed-docker/client_multibatch.py | 45 +++++++++ .../neuralspeed-docker/requirements.txt | 16 ++++ .../neural-speed/neuralspeed-docker/server.py | 91 ++++++++++++++++++ comps/reranks/neural-speed/requirements.txt | 11 +++ .../neural-speed/reranking_neuralspeed_svc.py | 93 +++++++++++++++++++ tests/test_reranks_mosec-neuralspeed.sh | 84 +++++++++++++++++ 13 files changed, 497 insertions(+) create mode 100644 comps/reranks/neural-speed/README.md create mode 100644 comps/reranks/neural-speed/__init__.py create mode 100644 comps/reranks/neural-speed/docker/Dockerfile create mode 100644 comps/reranks/neural-speed/docker/docker_compose_embedding.yaml create mode 100644 comps/reranks/neural-speed/neuralspeed-docker/Dockerfile create mode 100644 comps/reranks/neural-speed/neuralspeed-docker/client.py create mode 100644 comps/reranks/neural-speed/neuralspeed-docker/client_multibatch.py create mode 100644 comps/reranks/neural-speed/neuralspeed-docker/requirements.txt create mode 100644 comps/reranks/neural-speed/neuralspeed-docker/server.py create mode 100644 comps/reranks/neural-speed/requirements.txt create mode 100644 comps/reranks/neural-speed/reranking_neuralspeed_svc.py create mode 100644 tests/test_reranks_mosec-neuralspeed.sh diff --git a/.github/workflows/docker/compose/reranks-compose-cd.yaml b/.github/workflows/docker/compose/reranks-compose-cd.yaml index d805b570e..0cebf4ca3 100644 --- a/.github/workflows/docker/compose/reranks-compose-cd.yaml +++ b/.github/workflows/docker/compose/reranks-compose-cd.yaml @@ -14,3 +14,11 @@ services: build: dockerfile: comps/reranks/langchain-mosec/docker/Dockerfile image: ${REGISTRY:-opea}/reranking-langchain-mosec:${TAG:-latest} + reranking-mosec-neural-speed: + build: + dockerfile: comps/reranks/neural-speed/docker/Dockerfile + image: ${REGISTRY:-opea}/reranking-mosec-neural-speed:${TAG:-latest} + reranking-mosec-neural-speed-endpoint: + build: + dockerfile: 
comps/reranks/neural-speed/neuralspeed-docker/Dockerfile + image: ${REGISTRY:-opea}/reranking-mosec-neural-speed-endpoint:${TAG:-latest} diff --git a/comps/reranks/neural-speed/README.md b/comps/reranks/neural-speed/README.md new file mode 100644 index 000000000..c1841e16a --- /dev/null +++ b/comps/reranks/neural-speed/README.md @@ -0,0 +1,32 @@ +# build Mosec endpoint docker image + +``` +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t langchain-mosec:neuralspeed-reranks -f comps/reranks/neural-speed/neuralspeed-docker/Dockerfile . +``` + +# build Reranking microservice docker image + +``` +docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec:neuralspeed -f comps/reranks/neural-speed/docker/Dockerfile . +``` + +Note: Please contact us to request model files before building images. + +# launch Mosec endpoint docker container + +``` +docker run -d --name="reranking-langchain-mosec-endpoint" -p 6001:8000 langchain-mosec:neuralspeed-reranks +``` + +# launch Reranking microservice docker container + +``` +export MOSEC_RERANKING_ENDPOINT=http://127.0.0.1:6001 +docker run -d --name="reranking-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:8000 --ipc=host -e MOSEC_RERANKING_ENDPOINT=$MOSEC_RERANKING_ENDPOINT opea/reranking-langchain-mosec:neuralspeed +``` + +# run client test + +``` +curl http://localhost:6000/v1/reranking -X POST -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' -H 'Content-Type: application/json' +``` diff --git a/comps/reranks/neural-speed/__init__.py b/comps/reranks/neural-speed/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/reranks/neural-speed/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git 
a/comps/reranks/neural-speed/docker/Dockerfile b/comps/reranks/neural-speed/docker/Dockerfile new file mode 100644 index 000000000..8ffed65ec --- /dev/null +++ b/comps/reranks/neural-speed/docker/Dockerfile @@ -0,0 +1,31 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM langchain/langchain:latest + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/reranks/neural-speed/requirements.txt + +RUN pip3 install llmspec mosec msgspec httpx requests +RUN pip3 install torch==2.2.2 --trusted-host download.pytorch.org --index-url https://download.pytorch.org/whl/cpu + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/reranks/neural-speed + +ENTRYPOINT ["python", "reranking_neuralspeed_svc.py"] + diff --git a/comps/reranks/neural-speed/docker/docker_compose_embedding.yaml b/comps/reranks/neural-speed/docker/docker_compose_embedding.yaml new file mode 100644 index 000000000..d5f59b4a0 --- /dev/null +++ b/comps/reranks/neural-speed/docker/docker_compose_embedding.yaml @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + reranking: + image: opea/reranking-langchain-mosec:neuralspeed + container_name: reranking-langchain-mosec-server + ports: + - "6000:8000" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MOSEC_RERANKING_ENDPOINT: ${MOSEC_RERANKING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/reranks/neural-speed/neuralspeed-docker/Dockerfile b/comps/reranks/neural-speed/neuralspeed-docker/Dockerfile 
new file mode 100644 index 000000000..42dcbad8c --- /dev/null +++ b/comps/reranks/neural-speed/neuralspeed-docker/Dockerfile @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM ubuntu:22.04 +ARG DEBIAN_FRONTEND=noninteractive + +ENV GLIBC_TUNABLES=glibc.cpu.x86_shstk=permissive + +COPY comps /root/comps +COPY neural_speed-0.1.dev45+g41ea0aa-cp310-cp310-linux_x86_64.whl /root/ +COPY bge-large-r-q8.bin /root/ +COPY libstdc++.so.6 /root/ + +RUN apt update && apt install -y python3 python3-pip +RUN pip3 install -r /root/comps/reranks/neural-speed/neuralspeed-docker/requirements.txt +RUN pip3 install llmspec mosec msgspec httpx requests +RUN pip3 install /root/neural_speed-0.1.dev45+g41ea0aa-cp310-cp310-linux_x86_64.whl + +RUN cd /root/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-reranker-large --local-dir /root/bge-reranker-large + + +ENV LD_PRELOAD=/root/libstdc++.so.6 + + +WORKDIR /root/comps/reranks/neural-speed/neuralspeed-docker + +CMD ["python3", "server.py"] diff --git a/comps/reranks/neural-speed/neuralspeed-docker/client.py b/comps/reranks/neural-speed/neuralspeed-docker/client.py new file mode 100644 index 000000000..02017faaf --- /dev/null +++ b/comps/reranks/neural-speed/neuralspeed-docker/client.py @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +from http import HTTPStatus + +import httpx +import msgspec +import requests + +req = { + "query": "talk is cheap, show me the code", + "docs": [ + "what a nice day", + "life is short, use python", + "early bird catches the worm", + ], +} + +httpx_response = httpx.post("http://127.0.0.1:8080/inference", content=msgspec.msgpack.encode(req)) + +requests_response = requests.post("http://127.0.0.1:8080/inference", data=msgspec.msgpack.encode(req)) + +MOSEC_RERANKING_ENDPOINT = os.environ.get("MOSEC_RERANKING_ENDPOINT", "http://127.0.0.1:8080") + 
+request_url = MOSEC_RERANKING_ENDPOINT + "/inference" +print(f"request_url = {request_url}") +resp_3 = requests.post(request_url, data=msgspec.msgpack.encode(req)) + +if httpx_response.status_code == HTTPStatus.OK and requests_response.status_code == HTTPStatus.OK: + print(f"OK: \n {msgspec.msgpack.decode(httpx_response.content)}") + print(f"OK: \n {msgspec.msgpack.decode(requests_response.content)}") + print(f"OK: \n {msgspec.msgpack.decode(resp_3.content)}") +else: + print(f"err[{httpx_response.status_code}] {httpx_response.text}") diff --git a/comps/reranks/neural-speed/neuralspeed-docker/client_multibatch.py b/comps/reranks/neural-speed/neuralspeed-docker/client_multibatch.py new file mode 100644 index 000000000..09eee1dfb --- /dev/null +++ b/comps/reranks/neural-speed/neuralspeed-docker/client_multibatch.py @@ -0,0 +1,45 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from http import HTTPStatus +from threading import Thread + +import httpx +import msgspec + +req = { + "query": "talk is cheap, show me the code", + "docs": [ + "what a nice day", + "life is short, use python", + "early bird catches the worm", + ], +} +reqs = [] +BATCH = 32 +for i in range(BATCH): + reqs.append(msgspec.msgpack.encode(req)) + + +def post_func(threadIdx): + resp = httpx.post("http://127.0.0.1:8080/inference", content=reqs[threadIdx]) + ret = f"thread {threadIdx} \n" + if resp.status_code == HTTPStatus.OK: + ret += f"OK: {msgspec.msgpack.decode(resp.content)['scores']}" + else: + ret += f"err[{resp.status_code}] {resp.text}" + print(ret) + + +threads = [] +for i in range(BATCH): + t = Thread( + target=post_func, + args=[ + i, + ], + ) + threads.append(t) + +for i in range(BATCH): + threads[i].start() diff --git a/comps/reranks/neural-speed/neuralspeed-docker/requirements.txt b/comps/reranks/neural-speed/neuralspeed-docker/requirements.txt new file mode 100644 index 000000000..50dc540fc --- /dev/null +++ 
b/comps/reranks/neural-speed/neuralspeed-docker/requirements.txt @@ -0,0 +1,16 @@ +--extra-index-url https://download.pytorch.org/whl/cpu +accelerate +cmake +datasets +huggingface_hub +matplotlib +numpy +peft +protobuf<3.20 +py-cpuinfo +sentencepiece +tiktoken +torch +transformers +transformers_stream_generator +zipfile38 diff --git a/comps/reranks/neural-speed/neuralspeed-docker/server.py b/comps/reranks/neural-speed/neuralspeed-docker/server.py new file mode 100644 index 000000000..0176abcfb --- /dev/null +++ b/comps/reranks/neural-speed/neuralspeed-docker/server.py @@ -0,0 +1,91 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import Any, List + +import numpy +from mosec import Server, Worker, get_logger +from mosec.mixin import TypedMsgPackMixin +from msgspec import Struct +from neural_speed import Model +from transformers import AutoModelForSequenceClassification, AutoTokenizer + +logger = get_logger() + +INFERENCE_BATCH_SIZE = 128 +INFERENCE_MAX_WAIT_TIME = 10 +INFERENCE_WORKER_NUM = 1 +INFERENCE_CONTEXT = 512 + +TorchModel = "/root/bge-reranker-large" +NS_Bin = "/root/bge-large-r-q8.bin" + +NS_Model = "bert" + + +class Request(Struct, kw_only=True): + query: str + docs: List[str] + + +class Response(Struct, kw_only=True): + scores: List[float] + + +class Inference(TypedMsgPackMixin, Worker): + + def __init__(self): + super().__init__() + self.tokenizer = AutoTokenizer.from_pretrained(TorchModel) + self.model = Model() + self.model.init_from_bin( + NS_Model, + NS_Bin, + batch_size=INFERENCE_BATCH_SIZE, + n_ctx=INFERENCE_CONTEXT + 2, + ) + + def forward(self, data: List[Request]) -> List[Response]: + batch = len(data) + ndoc = [] + inps = [] + # iterate with a distinct name instead of shadowing the `data` argument + for req in data: + inp = [[req.query, doc] for doc in req.docs] + inps.extend(inp) + ndoc.append(len(req.docs)) + outs = [] + for i in range(0, len(inps), INFERENCE_BATCH_SIZE): + inp_bs = inps[i : i + INFERENCE_BATCH_SIZE] + inputs = 
self.tokenizer( + inp_bs, padding=True, truncation=True, max_length=INFERENCE_CONTEXT, return_tensors="pt" + ) + st = time.time() + output = self.model( + **inputs, + reinit=True, + logits_all=True, + continuous_batching=False, + ignore_padding=True, + ) + logger.info(f"Total batch {batch} input shape {inputs.input_ids.shape} time {time.time()-st}") + outs.append(output) + ns_outputs = numpy.concatenate(outs, axis=0) + resps = [] + pos = 0 + for i in range(batch): + resp = Response(scores=ns_outputs[pos : pos + ndoc[i]].tolist()) + pos += ndoc[i] + resps.append(resp) + return resps + + +if __name__ == "__main__": + INFERENCE_BATCH_SIZE = int(os.environ.get("MAX_BATCH_SIZE", 128)) + INFERENCE_MAX_WAIT_TIME = int(os.environ.get("MAX_WAIT_TIME", 1)) + server = Server() + server.append_worker( + Inference, max_batch_size=INFERENCE_BATCH_SIZE, max_wait_time=INFERENCE_MAX_WAIT_TIME, num=INFERENCE_WORKER_NUM + ) + server.run() diff --git a/comps/reranks/neural-speed/requirements.txt b/comps/reranks/neural-speed/requirements.txt new file mode 100644 index 000000000..9fa1a059c --- /dev/null +++ b/comps/reranks/neural-speed/requirements.txt @@ -0,0 +1,11 @@ +docarray[full] +fastapi +langchain +langchain_community +openai +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +shortuuid +uvicorn diff --git a/comps/reranks/neural-speed/reranking_neuralspeed_svc.py b/comps/reranks/neural-speed/reranking_neuralspeed_svc.py new file mode 100644 index 000000000..098378a52 --- /dev/null +++ b/comps/reranks/neural-speed/reranking_neuralspeed_svc.py @@ -0,0 +1,93 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import heapq +import json +import os +import re +import time +from typing import List, Optional, Union + +import httpx +import msgspec +import requests +import torch +from langchain_core.prompts import ChatPromptTemplate +from langsmith import traceable + +from comps import ( + CustomLogger, 
LLMParamsDoc, + SearchedDoc, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + RerankingRequest, + RerankingResponse, + RerankingResponseData, +) + + +@register_microservice( + name="opea_service@reranking_mosec", + service_type=ServiceType.RERANK, + endpoint="/v1/reranking", + host="0.0.0.0", + port=8000, + input_datatype=SearchedDoc, + output_datatype=LLMParamsDoc, +) +@traceable(run_type="reranking") +@register_statistics(names=["opea_service@reranking_mosec"]) +def reranking( + input: Union[SearchedDoc, RerankingRequest, ChatCompletionRequest] +) -> Union[LLMParamsDoc, RerankingResponse, ChatCompletionRequest]: + start = time.time() + reranking_results = [] + if input.retrieved_docs: + docs = [doc.text for doc in input.retrieved_docs] + url = mosec_reranking_endpoint + "/inference" + if isinstance(input, SearchedDoc): + query = input.initial_query + else: + # for RerankingRequest, ChatCompletionRequest + query = input.input + data = {"query": query, "docs": docs} + resp = requests.post(url, data=msgspec.msgpack.encode(data)) + response_list = msgspec.msgpack.decode(resp.content)["scores"] + response = torch.nn.functional.sigmoid(torch.tensor(response_list)) + length = len(response) + resp_list = response.tolist() + sorted_score = heapq.nlargest(length, resp_list) + # materialize the index list once; a bare map() iterator would be + # exhausted after the first list() call inside the loop below + sorted_score_index = list(map(resp_list.index, sorted_score)) + + for i in range(min(input.top_n, length)): + reranking_results.append( + {"text": input.retrieved_docs[sorted_score_index[i]].text, "score": sorted_score[i]} + ) + + statistics_dict["opea_service@reranking_mosec"].append_latency(time.time() - start, None) + if isinstance(input, SearchedDoc): + return LLMParamsDoc(query=input.initial_query, documents=[doc["text"] for doc in reranking_results]) + else: + reranking_docs = [] + for doc in reranking_results: + reranking_docs.append(RerankingResponseData(text=doc["text"], 
score=doc["score"])) + if isinstance(input, RerankingRequest): + return RerankingResponse(reranked_docs=reranking_docs) + + if isinstance(input, ChatCompletionRequest): + input.reranked_docs = reranking_docs + input.documents = [doc["text"] for doc in reranking_results] + return input + + +if __name__ == "__main__": + mosec_reranking_endpoint = os.getenv("MOSEC_RERANKING_ENDPOINT", "http://localhost:8080") + print("NeuralSpeed Reranking Microservice Initialized.") + opea_microservices["opea_service@reranking_mosec"].start() diff --git a/tests/test_reranks_mosec-neuralspeed.sh b/tests/test_reranks_mosec-neuralspeed.sh new file mode 100644 index 000000000..4512dc794 --- /dev/null +++ b/tests/test_reranks_mosec-neuralspeed.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_mosec_docker_images() { + cd $WORKPATH + echo $(pwd) + cp /data2/nswhl/* ./ + docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t langchain-mosec:neuralspeed-reranks -f comps/reranks/neural-speed/neuralspeed-docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/reranking-langchain-mosec-endpoint build failed" + exit 1 + else + echo "opea/reranking-langchain-mosec-endpoint built successfully" + fi +} + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec:neuralspeed -f comps/reranks/neural-speed/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/reranking-langchain-mosec build failed" + exit 1 + else + echo "opea/reranking-langchain-mosec built successfully" + fi +} + +function start_service() { + mosec_endpoint=5006 + model="BAAI/bge-reranker-large" + unset http_proxy + docker run -d --name="test-comps-reranking-langchain-mosec-endpoint" -p $mosec_endpoint:8000 langchain-mosec:neuralspeed-reranks + export MOSEC_RERANKING_ENDPOINT="http://${ip_address}:${mosec_endpoint}" + mosec_service_port=5007 + docker run -d --name="test-comps-reranking-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${mosec_service_port}:8000 --ipc=host -e MOSEC_RERANKING_ENDPOINT=$MOSEC_RERANKING_ENDPOINT opea/reranking-langchain-mosec:neuralspeed + sleep 3m +} + +function validate_microservice() { + mosec_service_port=5007 + result=$(http_proxy="" curl http://${ip_address}:${mosec_service_port}/v1/reranking\ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json') + if [[ $result == *"Deep"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-reranking-langchain-mosec-endpoint + docker logs test-comps-reranking-langchain-mosec-server + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-reranking-langchain-mosec-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_mosec_docker_images + + build_docker_images + + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main
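A note on the score post-processing in `reranking_neuralspeed_svc.py`: it sorts sigmoid scores with `heapq.nlargest` and then recovers document indices via `list.index`, which always returns the first match and can therefore pick the wrong document when two scores are equal. A minimal, tie-safe sketch of the same top-n step (the `rank_top_n` helper is illustrative, not part of the patch):

```python
import math


def rank_top_n(scores, docs, top_n=2):
    # Map raw reranker logits to (0, 1) with a sigmoid, then sort indices
    # by score. Sorting indices directly avoids list.index(), which returns
    # the first occurrence and mis-ranks duplicated scores.
    probs = [1.0 / (1.0 + math.exp(-s)) for s in scores]
    order = sorted(range(len(probs)), key=lambda i: probs[i], reverse=True)
    # Slicing also guards against top_n exceeding the number of documents.
    return [{"text": docs[i], "score": probs[i]} for i in order[:top_n]]
```

This keeps each score paired with its own index, so duplicate scores and an oversized `top_n` are both handled without extra bookkeeping.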
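In `server.py`, the mosec worker's `forward` flattens every `[query, doc]` pair across the batched requests, runs inference, and then splits the concatenated score array back per request using the `ndoc` counts. That regrouping step can be sketched on its own (a simplification of the server code, using plain lists instead of a numpy array):

```python
def split_scores(flat_scores, ndoc):
    # flat_scores: one score per (query, doc) pair, in request order.
    # ndoc[i]: how many docs request i contributed to the flattened batch.
    grouped, pos = [], 0
    for n in ndoc:
        grouped.append(flat_scores[pos : pos + n])
        pos += n
    return grouped
```

Each request gets back exactly the slice of scores its documents produced, which is what lets the worker batch several rerank requests into one model call.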
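One more observation on `client_multibatch.py`: it starts its request threads but never joins them. The script still finishes because the threads are non-daemon, but joining makes completion explicit and guarantees all results exist before they are used. A minimal sketch of the start/join pattern (the `worker` body here is a stand-in for the per-thread POST):

```python
from threading import Thread

results = []


def worker(idx):
    # Stand-in for the httpx.post call made per thread in client_multibatch.py.
    results.append(idx * idx)


threads = [Thread(target=worker, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()  # wait for every request thread before inspecting results
```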