diff --git a/comps/dataprep/opensearch/README.md b/comps/dataprep/opensearch/README.md index ed0af36a99..a4067b7eaa 100644 --- a/comps/dataprep/opensearch/README.md +++ b/comps/dataprep/opensearch/README.md @@ -6,14 +6,11 @@ For dataprep microservice for text input, we provide here the `Langchain` framew ### 1.1 Install Requirements -- option 1: Install Single-process version (for 1-10 files processing) +- option 1: Install single-process version (for processing up to 10 files) ```bash apt update -apt install default-jre -apt-get install tesseract-ocr -y -apt-get install libtesseract-dev -y -apt-get install poppler-utils -y +apt install default-jre tesseract-ocr libtesseract-dev poppler-utils -y # for langchain cd langchain pip install -r requirements.txt @@ -26,6 +23,7 @@ Please refer to this [readme](../../vectorstores/opensearch/README.md). ### 1.3 Setup Environment Variables ```bash +export your_ip=$(hostname -I | awk '{print $1}') export OPENSEARCH_URL="http://${your_ip}:9200" export INDEX_NAME=${your_index_name} export PYTHONPATH=${path_to_comps} @@ -60,7 +58,7 @@ export TEI_ENDPOINT="http://localhost:$your_port" Start document preparation microservice for OpenSearch with below command. 
-- option 1: Start single-process version (for 1-10 files processing) +- option 1: Start single-process version (for processing up to 10 files) ```bash cd langchain @@ -87,7 +85,7 @@ export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} - Build docker image with langchain -- option 1: Start single-process version (for 1-10 files processing) +- option 1: Start single-process version (for processing up to 10 files) ```bash cd ../../ @@ -96,7 +94,7 @@ docker build -t opea/dataprep-opensearch:latest --build-arg https_proxy=$https_p ### 2.4 Run Docker with CLI (Option A) -- option 1: Start single-process version (for 1-10 files processing) +- option 1: Start single-process version (for processing up to 10 files) ```bash docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-opensearch:latest diff --git a/comps/dataprep/opensearch/langchain/config.py b/comps/dataprep/opensearch/langchain/config.py index 57694c28c6..49e9e65a5d 100644 --- a/comps/dataprep/opensearch/langchain/config.py +++ b/comps/dataprep/opensearch/langchain/config.py @@ -4,7 +4,6 @@ import os # Embedding model - EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") # OpenSearch Connection Information diff --git a/comps/dataprep/opensearch/langchain/requirements.txt b/comps/dataprep/opensearch/langchain/requirements.txt index aee90c3a62..fa242973e8 100644 --- a/comps/dataprep/opensearch/langchain/requirements.txt +++ b/comps/dataprep/opensearch/langchain/requirements.txt @@ -28,4 +28,3 @@ sentence_transformers shortuuid unstructured[all-docs] uvicorn - diff --git a/comps/retrievers/opensearch/langchain/Dockerfile b/comps/retrievers/opensearch/langchain/Dockerfile index ed7f8f68eb..038b5d6bc1 100644 --- a/comps/retrievers/opensearch/langchain/Dockerfile 
+++ b/comps/retrievers/opensearch/langchain/Dockerfile @@ -26,4 +26,3 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/retrievers/opensearch/langchain ENTRYPOINT ["python", "retriever_opensearch.py"] - diff --git a/comps/vectorstores/opensearch/docker-compose-opensearch.yml b/comps/vectorstores/opensearch/docker-compose-opensearch.yml index 06fcc815f6..1769850e65 100644 --- a/comps/vectorstores/opensearch/docker-compose-opensearch.yml +++ b/comps/vectorstores/opensearch/docker-compose-opensearch.yml @@ -11,21 +11,21 @@ services: - node.name=opensearch-node1 - discovery.seed_hosts=opensearch-node1,opensearch-node2 - cluster.initial_master_nodes=opensearch-node1,opensearch-node2 - - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM - - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later + - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM + - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later ulimits: memlock: soft: -1 hard: -1 nofile: - soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems + soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems hard: 65536 volumes: - ./opensearch-data1:/var/lib/opensearch/data ports: - 9200:9200 - - 9600:9600 # required for Performance Analyzer + - 9600:9600 # required for Performance Analyzer networks: - opensearch-net security_opt: @@ -40,7 
+40,7 @@ services: - cluster.initial_master_nodes=opensearch-node1,opensearch-node2 - bootstrap.memory_lock=true - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" - - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later + - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later ulimits: memlock: soft: -1 @@ -64,7 +64,7 @@ services: expose: - "5601" environment: - OPENSEARCH_HOSTS: '["https://opensearch-node1:9200","https://opensearch-node2:9200"]' # must be a string with no spaces when specified as an environment variable + OPENSEARCH_HOSTS: '["https://opensearch-node1:9200","https://opensearch-node2:9200"]' # must be a string with no spaces when specified as an environment variable networks: - opensearch-net security_opt: