From 678fcf118a2d40b970098a16828df738602bb857 Mon Sep 17 00:00:00 2001 From: Ajay Kallepalli Date: Sun, 15 Dec 2024 21:12:00 -0800 Subject: [PATCH] Adding git to Dockerfile, tested dockerfile and dockercompose. Also parametrized variables in prepare_doc_arango.py --- comps/dataprep/arango/langchain/Dockerfile | 5 +++-- comps/dataprep/arango/langchain/README.md | 2 +- comps/dataprep/arango/langchain/config.py | 18 ++++++++++-------- .../docker-compose-dataprep-arango.yaml | 6 ++++-- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/comps/dataprep/arango/langchain/Dockerfile b/comps/dataprep/arango/langchain/Dockerfile index 7bd07262a..5d8aa7a48 100644 --- a/comps/dataprep/arango/langchain/Dockerfile +++ b/comps/dataprep/arango/langchain/Dockerfile @@ -11,7 +11,8 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin build-essential \ default-jre \ libgl1-mesa-glx \ - libjemalloc-dev + libjemalloc-dev \ + git RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ @@ -35,4 +36,4 @@ USER user WORKDIR /home/user/comps/dataprep/arango/langchain -ENTRYPOINT ["python", "prepare_doc_arango.py"] +ENTRYPOINT ["python", "prepare_doc_arango.py"] \ No newline at end of file diff --git a/comps/dataprep/arango/langchain/README.md b/comps/dataprep/arango/langchain/README.md index 1d1e696d4..d57249510 100644 --- a/comps/dataprep/arango/langchain/README.md +++ b/comps/dataprep/arango/langchain/README.md @@ -89,7 +89,7 @@ curl -X POST \ http://localhost:6007/v1/dataprep ``` -Additional options that can be specified from the config.py file are as follows: +Additional options can be set through environment variables (their default values are defined in config.py): OpenAI Configuration: - `OPENAI_EMBED_MODEL`: The embedding model to use for the OpenAI service. 
diff --git a/comps/dataprep/arango/langchain/config.py b/comps/dataprep/arango/langchain/config.py index 525862a9d..5b27a8166 100644 --- a/comps/dataprep/arango/langchain/config.py +++ b/comps/dataprep/arango/langchain/config.py @@ -10,10 +10,10 @@ ARANGO_DB_NAME = os.getenv("ARANGO_DB_NAME", "_system") # ArangoDB graph configuration -USE_ONE_ENTITY_COLLECTION = True -INSERT_ASYNC = False -ARANGO_BATCH_SIZE = 1000 -INCLUDE_SOURCE = True +USE_ONE_ENTITY_COLLECTION = (os.getenv("USE_ONE_ENTITY_COLLECTION") or "true").strip().lower() in ("1", "true", "yes", "on") # os.getenv returns str, so parse the flag explicitly ("false" would otherwise be truthy) +INSERT_ASYNC = (os.getenv("INSERT_ASYNC") or "false").strip().lower() in ("1", "true", "yes", "on") +ARANGO_BATCH_SIZE = int(os.getenv("ARANGO_BATCH_SIZE") or 1000) # unset or empty falls back to 1000 +INCLUDE_SOURCE = (os.getenv("INCLUDE_SOURCE") or "true").strip().lower() in ("1", "true", "yes", "on") # Text Generation Inference configuration TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") @@ -27,11 +27,13 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") OPENAI_EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small") OPENAI_EMBED_DIMENSIONS = os.getenv("OPENAI_EMBED_DIMENSIONS", 512) +OPENAI_CHAT_MODEL = os.getenv("OPENAI_CHAT_MODEL", "gpt-4o") +OPENAI_CHAT_TEMPERATURE = float(os.getenv("OPENAI_CHAT_TEMPERATURE") or 0) # LLMGraphTransformer configuration -ALLOWED_NODES = [] # ["Person", "Organization"] -ALLOWED_RELATIONSHIPS = [] # [("Person", "knows", "Person"), ("Person", "works_at", "Organization")] -NODE_PROPERTIES = False # ["description"] -RELATIONSHIP_PROPERTIES = False # ["description"] +ALLOWED_NODES = [s.strip() for s in (os.getenv("ALLOWED_NODES") or "").split(",") if s.strip()] # env: comma-separated names, e.g. "Person,Organization"; unset gives [] +ALLOWED_RELATIONSHIPS = [s.strip() for s in (os.getenv("ALLOWED_RELATIONSHIPS") or "").split(",") if s.strip()] # env: comma-separated names, e.g. "knows,works_at"; unset gives [] +NODE_PROPERTIES = [s.strip() for s in (os.getenv("NODE_PROPERTIES") or "description").split(",") if s.strip()] # env: comma-separated names; unset gives ["description"] +RELATIONSHIP_PROPERTIES = [s.strip() for s in (os.getenv("RELATIONSHIP_PROPERTIES") or "description").split(",") if s.strip()] SYSTEM_PROMPT_PATH = os.getenv("SYSTEM_PROMPT_PATH", "./prompt.txt") \ No newline at end of file diff --git a/comps/dataprep/arango/langchain/docker-compose-dataprep-arango.yaml 
b/comps/dataprep/arango/langchain/docker-compose-dataprep-arango.yaml index c766b5c03..08408c127 100644 --- a/comps/dataprep/arango/langchain/docker-compose-dataprep-arango.yaml +++ b/comps/dataprep/arango/langchain/docker-compose-dataprep-arango.yaml @@ -8,6 +8,8 @@ services: container_name: arango-graph-db ports: - "8529:8529" + environment: + ARANGO_ROOT_PASSWORD: ${ARANGO_PASSWORD} # same ${ARANGO_PASSWORD} that dataprep-arango receives as ARANGO_PASSWORD tgi_gaudi_service: image: ghcr.io/huggingface/tgi-gaudi:2.0.5 container_name: tgi-service @@ -23,7 +25,7 @@ services: HF_TOKEN: ${HF_TOKEN} command: --model-id ${LLM_MODEL_ID} --auto-truncate --max-input-tokens 1024 --max-total-tokens 2048 dataprep-arango: - image: opea/gen-ai-comps:dataprep-arango-xeon-server + image: opea/dataprep-arango:latest container_name: dataprep-arango-server depends_on: - arango-vector-db @@ -35,7 +37,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - ARANGO_URL: ${ARANGO_URL} + ARANGO_URL: http://arango-graph-db:8529 # host is the container_name of the ArangoDB service in this file; port matches its ports entry ARANGO_USERNAME: ${ARANGO_USERNAME} ARANGO_PASSWORD: ${ARANGO_PASSWORD} ARANGO_DB_NAME: ${ARANGO_DB_NAME}