From c9c6963f7e9f8c7a3bc41d123683fb55ace37110 Mon Sep 17 00:00:00 2001
From: Alex Litvinov
Date: Sun, 23 Jun 2024 22:48:16 +0200
Subject: [PATCH] add prefect config and dockerfile for llm ingestion

pin requests version for prefect-docker to work
---
Reviewer notes (text between the three-dash line and the diff is commentary,
not part of the commit message):
* This patch was re-wrapped from a copy whose whitespace had been collapsed.
  The subject/body split, the YAML indentation in the prefect.yaml hunks and
  the position of blank context lines (in particular the single blank context
  line in the ingest/requirements.txt hunk) are inferred, not recovered.
  Compare them with the target tree, or apply with whitespace tolerance,
  before relying on them.
* ingest/llm.dockerfile was tightened in review: apt now runs with
  --no-install-recommends and removes its package lists in the same layer,
  and the pip upgrade no longer keeps a download cache. The commit id in the
  first line and the blob id on the dockerfile "index" line still name the
  revision as originally authored.

 ingest/llm.dockerfile   | 26 ++++++++++++++++++++++++++
 ingest/requirements.txt | 30 +++++++++++++++---------------
 prefect.yaml            | 30 +++++++++++++++++++++++++++---
 requirements.txt        |  1 +
 4 files changed, 69 insertions(+), 18 deletions(-)
 create mode 100644 ingest/llm.dockerfile

diff --git a/ingest/llm.dockerfile b/ingest/llm.dockerfile
new file mode 100644
index 0000000..a14e100
--- /dev/null
+++ b/ingest/llm.dockerfile
@@ -0,0 +1,26 @@
+FROM prefecthq/prefect:2-python3.10
+
+ENV EMBEDDING_CACHE_NAMESPACE=llm_zoomcamp
+
+# gcc and python3-dev are presumably needed to compile C extensions from requirements.txt.
+# libc6-dev is listed explicitly: gcc only recommends it, so --no-install-recommends would drop it.
+# The package lists are deleted in the same layer so they are not stored in the image.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        gcc \
+        libc6-dev \
+        python3-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip install --no-cache-dir -U pip
+
+WORKDIR /usr/src
+
+# Dependencies are installed before the sources are copied so that this layer
+# is rebuilt only when requirements.txt changes.
+COPY ingest/requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY ingest/llm/ingest_llm.py ingest/llm/
+COPY ingest/readers ingest/readers
+COPY ingest/utils ingest/utils
diff --git a/ingest/requirements.txt b/ingest/requirements.txt
index a812362..3fc97e2 100644
--- a/ingest/requirements.txt
+++ b/ingest/requirements.txt
@@ -1,20 +1,20 @@
-slack-sdk==3.27.1
-langchain==0.1.11
-google-api-python-client==2.121.0
+slack-sdk==3.30.0
+langchain==0.1.20
+google-api-python-client==2.134.0
 google-auth-httplib2==0.2.0
 google-auth-oauthlib==1.2.0
-sentence-transformers==2.5.1
-prefect-gcp==0.5.6
-GitPython==3.1.42
-pymilvus==2.3.6
-llama-index-core==0.10.18.post1
-llama-index-readers-web==0.1.6
-llama-index-readers-github==0.1.7
-llama-index-vector-stores-milvus==0.1.6
+sentence-transformers==3.0.1
+prefect-gcp==0.5.12
+GitPython==3.1.43
+pymilvus==2.4.4
+llama-index-core==0.10.48
+llama-index-readers-web==0.1.19
+llama-index-readers-github==0.1.9
+llama-index-vector-stores-milvus==0.1.20
 llama-index-embeddings-langchain==0.1.2
-trafilatura==1.7.0
-nbconvert==7.16.2
-ipython==8.22.2
-upstash-redis==1.0.0
+trafilatura==1.10.0
+nbconvert==7.16.4
+ipython==8.25.0
+upstash-redis==1.1.0
 jupyter-notebook-parser==0.1.4
 
diff --git a/prefect.yaml b/prefect.yaml
index dd1381b..0f14c3c 100644
--- a/prefect.yaml
+++ b/prefect.yaml
@@ -4,7 +4,7 @@
 
 # Generic metadata about this project
 name: zoomcamp-bot-index
-prefect-version: 2.16.3
+prefect-version: 2.19.5
 
 # build section allows you to manage and build docker images
 build:
@@ -31,8 +31,8 @@ definitions:
           EXECUTION_ENV: zilliz
         auto_remove: true
   schedules:
-    at_23_daily: &at_23_daily
-      cron: 0 23 * * *
+    at_0_daily: &at_0_daily
+      cron: 0 0 * * *
       timezone: Europe/Madrid
       day_or: true
     at_1_daily: &at_1_daily
@@ -123,3 +123,27 @@ deployments:
     pull:
       - prefect.deployments.steps.set_working_directory:
           directory: /usr/src
+  - name: fill-index-zilliz-llm
+    tags:
+      - llm-ingest
+      - zoomcamp-faq-bot
+    description: Fill Zilliz index for LLM Zoomcamp
+    schedules:
+      - *at_0_daily
+    entrypoint: ingest/llm/ingest_llm.py:fill_llm_index
+    work_pool:
+      <<: *zoomcamp-faq-bot-pool
+      job_variables:
+        <<: *job-variables
+        env:
+          EXECUTION_ENV: zilliz-cluster
+    build:
+      - prefect.deployments.steps.run_shell_script: *shell-script-config
+      - prefect_docker.deployments.steps.build_docker_image:
+          <<: *docker-build-config
+          # Uses the docker_build_config and overrides the dockerfile and image_name fields
+          dockerfile: ingest/llm.dockerfile
+          image_name: aaalexlit/zoomcamp-faq-ingest-llm
+    pull:
+      - prefect.deployments.steps.set_working_directory:
+          directory: /usr/src
diff --git a/requirements.txt b/requirements.txt
index 31270ef..e21625c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -26,3 +26,4 @@ prefect-docker
 langchain-openai
 upstash-redis
 jupyter-notebook-parser
+requests==2.31.0