Skip to content

Commit

Permalink
add prefect config and dockerfile for llm ingestion
Browse files Browse the repository at this point in the history
pin requests version for prefect-docker to work
  • Loading branch information
aaalexlit committed Jun 23, 2024
1 parent a33aea2 commit c9c6963
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 18 deletions.
17 changes: 17 additions & 0 deletions ingest/llm.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Image for the Prefect deployment that runs the LLM ingestion flow
# (ingest/llm/ingest_llm.py).
FROM prefecthq/prefect:2-python3.10

# NOTE(review): presumably read by the ingestion code to namespace its
# embedding cache; the consumer is not visible in this file - confirm.
ENV EMBEDDING_CACHE_NAMESPACE=llm_zoomcamp

# Compiler and Python headers, presumably needed by requirements that build
# C extensions. update + install + cleanup run in a single layer so the apt
# package index is not persisted in the final image.
RUN apt-get update && \
    apt-get install -y gcc python3-dev && \
    rm -rf /var/lib/apt/lists/*

# --no-cache-dir keeps pip's download cache out of the layer, matching the
# requirements install below.
RUN pip install --no-cache-dir -U pip

WORKDIR /usr/src

# Requirements are copied and installed before the sources so this layer is
# reused from the build cache when only code changes.
COPY ingest/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Only the LLM flow and the packages copied alongside it are shipped.
COPY ingest/llm/ingest_llm.py ingest/llm/
COPY ingest/readers ingest/readers
COPY ingest/utils ingest/utils
30 changes: 15 additions & 15 deletions ingest/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
# NOTE(review): this listing is a rendered diff hunk (-1,20 +1,20) whose +/-
# markers were stripped, so packages that changed appear twice. Within each
# changed run the first (lower) pins are presumably the removed lines and the
# second (higher) pins the added ones - confirm against the repository before
# using this as an actual requirements file.
# presumably removed:
slack-sdk==3.27.1
langchain==0.1.11
google-api-python-client==2.121.0
# presumably added:
slack-sdk==3.30.0
langchain==0.1.20
google-api-python-client==2.134.0
# unchanged context:
google-auth-httplib2==0.2.0
google-auth-oauthlib==1.2.0
# presumably removed:
sentence-transformers==2.5.1
prefect-gcp==0.5.6
GitPython==3.1.42
pymilvus==2.3.6
llama-index-core==0.10.18.post1
llama-index-readers-web==0.1.6
llama-index-readers-github==0.1.7
llama-index-vector-stores-milvus==0.1.6
# presumably added:
sentence-transformers==3.0.1
prefect-gcp==0.5.12
GitPython==3.1.43
pymilvus==2.4.4
llama-index-core==0.10.48
llama-index-readers-web==0.1.19
llama-index-readers-github==0.1.9
llama-index-vector-stores-milvus==0.1.20
# unchanged context:
llama-index-embeddings-langchain==0.1.2
# presumably removed:
trafilatura==1.7.0
nbconvert==7.16.2
ipython==8.22.2
upstash-redis==1.0.0
# presumably added:
trafilatura==1.10.0
nbconvert==7.16.4
ipython==8.25.0
upstash-redis==1.1.0
# unchanged context:
jupyter-notebook-parser==0.1.4

30 changes: 27 additions & 3 deletions prefect.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# Generic metadata about this project
name: zoomcamp-bot-index
prefect-version: 2.16.3
prefect-version: 2.19.5

# build section allows you to manage and build docker images
build:
Expand All @@ -31,8 +31,8 @@ definitions:
EXECUTION_ENV: zilliz
auto_remove: true
schedules:
at_23_daily: &at_23_daily
cron: 0 23 * * *
at_0_daily: &at_0_daily
cron: 0 0 * * *
timezone: Europe/Madrid
day_or: true
at_1_daily: &at_1_daily
Expand Down Expand Up @@ -123,3 +123,27 @@ deployments:
pull:
- prefect.deployments.steps.set_working_directory:
directory: /usr/src
# Deployment that runs the LLM Zoomcamp ingestion flow on the *at_0_daily
# schedule.
- name: fill-index-zilliz-llm
  tags:
  - llm-ingest
  - zoomcamp-faq-bot
  description: Fill Zilliz index for LLM Zoomcamp
  schedules:
  - *at_0_daily
  entrypoint: ingest/llm/ingest_llm.py:fill_llm_index
  work_pool:
    <<: *zoomcamp-faq-bot-pool
    job_variables:
      <<: *job-variables
      # NOTE(review): YAML merge keys are shallow, so this `env` mapping
      # replaces (does not extend) any `env` carried by *job-variables.
      # Confirm no other inherited variables are needed here.
      env:
        EXECUTION_ENV: zilliz-cluster
  build:
  - prefect.deployments.steps.run_shell_script: *shell-script-config
  - prefect_docker.deployments.steps.build_docker_image:
      <<: *docker-build-config
      # Reuses the shared *docker-build-config and overrides only the
      # dockerfile and image_name fields for the LLM ingestion image.
      dockerfile: ingest/llm.dockerfile
      image_name: aaalexlit/zoomcamp-faq-ingest-llm
  pull:
  # Matches the WORKDIR set in ingest/llm.dockerfile.
  - prefect.deployments.steps.set_working_directory:
      directory: /usr/src
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ prefect-docker
langchain-openai
upstash-redis
jupyter-notebook-parser
requests==2.31.0

0 comments on commit c9c6963

Please sign in to comment.