-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Guillaume Moutier <[email protected]>
- Loading branch information
1 parent
b00718b
commit 7ff8aa7
Showing
23 changed files
with
3,076 additions
and
1,077 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Build-context exclusions. | ||
# NOTE: unlike .gitignore, .dockerignore patterns are matched relative to the | ||
# root of the build context, so a bare "*.ext" only matches files directly in | ||
# the root. Patterns meant to apply at any depth are prefixed with "**/". | ||
|
||
# Ignore Python cache files | ||
__pycache__/ | ||
**/__pycache__/ | ||
**/*.pyc | ||
**/*.pyo | ||
**/*.pyd | ||
|
||
# Ignore virtual environments | ||
env/ | ||
venv/ | ||
|
||
# Ignore development artifacts | ||
**/*.log | ||
**/*.db | ||
**/*.sqlite3 | ||
|
||
# Ignore configuration and sensitive files | ||
**/.env | ||
**/*.env | ||
# NOTE(review): the two patterns below only match at the context root; confirm | ||
# whether nested *.ini / *.cfg files are needed in the image before widening. | ||
*.ini | ||
*.cfg | ||
|
||
# Ignore IDE and editor settings | ||
.vscode/ | ||
.idea/ | ||
**/*.swp | ||
**/*.swo | ||
|
||
# Ignore Git files | ||
.git/ | ||
.gitignore | ||
|
||
# Ignore Docker files themselves (optional if not needed in the image) | ||
.dockerignore | ||
Dockerfile* | ||
|
||
# Ignore build artifacts (if applicable) | ||
build/ | ||
dist/ | ||
*.egg-info |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,59 @@ | ||
FROM python:3.11-slim-bookworm | ||
ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s | ||
|
||
FROM ${BASE_IMAGE} | ||
|
||
ARG CPU_ONLY=false | ||
WORKDIR /docling-serve | ||
|
||
RUN apt-get update \ | ||
&& apt-get install -y libgl1 libglib2.0-0 curl wget git \ | ||
&& apt-get clean | ||
USER 0 | ||
|
||
RUN pip install --no-cache-dir poetry | ||
################################################################################################### | ||
# OS Layer # | ||
################################################################################################### | ||
|
||
COPY pyproject.toml poetry.lock README.md /docling-serve/ | ||
RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \ | ||
dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \ | ||
dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \ | ||
dnf config-manager --enable crb && \ | ||
dnf -y update && \ | ||
dnf install -y $(cat /tmp/os-packages.txt) && \ | ||
dnf -y clean all && \ | ||
rm -rf /var/cache/dnf | ||
|
||
RUN if [ "$CPU_ONLY" = "true" ]; then \ | ||
poetry install --no-root --with cpu; \ | ||
else \ | ||
poetry install --no-root; \ | ||
fi | ||
ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/ | ||
|
||
ENV HF_HOME=/tmp/ | ||
ENV TORCH_HOME=/tmp/ | ||
################################################################################################### | ||
# Docling layer # | ||
################################################################################################### | ||
|
||
RUN poetry run python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; artifacts_path = StandardPdfPipeline.download_models_hf(force=True);' | ||
USER 1001 | ||
|
||
WORKDIR /opt/app-root/src | ||
|
||
# In container environments, always set a thread budget to avoid undesired thread congestion. | ||
ENV OMP_NUM_THREADS=4 | ||
|
||
COPY ./docling_serve /docling-serve/docling_serve | ||
ENV LANG=en_US.UTF-8 | ||
ENV LC_ALL=en_US.UTF-8 | ||
ENV PYTHONIOENCODING=utf-8 | ||
|
||
COPY --chown=1001:0 pyproject.toml poetry.lock models_download.py README.md ./ | ||
|
||
RUN pip install --no-cache-dir poetry && \ | ||
# We are already in a virtual environment, so we don't need to create a new one, only activate it. | ||
poetry config virtualenvs.create false && \ | ||
source /opt/app-root/bin/activate && \ | ||
if [ "$CPU_ONLY" = "true" ]; then \ | ||
poetry install --no-root --no-cache --no-interaction --all-extras --with cpu --without dev; \ | ||
else \ | ||
poetry install --no-root --no-cache --no-interaction --all-extras --without dev; \ | ||
fi && \ | ||
echo "Downloading models..." && \ | ||
python models_download.py && \ | ||
chown -R 1001:0 /opt/app-root/src && \ | ||
chmod -R g=u /opt/app-root/src | ||
|
||
COPY --chown=1001:0 --chmod=664 ./docling_serve ./docling_serve | ||
|
||
EXPOSE 5000 | ||
EXPOSE 8080 | ||
|
||
CMD ["poetry", "run", "uvicorn", "--port", "5000", "--host", "0.0.0.0", "docling_serve.app:app"] | ||
CMD ["python", "docling_serve/app.py"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.