Skip to content

Commit

Permalink
api v1alpha1
Browse files Browse the repository at this point in the history
Signed-off-by: Guillaume Moutier <[email protected]>
  • Loading branch information
dolfim-ibm authored and guimou committed Jan 9, 2025
1 parent b00718b commit 7ff8aa7
Show file tree
Hide file tree
Showing 23 changed files with 3,076 additions and 1,077 deletions.
40 changes: 40 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Ignore Python cache files
__pycache__/
**/__pycache__/
*.pyc
*.pyo
*.pyd

# Ignore virtual environments
env/
venv/

# Ignore development artifacts
*.log
*.db
*.sqlite3

# Ignore configuration and sensitive files
**/.env
*.env
*.ini
*.cfg

# Ignore IDE and editor settings
.vscode/
.idea/
*.swp
*.swo

# Ignore Git files
.git/
.gitignore

# Ignore Docker files themselves (optional if not needed in the image)
.dockerignore
Dockerfile*

# Ignore build artifacts (if applicable)
build/
dist/
*.egg-info
63 changes: 45 additions & 18 deletions Containerfile
Original file line number Diff line number Diff line change
@@ -1,32 +1,59 @@
FROM python:3.11-slim-bookworm
ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

FROM ${BASE_IMAGE}

ARG CPU_ONLY=false
WORKDIR /docling-serve

RUN apt-get update \
&& apt-get install -y libgl1 libglib2.0-0 curl wget git \
&& apt-get clean
USER 0

RUN pip install --no-cache-dir poetry
###################################################################################################
# OS Layer #
###################################################################################################

COPY pyproject.toml poetry.lock README.md /docling-serve/
RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
dnf config-manager --enable crb && \
dnf -y update && \
dnf install -y $(cat /tmp/os-packages.txt) && \
dnf -y clean all && \
rm -rf /var/cache/dnf

RUN if [ "$CPU_ONLY" = "true" ]; then \
poetry install --no-root --with cpu; \
else \
poetry install --no-root; \
fi
ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/

ENV HF_HOME=/tmp/
ENV TORCH_HOME=/tmp/
###################################################################################################
# Docling layer #
###################################################################################################

RUN poetry run python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; artifacts_path = StandardPdfPipeline.download_models_hf(force=True);'
USER 1001

WORKDIR /opt/app-root/src

# On container environments, always set a thread budget to avoid undesired thread congestion.
ENV OMP_NUM_THREADS=4

COPY ./docling_serve /docling-serve/docling_serve
ENV LANG=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8
ENV PYTHONIOENCODING=utf-8

COPY --chown=1001:0 pyproject.toml poetry.lock models_download.py README.md ./

RUN pip install --no-cache-dir poetry && \
# We already are in a virtual environment, so we don't need to create a new one, only activate it.
poetry config virtualenvs.create false && \
source /opt/app-root/bin/activate && \
if [ "$CPU_ONLY" = "true" ]; then \
poetry install --no-root --no-cache --no-interaction --all-extras --with cpu --without dev; \
else \
poetry install --no-root --no-cache --no-interaction --all-extras --without dev; \
fi && \
echo "Downloading models..." && \
python models_download.py && \
chown -R 1001:0 /opt/app-root/src && \
chmod -R g=u /opt/app-root/src

COPY --chown=1001:0 --chmod=664 ./docling_serve ./docling_serve

EXPOSE 5000
EXPOSE 8080

CMD ["poetry", "run", "uvicorn", "--port", "5000", "--host", "0.0.0.0", "docling_serve.app:app"]
CMD ["python", "docling_serve/app.py"]
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ md-lint-file:
$(CMD_PREFIX) touch .markdown-lint

.PHONY: docling-serve-cpu-image
docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" continaer image
docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve CPU ONLY]"
$(CMD_PREFIX) docker build --build-arg CPU_ONLY=true -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve-cpu:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) ghcr.io/ds4sd/docling-serve-cpu:main
$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) quay.io/ds4sd/docling-serve-cpu:main

.PHONY: docling-serve-gpu-image
docling-serve-gpu-image: Containerfile ## Build docling-serve continaer image with GPU support
docling-serve-gpu-image: Containerfile ## Build docling-serve container image with GPU support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with GPU]"
$(CMD_PREFIX) docker build --build-arg CPU_ONLY=false -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) ghcr.io/ds4sd/docling-serve:main
Expand Down
Loading

0 comments on commit 7ff8aa7

Please sign in to comment.