forked from ianroberts/nteu-adapter-tilde
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
42 lines (29 loc) · 1.54 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Start from the slim Python 3.8 image
FROM python:3.8-slim
# Fetch the tini init binary (release v0.19.0) into /sbin
# NOTE(review): this download is not checksum-verified -- consider pinning a sha256
ADD https://github.com/krallin/tini/releases/download/v0.19.0/tini /sbin/tini
# Mark tini as executable and create the unprivileged "elg" group and user
# (gid/uid 1001, home directory /elg, no password)
RUN chmod +x /sbin/tini \
    && addgroup --gid 1001 elg \
    && adduser \
        --disabled-password \
        --gecos "ELG User,,," \
        --home /elg \
        --ingroup elg \
        --uid 1001 \
        elg
# Everything from here down runs as the unprivileged user account
USER elg:elg
WORKDIR /elg
# Create a Python virtual environment for the dependencies and bring its pip
# up to date.  This is done before requirements.txt is copied so that editing
# the requirements does not invalidate this cached layer, and --no-cache-dir
# keeps pip's download cache out of the image.
RUN python -m venv venv \
    && venv/bin/python -m pip install --no-cache-dir --upgrade pip
# Copy in just the requirements file, so the dependency layer below is rebuilt
# only when the requirements change rather than on every source edit
COPY --chown=elg:elg requirements.txt /elg/
# Install the application's dependencies into the virtual environment
RUN venv/bin/python -m pip install --no-cache-dir -r requirements.txt
# Copy in the entrypoint script and the adapter code, owned by the elg user
COPY --chown=elg:elg \
     docker-entrypoint.sh \
     adapter.py \
     /elg/
# Many Python libraries used for LT, such as nltk and transformers, default to
# downloading their models from the internet the first time they are accessed.
# This is a problem for container images, as every run is the "first time"
# when starting from a clean copy of the image.  It is therefore strongly
# recommended to pre-download any models that your code depends on during the
# build, so they are cached within the final image.  For example:
#
# RUN venv/bin/python -m nltk.downloader -d venv/share/nltk_data punkt
#
# RUN venv/bin/python -c "from transformers import DistilBertTokenizer; \
#   DistilBertTokenizer.from_pretrained('bert-base-uncased')"
# Make sure the entrypoint script is executable, whatever file mode it had in
# the build context
RUN chmod +x ./docker-entrypoint.sh
# Set LOGURU_LEVEL in the container environment (presumably read by the loguru
# logging library -- verify against adapter.py)
ENV LOGURU_LEVEL=INFO
# Exec-form entrypoint; the relative path is resolved against WORKDIR.
# NOTE(review): tini is installed at /sbin/tini but is not referenced here --
# presumably docker-entrypoint.sh launches it; verify.
ENTRYPOINT ["./docker-entrypoint.sh"]