-
Notifications
You must be signed in to change notification settings - Fork 1
/
Dockerfile
158 lines (138 loc) · 6.84 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# This builds the base images that we can use for development testing. See
# .github/workflows/debug.yml
# It is also used in the main GitHub Actions workflow (see .github/workflows/github.yml).
#
# Notes:
# - Mostly based initially on
# https://stackoverflow.com/a/70866416 [How to install python specific version on docker?]
# - For Docker docs, see https://docs.docker.com/get-started.
# - To avoid Linux line-continuation characters (i.e., \<newline>), <<END-style heredocs are used. See
# https://www.docker.com/blog/introduction-to-heredocs-in-dockerfiles
# This allows for commenting out code (e.g., inline comment within heredoc group).
#
# Usage:
# 1. Build the image:
# $ docker build -t mezcla-dev -f- . <Dockerfile
# # TODO: build --platform linux/x86_64 ...
# 2. Run tests using the created image (n.b., uses entrypoint at end below with run_tests.bash):
# $ docker run -it --rm --mount type=bind,source="$(pwd)",target=/home/mezcla mezcla-dev
# TODO: --mount => --volume???
# NOTE: --rm removes container afterwards; -it is for --interactive with --tty
# 3. [Optional] Run a bash shell using the created image:
# $ docker run -it --rm --entrypoint='/bin/bash' --mount type=bind,source="$(pwd)",target=/home/mezcla mezcla-dev
# 4. Remove the image:
# $ docker rmi mezcla-dev
#
# Warning:
# - *** Changes need to be synchronized in 3 places: Dockerfile, tools/local-workflow.sh, and .github/workflows/*.yml!
#
# TODO3: keep in synch with ~/bin version (https://github.com/tomasohara/shell-scripts)
#
# Base image: a smaller image is used so that the build goes faster.
## TEST: FROM ghcr.io/catthehacker/ubuntu:act-latest
FROM catthehacker/ubuntu:act-20.04

# Build-time locations: the project directory inside the image and the
# requirements file stored within it.
ARG WORKDIR="/home/mezcla"
ARG REQUIREMENTS="${WORKDIR}/requirements.txt"

# Note: WORKDIR creates the directory when it is missing.
## TODO?: RUN mkdir -p $WORKDIR
WORKDIR ${WORKDIR}
# Python release to install, along with the matching release tag from
# actions/python-versions (both are used to form the download URL).
# Note: The workflow uses versions 3.9 to 3.11 for installations under runner VM
## OLD: ARG PYTHON_VERSION=3.8.12
## TODO: ARG PYTHON_VERSION=""
ARG PYTHON_VERSION="3.11.4"
## OLD: ARG PYTHON_TAG="117929"
ARG PYTHON_TAG=5199054971

# Defaults for the test run: the debug level and an optional regex of tests to run.
# These can be overridden at build time (n.b., docker build --build-arg "arg1=v1").
# Note: act/nektos/docker is not overriding these properly
## TODO2: fixme (see tools/run_tests.bash for workaround).
## TODO: ARG DEBUG_LEVEL=2
## DEBUG: ARG DEBUG_LEVEL=5
ARG DEBUG_LEVEL="4"
## DEBUG: ARG TEST_REGEX="simple_main_example"
ARG TEST_REGEX=""
# Report disk usage at the start of the build (i.e., before anything is installed).
# See https://github.com/orgs/community/discussions/25678 [No space left on device]
RUN <<END_RUN
    df -h
    ## TODO: track down stupid problem with step failing
    ##   echo "Top directories by disk usage (pre-install):";
    ##   du --block-size=1K / 2>&1 | sort -rn | head -20;
    # The step always reports success (workaround for quirk w/ head).
    true
END_RUN
# Temporary: free up disk space by deleting software not needed here (e.g., node),
# and then show the usage for the root filesystem.
RUN <<END_RUN
    printf '%s\n' "Warning: removing unneeded software"
    /bin/rm --recursive --force /opt/acttoolcache/node
    df --human-readable /
END_RUN
# Install Python from the archives prepared for GitHub Actions.
# Notes:
# - Based on https://stackoverflow.com/a/70866416 [How to install python specific version on docker?]
# - Nothing is done when PYTHON_VERSION is empty.
# - The archive is downloaded to /tmp, unpacked under /opt/hostedtoolcache/Python,
#   and then deleted within the same layer.
# - To find URL links, see https://github.com/actions/python-versions:
#   ex: https://github.com/actions/python-versions/releases/download/3.8.12-117929/python-3.8.12-linux-20.04-x64.tar.gz
# - Also see https://stackoverflow.com/questions/74673048/github-actions-setup-python-stopped-working.
RUN if [ -n "$PYTHON_VERSION" ]; then \
        archive="python-${PYTHON_VERSION}-linux-20.04-x64.tar.gz"; \
        target_dir="/opt/hostedtoolcache/Python/${PYTHON_VERSION}/x64"; \
        wget -qO "/tmp/${archive}" "https://github.com/actions/python-versions/releases/download/${PYTHON_VERSION}-${PYTHON_TAG}/${archive}" \
        && mkdir -p "${target_dir}" \
        && tar -xzf "/tmp/${archive}" -C "${target_dir}" --strip-components=1 \
        && rm "/tmp/${archive}"; \
    fi
## TODO (use streamlined python installation):
## RUN apt-get update && \
## apt-get install -y software-properties-common && \
## add-apt-repository -y ppa:deadsnakes/ppa && \
## apt-get update && \
## apt-get install -y python$PYTHON_MAJ_MIN
# Some programs require a "python" binary
## OLD: RUN ln -s $(which python3) /usr/local/bin/python
# Make the working directory visible to Python (i.e., importable via PYTHONPATH).
# Note: any existing PYTHONPATH is only prepended when non-empty. A plain
# "${PYTHONPATH}:..." leaves a leading ":" when the variable is unset, which adds
# an empty entry to the search path.
ENV PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${WORKDIR}"
# Install pip via get-pip.py when no specific Python version was requested (TODO rm)
# Notes:
# - Uses "=" rather than "==" inside [ ... ]: RUN uses /bin/sh by default and "=="
#   is not POSIX (e.g., dash rejects it, so the branch would never be taken).
# - The installation remains best-effort (i.e., the step succeeds regardless).
# - The installer script is always removed in this same layer; the previous
#   "true || rm ..." short-circuited, so the removal never ran.
RUN if [ "$PYTHON_VERSION" = "" ]; then \
        wget -qO /tmp/get-pip.py "https://bootstrap.pypa.io/get-pip.py" && \
        python3 /tmp/get-pip.py; \
        rm -f /tmp/get-pip.py; \
    fi
# Place the project's requirements file in the image for the installation step.
COPY requirements.txt ${REQUIREMENTS}
# Install the package requirements, unless the nltk command is already on the path
# (presumably a marker that the requirements were installed previously -- TODO confirm).
# NOTE: The workflow only handles requirements for the runner VM, not the docker container;
# Also, the results aren't cached to save space in the image (i.e., --no-cache-dir).
RUN <<END_RUN
    # Note: uses POSIX "-z" rather than "==", which is a bashism that fails under
    # /bin/sh when it is dash (so that the installation would be silently skipped).
    if [ -z "$(which nltk)" ]; then
        python -m pip install --verbose --no-cache-dir --requirement "$REQUIREMENTS"
        ## TODO?
        ## # note: makes a second pass for failed installations, doing non-binary
        ## python -m pip install --verbose --no-cache-dir --ignore-installed --no-binary --requirement $REQUIREMENTS;
    fi
END_RUN
## TODO3: add option for optional requirements (likewise, for all via '#full#')
## RUN python -m pip install --verbose $(perl -pe 's/^#opt#\s*//g;' $REQUIREMENTS | grep -v '^#')
## TEMP workaround: copy source to image
## COPY . $WORKDIR/mezcla
# Fetch the required NLTK data sets into /usr/local/share/nltk_data.
RUN python -m nltk.downloader -d /usr/local/share/nltk_data \
        punkt \
        averaged_perceptron_tagger \
        stopwords
# Install required tools and libraries (TODO: why lsb-release?)
# Notes:
# - rcs is needed for merge (TODO: place in required-packages.txt).
# - The package index is refreshed in the same layer as all the installs, so a
#   cached "apt-get update" layer cannot serve a stale index to a later install.
# - Cleans the apt-get cache and the package lists within that same layer, because
#   removals in a later layer would not shrink the image.
RUN apt-get update -y && \
    apt-get install --yes \
        enchant-2 \
        lsb-release \
        rcs && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Report disk usage once everything is installed (useful when debugging).
RUN <<END_RUN
    df -h
    ## TODO: track down stupid problem with step failing
    ##   echo "Top directories by disk usage (post-install):";
    ##   du --block-size=1K / 2>&1 | sort -rn | head -20;
    # The step always reports success (workaround for quirk w/ head).
    true
END_RUN
# Run the tests, normally pytest over mezcla/tests
# Notes:
# - The status code (i.e., $?) determines whether docker run succeeds (e.g., OK if 0).
# - ARG values only exist at build time, so the DEBUG_LEVEL and TEST_REGEX build
#   arguments are persisted via ENV. Otherwise, both expand to empty strings when
#   the container starts. They can still be overridden via docker run --env.
# - The exec form with "exec" lets the test script replace the shell, so that it
#   receives signals (e.g., from docker stop) directly.
ENV DEBUG_LEVEL="${DEBUG_LEVEL}" TEST_REGEX="${TEST_REGEX}"
ENTRYPOINT ["/bin/sh", "-c", "exec ./tools/run_tests.bash"]