From aad49fd05be445a1dc03d3cfe6801a484d4fc66e Mon Sep 17 00:00:00 2001
From: David Leifker
Date: Fri, 30 Aug 2024 14:23:09 -0500
Subject: [PATCH] feat(base-image): update base image to ubuntu

---
Review notes (below the three-dash line, so not part of the commit message):
- datahub-actions/build.gradle: checkPythonVersion uses `assert` again.
  Without it the Python one-liner only builds a tuple and exits 0 on any
  interpreter, so the minimum-version gate could never fail.
- docker/datahub-actions/Dockerfile: the apt cleanup now removes
  /var/lib/apt/lists/* explicitly. The previous
  `rm -rf /var/lib/{apt,dpkg,cache,log}/` depends on brace expansion, which
  POSIX sh (the default shell for shell-form RUN) does not perform; under a
  shell that does expand it, it would also delete the dpkg database.
- docker/datahub-actions/Dockerfile: the cache comment now matches the
  command, which moves all of /root/.cache rather than only the pip cache.
- NOTE(review): `mv /root/.cache /datahub-ingestion/.cache` nests the source
  inside the destination if that directory already exists - confirm against
  the base image.
- NOTE(review): this patch was re-wrapped from whitespace-collapsed text.
  Indentation of context lines (and the assumed 2-space -> 4-space change on
  `id 'base'`) is reconstructed; verify against the target tree before
  running `git am`.

 .github/workflows/test-results.yml         |  5 +-
 build.gradle                               |  5 +-
 datahub-actions/build.gradle               | 56 +++++++++++++++++-----
 datahub-actions/setup.py                   |  2 +-
 datahub-actions/tests/unit/test_helpers.py |  1 +
 docker/datahub-actions/Dockerfile          | 37 +++++++-------
 6 files changed, 75 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/test-results.yml b/.github/workflows/test-results.yml
index 868670b8..02b98aaf 100644
--- a/.github/workflows/test-results.yml
+++ b/.github/workflows/test-results.yml
@@ -9,6 +9,9 @@ jobs:
   unit-test-results:
     name: Unit Test Results
     runs-on: ubuntu-latest
+    permissions:
+      checks: write
+      pull-requests: write
     if: github.event.workflow_run.conclusion != 'skipped'
 
     steps:
@@ -28,7 +31,7 @@ jobs:
           done
 
       - name: Publish Unit Test Results
-        uses: EnricoMi/publish-unit-test-result-action@v1
+        uses: EnricoMi/publish-unit-test-result-action@v2
         with:
           check_name: "Unit Test Results (${{ github.event.workflow_run.name }})"
           commit: ${{ github.event.workflow_run.head_sha }}
diff --git a/build.gradle b/build.gradle
index cf63a9fa..fc78dd15 100644
--- a/build.gradle
+++ b/build.gradle
@@ -13,12 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 buildscript {
     apply from: './repositories.gradle'
     buildscript.repositories.addAll(project.repositories)
 }
 
+plugins {
+    id 'com.palantir.docker' version '0.35.0' apply false
+}
+
 project.ext.externalDependency = [
 
 ]
diff --git a/datahub-actions/build.gradle b/datahub-actions/build.gradle
index 668394c5..ea51f674 100644
--- a/datahub-actions/build.gradle
+++ b/datahub-actions/build.gradle
@@ -13,40 +13,57 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 plugins {
-  id 'base'
+    id 'base'
+    id 'com.palantir.docker'
 }
 
 ext {
     python_executable = 'python3'
     venv_name = 'venv'
+    docker_registry = 'acryldata'
+    docker_repo = 'datahub-actions'
+}
+
+if (!project.hasProperty("extra_pip_requirements")) {
+    ext.extra_pip_requirements = ""
 }
 
 task checkPythonVersion(type: Exec) {
-    commandLine python_executable, '-c', 'import sys; assert sys.version_info >= (3, 6)'
+    commandLine python_executable, '-c',
+        'import sys; assert sys.version_info >= (3, 8), f"Python version {sys.version_info[:2]} not allowed"'
 }
 
 task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
+    def sentinel_file = "${venv_name}/.venv_environment_sentinel"
     inputs.file file('setup.py')
-    outputs.dir("${venv_name}")
-    commandLine 'bash', '-c', "${python_executable} -m venv ${venv_name} && ${venv_name}/bin/python -m pip install --upgrade pip wheel setuptools"
+    outputs.file(sentinel_file)
+    commandLine 'bash', '-c',
+        "${python_executable} -m venv ${venv_name} && " +
+        "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " +
+        "touch ${sentinel_file}"
 }
 
 task installPackage(type: Exec, dependsOn: environmentSetup) {
+    def sentinel_file = "${venv_name}/.build_install_package_sentinel"
     inputs.file file('setup.py')
-    outputs.dir("${venv_name}")
-    commandLine "${venv_name}/bin/pip", 'install', '-e', '.'
+    outputs.file(sentinel_file)
+    commandLine 'bash', '-c',
+        "source ${venv_name}/bin/activate && set -x && " +
+        "uv pip install -e . ${extra_pip_requirements} && " +
+        "touch ${sentinel_file}"
 }
 
 task install(dependsOn: [installPackage])
 
 task installDev(type: Exec, dependsOn: [install]) {
+    def sentinel_file = "${venv_name}/.build_install_dev_sentinel"
     inputs.file file('setup.py')
-    outputs.dir("${venv_name}")
-    outputs.file("${venv_name}/.build_install_dev_sentinel")
-    commandLine 'bash', '-x', '-c',
-        "${venv_name}/bin/pip install -e .[dev] && touch ${venv_name}/.build_install_dev_sentinel"
+    outputs.file(sentinel_file)
+    commandLine 'bash', '-c',
+        "source ${venv_name}/bin/activate && set -x && " +
+        "uv pip install -e .[dev] ${extra_pip_requirements} && " +
+        "touch ${sentinel_file}"
 }
 
 task lint(type: Exec, dependsOn: installDev) {
@@ -108,6 +125,23 @@ task cleanPythonCache(type: Exec) {
         "find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete"
 }
 
+docker {
+    name "${docker_registry}/${docker_repo}:v${version}"
+    version "v${version}"
+    dockerfile file("${rootProject.projectDir}/docker/datahub-actions/Dockerfile")
+    files fileTree(rootProject.projectDir) {
+        exclude "datahub-actions/scripts/**"
+        exclude "datahub-actions/build/**"
+        exclude "datahub-actions/venv/**"
+        exclude "datahub-actions/tests/**"
+        exclude "**/*.xml"
+        include ".dockerignore"
+        include "docker/**"
+        include "datahub-actions/**"
+    }
+}
+tasks.getByName('docker').dependsOn('build')
+
 build.dependsOn install
 check.dependsOn lint
 check.dependsOn testQuick
diff --git a/datahub-actions/setup.py b/datahub-actions/setup.py
index 8b0cf8fa..0c992a67 100644
--- a/datahub-actions/setup.py
+++ b/datahub-actions/setup.py
@@ -93,7 +93,7 @@ def get_long_description():
     "types-pytz",
     "types-dataclasses",
     "sqlalchemy-stubs",
-    "types-pkg_resources",
+    "types-setuptools",
     "types-six",
     "types-python-dateutil",
     "types-requests",
diff --git a/datahub-actions/tests/unit/test_helpers.py b/datahub-actions/tests/unit/test_helpers.py
index eeb1c68c..2d7b5adb 100644
--- a/datahub-actions/tests/unit/test_helpers.py
+++ b/datahub-actions/tests/unit/test_helpers.py
@@ -51,6 +51,7 @@
                 None,
                 None,
                 None,
+                None,
                 AuditStampClass(0, "urn:li:corpuser:datahub"),
             )
         )
diff --git a/docker/datahub-actions/Dockerfile b/docker/datahub-actions/Dockerfile
index 9e03253e..e58dd361 100644
--- a/docker/datahub-actions/Dockerfile
+++ b/docker/datahub-actions/Dockerfile
@@ -15,15 +15,21 @@
 # Defining environment
 ARG APP_ENV=prod
 
-FROM acryldata/datahub-ingestion-base:head-full as prod-install
+FROM acryldata/datahub-ingestion-base:head-full AS prod-install
 COPY datahub-actions /actions-src
 
 USER root
 
-RUN mkdir -p /etc/datahub/actions && mkdir -p /tmp/datahub/logs/actions/system
-RUN cd /actions-src && \
-    pip install "." && \
-    pip install '.[all]' && \
+WORKDIR /actions-src
+
+RUN --mount=type=cache,target=/datahub-ingestion/.cache/uv,uid=1000,gid=1000 \
+    mkdir -p /etc/datahub/actions && \
+    mkdir -p /tmp/datahub/logs/actions/system && \
+    apt-get update && \
+    apt-get install -y -qq default-jre && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    UV_LINK_MODE=copy uv pip install -e ".[all]" && \
     # This is required to fix security vulnerability in htrace-core4
     rm -f /usr/local/lib/python3.10/site-packages/pyspark/jars/htrace-core4-4.1.0-incubating.jar
 
@@ -32,22 +38,19 @@ COPY ./docker/datahub-actions/readiness-check.sh /readiness-check.sh
 RUN chmod a+x /start_datahub_actions.sh
 
 # Add other default configurations into this!
-RUN mkdir -p /etc/datahub/actions/conf && mkdir -p /etc/datahub/actions/system/conf
-COPY ./docker/config/*.yaml /etc/datahub/actions/system/conf
+COPY ./docker/config /etc/datahub/actions/system/conf
 
 # datahub:datahub is created in datahub-ingestion-base image
 RUN chown datahub:datahub /etc/datahub && \
-    chown -R datahub:datahub /tmp/datahub
-
-# By transferring the root user's pip cache directory to the datahub
-# user, we can avoid the need for some redundant dependency downloads.
-RUN mkdir -p /home/datahub/.cache \
-    && mv /root/.cache/pip /home/datahub/.cache/pip \
-    && chown -R datahub:datahub /home/datahub/.cache/pip
+    chown -R datahub:datahub /tmp/datahub && \
+    # By transferring the root user's cache directory to the datahub
+    # user, we can avoid the need for some redundant dependency downloads.
+    mv /root/.cache /datahub-ingestion/.cache && \
+    chown -R datahub:datahub /datahub-ingestion/.cache
 
-FROM ${APP_ENV}-install as final
+FROM ${APP_ENV}-install AS final
 USER datahub
-RUN curl -s "https://get.sdkman.io" | bash
-RUN /bin/bash -c "source /$HOME/.sdkman/bin/sdkman-init.sh; sdk version; sdk install java 8.0.332-zulu"
+WORKDIR /datahub-ingestion
+
 ENTRYPOINT [ ]
 CMD dockerize -wait ${DATAHUB_GMS_PROTOCOL:-http}://$DATAHUB_GMS_HOST:$DATAHUB_GMS_PORT/health -timeout 240s /start_datahub_actions.sh