diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
new file mode 100644
index 0000000..dd6146c
--- /dev/null
+++ b/.github/workflows/docker-build.yml
@@ -0,0 +1,59 @@
+#
+name: Create and publish a Docker image
+
+# Configures this workflow to run every time a change is pushed to the branch called `main`.
+on:
+  push:
+    branches: ['main']
+
+# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds.
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
+    permissions:
+      contents: read
+      packages: write
+      attestations: write
+      id-token: write
+      #
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      # Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
+      - name: Log in to the Container registry
+        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+      # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages.
+      # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
+      # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
+      - name: Build and push Docker image
+        id: push
+        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+      # This step generates an artifact attestation for the image, which is an unforgeable statement about where and how it was built. It increases supply chain security for people who consume the image. For more information, see "[Using artifact attestations to establish provenance for builds](https://docs.github.com/en/actions/security-guides/using-artifact-attestations-to-establish-provenance-for-builds)."
+      - name: Generate artifact attestation
+        uses: actions/attest-build-provenance@v1
+        with:
+          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          subject-digest: ${{ steps.push.outputs.digest }}
+          push-to-registry: true
diff --git a/.gitignore b/.gitignore
index e7913cf..620470b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ xmen_index
 xmen
 *.zip
 biomedical
+**/.DS_Store
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..60cfa22
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,31 @@
+# Use an official Miniconda base image
+FROM continuumio/miniconda3:latest
+
+# Set the working directory
+WORKDIR /
+
+# Update the package list and install any dependencies
+RUN apt-get update && apt-get install -y \
+    curl git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a new conda environment with Python 3.10
+RUN conda create --name xmen python=3.10 -y
+
+# Run every following shell-form RUN instruction inside the xmen environment (exec-form CMD is not affected)
+SHELL ["conda", "run", "-n", "xmen", "/bin/bash", "-c"]
+
+# Copy the application code and requirements file into the container
+COPY requirements.txt ./
+COPY *.py ./
+
+# Install binary dependencies from conda
+RUN conda install -c conda-forge nmslib cymem murmurhash -y
+
+# Install pip dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+EXPOSE 5000
+
+# Define the command to run the server with parameters (exec form bypasses SHELL, so call the environment's interpreter directly)
+CMD ["/opt/conda/envs/xmen/bin/python", "run_snomed_german_recommender.py", "--no-gpu", "--port", "5000", "index"]
diff --git a/README.md b/README.md
index 2dbb983..f597877 100644
--- a/README.md
+++ b/README.md
@@ -6,11 +6,18 @@ Simple integration of pre-configured [xMEN](https://github.com/hpi-dhc/xmen) pip
 
 ![External Recommender](assets/recommender.png)
 
-### Install dependencies
+### With Docker
+
+- `docker build -t xmen-inception .`
+- `docker run -p 5000:5000 xmen-inception`
+
+### Without Docker
+
+#### Install dependencies
 
 - `pip install -r requirements.txt` (see [here](https://github.com/hpi-dhc/xmen/issues/37) for known issues during installation of `xmen`)
 
-### Prepare xMEN KB and index
+#### Prepare xMEN KB and index
 
 **Option 1 (faster)**:
 - Download pre-computed xMEN index and extract contents into `xmen_index`:
diff --git a/requirements.txt b/requirements.txt
index d309642..a7494c5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 git+https://github.com/inception-project/inception-external-recommender.git
-xmen==1.0.7
+xmen==1.0.8
 dateparser==1.2.0
\ No newline at end of file
diff --git a/run_snomed_german_recommender.py b/run_snomed_german_recommender.py
index d21499b..655c4c2 100644
--- a/run_snomed_german_recommender.py
+++ b/run_snomed_german_recommender.py
@@ -31,6 +31,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
 
         for anno, pred in zip(annos, preds):
             for concept in pred['normalized'][0:self.top_k]:
+                print(concept)
                 sctid = concept['db_id']
                 score = concept['score']
                 prediction = create_span_prediction(cas, layer, feature, anno.begin, anno.end, f"http://snomed.info/id/{sctid}", score=score)