[IHP-24] Recognition docker container (#26)

Baklanov-Soft · Mar 6, 2024 · 9ba7f42 · 9ba7f42
1 parent f56c4c8
commit 9ba7f42
Show file tree

Hide file tree

Showing 16 changed files with 272 additions and 90 deletions.
diff --git a/.github/workflows/publish-recognizer.yml b/.github/workflows/publish-recognizer.yml
@@ -0,0 +1,63 @@
+name: publish recognizer
+# Builds the recognizer assembly jar and pushes its Docker image to ghcr.io on pushes to master that touch the paths below.
+on:
+  push:
+    branches: [ master ]
+    paths:
+      - 'recognizer/**'
+      - 'domain/**'
+      - 'common/**'
+      - 'project/**'
+      - 'build.sbt'
+      - '.github/**'
+
+env:
+  IMAGE_NAME: image-hosting-processing-recognizer
+
+jobs:
+  publish-container:
+    runs-on: ubuntu-latest
+
+    permissions:
+      packages: write
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Log in to registry
+        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
+
+      - name: set up JDK 17
+        uses: actions/setup-java@v1
+        with:
+          java-version: 17
+
+      #- name: run tests
+      #  run: sbt test
+
+      #- name: run integration tests
+      #  run: sbt it:test
+
+      - name: Assembly
+        run: sbt buildRecognizer
+
+      - name: Build image
+        run: docker build ./recognizer --tag "$IMAGE_NAME" --label "runnumber=${GITHUB_RUN_ID}"
+
+      - name: Push image
+        run: |
+          # Fully-qualified registry reference, namespaced by the repository owner.
+          IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME
+
+          # Image references must be lowercase, but the owner name may contain capitals.
+          IMAGE_ID=$(echo "$IMAGE_ID" | tr '[:upper:]' '[:lower:]')
+
+          # make version be equal to branch name (in case we want to have several branches to push container)
+          VERSION=$GITHUB_REF_NAME
+
+          echo "IMAGE_ID=$IMAGE_ID"
+          echo "VERSION=$VERSION"
+
+          docker tag "$IMAGE_NAME" "$IMAGE_ID:$VERSION"
+          docker push "$IMAGE_ID:$VERSION"
diff --git a/.github/workflows/publish-resizer.yml b/.github/workflows/publish-resizer.yml
@@ -9,6 +9,7 @@ on:
       - 'common/**'
       - 'project/**'
       - 'build.sbt'
+      - '.github/**'
 
 env:
   IMAGE_NAME: image-hosting-processing-resizer

diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,7 @@ params.conf
 .idea
 .bsp
 converted-to-torchscript.pt
+nsfw_model.pt
 
 target/
 !.mvn/wrapper/maven-wrapper.jar

diff --git a/README.md b/README.md
@@ -2,6 +2,8 @@
 
 Support part of https://github.com/Baklanov-Soft/image-hosting-storage
 
+See docker-compose.yml for an example of the settings.
+
 ## Resizer
 
 Resizer service for generating the previews. Docker Compose contains 2 instances by default (=partitions amount of
@@ -10,10 +12,10 @@ new images topic).
 Environment variables:
 
 ```
-KAFKA_BOOTSTRAP_SERVERS - kafka cluster url (Default: localhost:9092)
-CONSUMER_GROUP_ID - consumer id, multiple instances with same id will allow horizontal scaling (depends on topic paritions) (Default: resizer-local-test)
-NEW_IMAGES_TOPIC - topic for notifications about new images (Default: "new-images.v1")
-MINIO_HOST - host of minio from where it will take pictures and where it is going to upload the previews
+KAFKA_BOOTSTRAP_SERVERS - kafka cluster url
+CONSUMER_GROUP_ID - consumer id, multiple instances with same id will allow horizontal scaling (depends on topic partitions)
+NEW_IMAGES_TOPIC - topic for notifications about new images
+MINIO_HOST - minio from where it will take pictures and where it is going to upload the previews
 MINIO_USER
 MINIO_PASSWORD
 ```
@@ -36,25 +38,30 @@ It creates multiple preview images inside the same Minio as it reads from (insid
 Service for object detection and nsfw content detection.
 
 NSFW detection based on model: https://huggingface.co/Falconsai/nsfw_image_detection
+Currently, NSFW detection only recognizes pornographic images. It does not detect blood or other kinds of sensitive content.
 
 Converted to DJL TorchScript model (required for service to
-work): https://huggingface.co/DenisNovac/nsfw_image_detection/tree/main
+work, you will need to mount it to docker (see docker-compose for
+reference)): https://huggingface.co/DenisNovac/nsfw_image_detection/tree/main
 
 Environment variables:
 
 ```
-KAFKA_BOOTSTRAP_SERVERS - kafka cluster url (Default: localhost:9092)
-CONSUMER_GROUP_ID - consumer id, multiple instances with same id will allow horizontal scaling (depends on topic paritions) (Default: recognizer-local-test)
-NEW_IMAGES_TOPIC - topic for notifications about new images (Default: "new-images.v1")
-CATEGORIES_TOPIC - topic for output of service (Default: "categories.v1")
+KAFKA_BOOTSTRAP_SERVERS - kafka cluster url
+CONSUMER_GROUP_ID - consumer id, multiple instances with same id will allow horizontal scaling (depends on topic partitions)
+NEW_IMAGES_TOPIC - topic for notifications about new images 
+CATEGORIES_TOPIC - topic for output of service 
 DEBUG_CATEGORIES - write debug object detection pictures (draw squares around detected objects) into debug folder (HEAVY PNG)
-NSFW_SYNSET - synset.txt file for nsfw detector (list of categories, included in project)
+NSFW_SYNSET_PATH - synset.txt file for nsfw detector (list of categories, included in project)
 NSFW_MODEL_PATH - pre-trained model for nsfw detection, requires one specific model, others could be working wrong
-MINIO_HOST - host of minio from where it will take pictures
+ENABLE_NSFW_DETECTION - set to false to disable nsfw detection completely (and skip its initialization)
+MINIO_HOST - minio from where it will take (and save debug) pictures
 MINIO_USER
 MINIO_PASSWORD
 ```
 
+**NOTE:** the nsfw model and the synset file must be placed in a subfolder such as /nsfw (see docker-compose.yml for reference).
+
 ### Protocol
 
 Recognizer reads `{NEW_IMAGES_TOPIC}` Kafka topic and accepts messages in following format (v1):

diff --git a/alias.sbt b/alias.sbt
@@ -1 +1,2 @@
 addCommandAlias("buildResizer", "project resizer;assembly;")
+addCommandAlias("buildRecognizer", "project recognizer;assembly;")
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -2,57 +2,100 @@ version: "3"
 
 services:
 
-  #  resizer:
-  #    build: ./resizer
-  #    container_name: resizer
-  #    depends_on:
-  #      - kafka-init
-  #      - minio
-  #    environment:
-  #      KAFKA_BOOTSTRAP_SERVERS: kafka:9092
-  #      CONSUMER_GROUP_ID: resizer-local-test
-  #      MINIO_HOST: "http://minio:9000"
-  #      MINIO_USER: minioadmin
-  #      MINIO_PASSWORD: minioadmin
-
   resizer1:
     image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master
     container_name: resizer1
     depends_on:
-      - kafka-init
+      - kafka-init-new-images
       - minio
     environment:
       KAFKA_BOOTSTRAP_SERVERS: kafka:9092
       CONSUMER_GROUP_ID: resizer-local-test
       MINIO_HOST: "http://minio:9000"
       MINIO_USER: minioadmin
       MINIO_PASSWORD: minioadmin
+      NEW_IMAGES_TOPIC: "new-images.v1"
 
   resizer2:
     image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master
     container_name: resizer2
     depends_on:
-      - kafka-init
+      - kafka-init-new-images
+      - minio
     environment:
       KAFKA_BOOTSTRAP_SERVERS: kafka:9092
       CONSUMER_GROUP_ID: resizer-local-test
       MINIO_HOST: "http://minio:9000"
       MINIO_USER: minioadmin
       MINIO_PASSWORD: minioadmin
+      NEW_IMAGES_TOPIC: "new-images.v1"
+
+  recognizer1:  # shares CONSUMER_GROUP_ID with recognizer2
+    image: ghcr.io/baklanov-soft/image-hosting-processing-recognizer:master
+    container_name: recognizer1
+    depends_on:
+      - kafka-init-new-images
+      - kafka-init-categories
+      - minio
+    volumes:
+      - recognizer1-djl-cache:/root/.djl.ai
+      - "./recognizer/synset.txt:/opt/app/nsfw/synset.txt"
+      # download it from here https://huggingface.co/DenisNovac/nsfw_image_detection/
+      - "./recognizer/nsfw_model.pt:/opt/app/nsfw/nsfw_model.pt"
+    environment:
+      KAFKA_BOOTSTRAP_SERVERS: kafka:9092
+      CONSUMER_GROUP_ID: recognizer-local-test
+      NEW_IMAGES_TOPIC: "new-images.v1"
+      CATEGORIES_TOPIC: "categories.v1"
+      ENABLE_NSFW_DETECTION: "true"  # quoted: Compose environment values must not be YAML booleans
+      NSFW_SYNSET_PATH: "nsfw/synset.txt"  # relative to the image WORKDIR (/opt/app), matches the mount above
+      NSFW_MODEL_PATH: "nsfw/nsfw_model.pt"
+      MINIO_HOST: "http://minio:9000"
+      MINIO_USER: minioadmin
+      MINIO_PASSWORD: minioadmin
+      DEBUG_CATEGORIES: "true"  # quoted for the same reason as ENABLE_NSFW_DETECTION
+
+  recognizer2:  # shares CONSUMER_GROUP_ID with recognizer1
+    image: ghcr.io/baklanov-soft/image-hosting-processing-recognizer:master
+    container_name: recognizer2
+    depends_on:
+      - kafka-init-new-images
+      - kafka-init-categories
+      - minio
+    volumes:
+      - recognizer2-djl-cache:/root/.djl.ai
+      - "./recognizer/synset.txt:/opt/app/nsfw/synset.txt"
+      # download it from here https://huggingface.co/DenisNovac/nsfw_image_detection/
+      - "./recognizer/nsfw_model.pt:/opt/app/nsfw/nsfw_model.pt"
+    environment:
+      KAFKA_BOOTSTRAP_SERVERS: kafka:9092
+      CONSUMER_GROUP_ID: recognizer-local-test
+      NEW_IMAGES_TOPIC: "new-images.v1"
+      CATEGORIES_TOPIC: "categories.v1"
+      ENABLE_NSFW_DETECTION: "true"  # quoted: Compose environment values must not be YAML booleans
+      NSFW_SYNSET_PATH: "nsfw/synset.txt"  # relative to the image WORKDIR (/opt/app), matches the mount above
+      NSFW_MODEL_PATH: "nsfw/nsfw_model.pt"
+      MINIO_HOST: "http://minio:9000"
+      MINIO_USER: minioadmin
+      MINIO_PASSWORD: minioadmin
+      DEBUG_CATEGORIES: "true"  # quoted for the same reason as ENABLE_NSFW_DETECTION
 
   kafka:
     container_name: kafka
     image: bitnami/kafka:3.6.1
     ports:
-      - "9092:9092"
+      - "9094:9094"
     environment:
+      KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: INTERNAL:PLAINTEXT,CONTROLLER:PLAINTEXT,EXTERNAL:PLAINTEXT
+      KAFKA_CFG_LISTENERS: INTERNAL://kafka:9092,CONTROLLER://kafka:9093,EXTERNAL://:9094
+      KAFKA_CFG_ADVERTISED_LISTENERS: INTERNAL://kafka:9092,EXTERNAL://localhost:9094
+      KAFKA_CFG_INTER_BROKER_LISTENER_NAME: INTERNAL
+      KAFKA_CFG_CONTROLLER_LISTENER_NAMES: CONTROLLER
+      # cluster config
+      KAFKA_KRAFT_CLUSTER_ID: LelM2dIFQkiUFvXCEcqRWA
       KAFKA_CFG_NODE_ID: 0
       KAFKA_CFG_PROCESS_ROLES: controller,broker
-      KAFKA_CFG_LISTENERS: PLAINTEXT://kafka:9092,CONTROLLER://kafka:9093
-      KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT
       KAFKA_CFG_CONTROLLER_QUORUM_VOTERS: 0@kafka:9093
-      KAFKA_CFG_CONTROLLER_LISTENER_NAMES: CONTROLLER
-      KAFKA_KRAFT_CLUSTER_ID: LelM2dIFQkiUFvXCEcqRWA
 
   minio:
     container_name: minio
@@ -138,19 +181,21 @@ services:
       "--topic", "categories.v1"
     ]
 
-  #  kafka-ui:
-  #    image: provectuslabs/kafka-ui
-  #    container_name: kafka-ui
-  #    ports:
-  #      - "8000:8000"
-  #    environment:
-  #      SERVER_PORT: 8000
-  #      KAFKA_CLUSTERS_0_NAME: image-hosting
-  #      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092
-  #      KAFKA_CLUSTERS_0_READONLY: true
-  #    depends_on:
-  #      - kafka
+  kafka-ui:  # web UI for the local broker, published on host port 8000
+    image: provectuslabs/kafka-ui
+    container_name: kafka-ui
+    ports:
+      - "8000:8000"
+    environment:
+      SERVER_PORT: 8000
+      KAFKA_CLUSTERS_0_NAME: image-hosting
+      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092
+      KAFKA_CLUSTERS_0_READONLY: "true"  # quoted: Compose environment values must not be YAML booleans
+    depends_on:
+      - kafka
 
 volumes:
   minio-data:
   db-data:
+  recognizer1-djl-cache:
+  recognizer2-djl-cache:
diff --git a/recognizer/Dockerfile b/recognizer/Dockerfile
@@ -0,0 +1,10 @@
+FROM eclipse-temurin:17-jre-jammy
+
+WORKDIR /opt/app
+
+COPY ./target/scala-2.13/image-hosting-processing-recognizer-assembly-0.1.0-SNAPSHOT.jar ./app.jar
+
+# Mount point for the nsfw model and synset.txt (docker-compose maps ./recognizer files into /opt/app/nsfw)
+RUN mkdir /opt/app/nsfw
+
+ENTRYPOINT ["java", "-cp", "app.jar", "com.github.baklanovsoft.imagehosting.recognizer.Main"]
diff --git a/recognizer/build_local.sh b/recognizer/build_local.sh
@@ -0,0 +1,11 @@
+#docker buildx build --platform linux/amd64 -t test/recognizer .
+
+cd ..
+
+sbt buildRecognizer
+
+cd recognizer
+
+docker build -t test/recognizer .
+
+docker image ls | grep test/recognizer
diff --git a/recognizer/convert.py b/recognizer/convert.py
@@ -0,0 +1,18 @@
+from transformers import AutoImageProcessor, AutoModelForImageClassification
+import torch
+from PIL import Image
+from transformers import AutoTokenizer
+
+model_name = "DenisNovac/nsfw_image_detection"
+
+model = AutoModelForImageClassification.from_pretrained(model_name, torchscript=True, return_dict=False)
+
+processor = AutoImageProcessor.from_pretrained(model_name)
+
+image = Image.open("images/hentai.jpg")
+image_inputs = processor(images=image, return_tensors="pt")
+
+config = {'forward': [image_inputs['pixel_values']]}
+converted = torch.jit.trace_module(model,  config)
+
+torch.jit.save(converted, "nsfw_model.pt")
diff --git a/recognizer/download-model.sh b/recognizer/download-model.sh
@@ -1,3 +1,3 @@
 # https://huggingface.co/DenisNovac/nsfw_image_detection
 # fork of https://huggingface.co/Falconsai/nsfw_image_detection
-wget -O converted-to-torchscript.pt https://huggingface.co/DenisNovac/nsfw_image_detection/resolve/main/converted-to-torchscript.pt?download=true
+wget -O nsfw_model.pt "https://huggingface.co/DenisNovac/nsfw_image_detection/resolve/main/converted-to-torchscript.pt?download=true"
diff --git a/recognizer/src/main/resources/application.conf b/recognizer/src/main/resources/application.conf
@@ -1,6 +1,6 @@
 include "params.conf"
 
-kafka-bootstrap-servers = "localhost:9092"
+kafka-bootstrap-servers = "localhost:9094"
 kafka-bootstrap-servers = ${?KAFKA_BOOTSTRAP_SERVERS}
 
 consumer-group-id = "recognizer-local-test"
@@ -16,9 +16,11 @@ new-images-topic = ${?NEW_IMAGES_TOPIC}
 debug-categories = false
 debug-categories = ${?DEBUG_CATEGORIES}
 
-nsfw-synset = "synset.txt"
-nsfw-synset = ${?NSFW_SYNSET}
-nsfw-model-path = "recognizer/converted-to-torchscript.pt"
+enable-nsfw-detection = true
+enable-nsfw-detection = ${?ENABLE_NSFW_DETECTION}
+nsfw-synset-path = "recognizer/synset.txt"
+nsfw-synset-path = ${?NSFW_SYNSET_PATH}
+nsfw-model-path = "recognizer/nsfw_model.pt"
 nsfw-model-path = ${?NSFW_MODEL_PATH}
 
 minio {

diff --git a/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/AppConfig.scala b/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/AppConfig.scala
@@ -9,7 +9,8 @@ final case class AppConfig(
     newImagesTopic: String,
     categoriesTopic: String,
     debugCategories: Boolean,
-    nsfwSynset: String,
+    enableNsfwDetection: Boolean,
+    nsfwSynsetPath: String,
     nsfwModelPath: String,
     minio: MinioCreds
 )
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,7 @@ params.conf @@
     .idea
     .bsp
     converted-to-torchscript.pt
+    nsfw_model.pt
     target/
     !.mvn/wrapper/maven-wrapper.jar
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		addCommandAlias("buildResizer", "project resizer;assembly;")
		addCommandAlias("buildRecognizer", "project recognizer;assembly;")