diff --git a/.gitignore b/.gitignore index fd649b5..150ed1f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ params.conf .idea .bsp converted-to-torchscript.pt +nsfw_model.pt target/ !.mvn/wrapper/maven-wrapper.jar diff --git a/alias.sbt b/alias.sbt index 482760b..a068018 100644 --- a/alias.sbt +++ b/alias.sbt @@ -1 +1,2 @@ addCommandAlias("buildResizer", "project resizer;assembly;") +addCommandAlias("buildRecognizer", "project recognizer;assembly;") diff --git a/docker-compose.yml b/docker-compose.yml index 6849b7f..d306f2a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,30 +15,52 @@ services: # MINIO_USER: minioadmin # MINIO_PASSWORD: minioadmin - resizer1: - image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master - container_name: resizer1 - depends_on: - - kafka-init - - minio - environment: - KAFKA_BOOTSTRAP_SERVERS: kafka:9092 - CONSUMER_GROUP_ID: resizer-local-test - MINIO_HOST: "http://minio:9000" - MINIO_USER: minioadmin - MINIO_PASSWORD: minioadmin + # resizer1: + # image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master + # container_name: resizer1 + # depends_on: + # - kafka-init + # - minio + # environment: + # KAFKA_BOOTSTRAP_SERVERS: kafka:9092 + # CONSUMER_GROUP_ID: resizer-local-test + # MINIO_HOST: "http://minio:9000" + # MINIO_USER: minioadmin + # MINIO_PASSWORD: minioadmin + # NEW_IMAGES_TOPIC: "new-images.v1" + # + # resizer2: + # image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master + # container_name: resizer2 + # depends_on: + # - kafka-init-new-images + # environment: + # KAFKA_BOOTSTRAP_SERVERS: kafka:9092 + # CONSUMER_GROUP_ID: resizer-local-test + # MINIO_HOST: "http://minio:9000" + # MINIO_USER: minioadmin + # MINIO_PASSWORD: minioadmin + # NEW_IMAGES_TOPIC: "new-images.v1" - resizer2: - image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master - container_name: resizer2 + recognizer1: + image: test/recognizer:latest + container_name: recognizer1 depends_on: - - kafka-init + - kafka-init-new-images + - kafka-init-categories + volumes: + - recognizer1-djl-cache:/root/.djl.ai environment: KAFKA_BOOTSTRAP_SERVERS: kafka:9092 - CONSUMER_GROUP_ID: resizer-local-test + CONSUMER_GROUP_ID: recognizer-local-test + NEW_IMAGES_TOPIC: "new-images.v1" + CATEGORIES_TOPIC: "categories.v1" + NSFW_SYNSET_PATH: "synset.txt" + NSFW_MODEL_PATH: "nsfw_model.pt" MINIO_HOST: "http://minio:9000" MINIO_USER: minioadmin MINIO_PASSWORD: minioadmin + DEBUG_CATEGORIES: false kafka: container_name: kafka @@ -138,19 +160,20 @@ services: "--topic", "categories.v1" ] - # kafka-ui: - # image: provectuslabs/kafka-ui - # container_name: kafka-ui - # ports: - # - "8000:8000" - # environment: - # SERVER_PORT: 8000 - # KAFKA_CLUSTERS_0_NAME: image-hosting - # KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092 - # KAFKA_CLUSTERS_0_READONLY: true - # depends_on: - # - kafka +# kafka-ui: +# image: provectuslabs/kafka-ui +# container_name: kafka-ui +# ports: +# - "8000:8000" +# environment: +# SERVER_PORT: 8000 +# KAFKA_CLUSTERS_0_NAME: image-hosting +# KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092 +# KAFKA_CLUSTERS_0_READONLY: true +# depends_on: +# - kafka volumes: minio-data: db-data: + recognizer1-djl-cache: diff --git a/recognizer/Dockerfile b/recognizer/Dockerfile new file mode 100644 index 0000000..3d511b1 --- /dev/null +++ b/recognizer/Dockerfile @@ -0,0 +1,9 @@ +FROM eclipse-temurin:17.0.6_10-jre-jammy + +WORKDIR /opt/app + +COPY ./target/scala-2.13/image-hosting-processing-recognizer-assembly-0.1.0-SNAPSHOT.jar ./ +COPY synset.txt ./ +COPY nsfw_model.pt ./ + +ENTRYPOINT ["java", "-cp", "image-hosting-processing-recognizer-assembly-0.1.0-SNAPSHOT.jar", "com.github.baklanovsoft.imagehosting.recognizer.Main"] diff --git a/recognizer/build_local.sh b/recognizer/build_local.sh new file mode 100755 index 0000000..2b20b07 --- /dev/null +++ b/recognizer/build_local.sh @@ -0,0 +1,3 @@ +docker buildx build --platform linux/amd64 -t test/recognizer . + +docker image ls | grep test/recognizer diff --git a/recognizer/convert.py b/recognizer/convert.py new file mode 100644 index 0000000..c521dee --- /dev/null +++ b/recognizer/convert.py @@ -0,0 +1,18 @@ +from transformers import AutoImageProcessor, AutoModelForImageClassification +import torch +from PIL import Image +from transformers import AutoTokenizer + +model_name = "DenisNovac/nsfw_image_detection" + +model = AutoModelForImageClassification.from_pretrained(model_name, torchscript=True, return_dict=False) + +processor = AutoImageProcessor.from_pretrained(model_name) + +image = Image.open("images/hentai.jpg") +image_inputs = processor(images=image, return_tensors="pt") + +config = {'forward': [image_inputs['pixel_values']]} +converted = torch.jit.trace_module(model, config) + +torch.jit.save(converted, "nsfw_model.pt") diff --git a/recognizer/download-model.sh b/recognizer/download-model.sh index 371563f..264a6b8 100755 --- a/recognizer/download-model.sh +++ b/recognizer/download-model.sh @@ -1,3 +1,3 @@ # https://huggingface.co/DenisNovac/nsfw_image_detection # fork of https://huggingface.co/Falconsai/nsfw_image_detection -wget -O converted-to-torchscript.pt https://huggingface.co/DenisNovac/nsfw_image_detection/resolve/main/converted-to-torchscript.pt?download=true +wget -O nsfw_model.pt https://huggingface.co/DenisNovac/nsfw_image_detection/resolve/main/converted-to-torchscript.pt?download=true diff --git a/recognizer/src/main/resources/application.conf b/recognizer/src/main/resources/application.conf index a7421e7..b5465d3 100644 --- a/recognizer/src/main/resources/application.conf +++ b/recognizer/src/main/resources/application.conf @@ -16,9 +16,9 @@ new-images-topic = ${?NEW_IMAGES_TOPIC} debug-categories = false debug-categories = ${?DEBUG_CATEGORIES} -nsfw-synset = "synset.txt" -nsfw-synset = ${?NSFW_SYNSET} -nsfw-model-path = "recognizer/converted-to-torchscript.pt" +nsfw-synset-path = "recognizer/synset.txt" +nsfw-synset-path = ${?NSFW_SYNSET_PATH} +nsfw-model-path = "recognizer/nsfw_model.pt" nsfw-model-path = ${?NSFW_MODEL_PATH} minio { diff --git a/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/AppConfig.scala b/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/AppConfig.scala index f501b85..f526c1e 100644 --- a/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/AppConfig.scala +++ b/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/AppConfig.scala @@ -9,7 +9,7 @@ final case class AppConfig( newImagesTopic: String, categoriesTopic: String, debugCategories: Boolean, - nsfwSynset: String, + nsfwSynsetPath: String, nsfwModelPath: String, minio: MinioCreds ) diff --git a/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/Main.scala b/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/Main.scala index 07065ed..7cf00e4 100644 --- a/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/Main.scala +++ b/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/Main.scala @@ -32,7 +32,7 @@ object Main extends IOApp with KafkaJsonDeserializer { resources = for { detection <- if (config.debugCategories) ObjectDetection.debug[IO](minioClient) else ObjectDetection.production[IO] - nsfw <- NsfwDetection.of[IO](config.nsfwModelPath, config.nsfwSynset) + nsfw <- NsfwDetection.of[IO](config.nsfwModelPath, config.nsfwSynsetPath) categorization <- Resource.eval( CategorizationStream .of[IO]( diff --git a/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/NsfwDetection.scala b/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/NsfwDetection.scala index 4dec4a1..7d702be 100644 --- a/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/NsfwDetection.scala +++ b/recognizer/src/main/scala/com/github/baklanovsoft/imagehosting/recognizer/NsfwDetection.scala @@ -10,9 +10,9 @@ import ai.djl.translate.Translator import cats.effect.kernel.{Resource, Sync} import cats.implicits._ import com.github.baklanovsoft.imagehosting.{BucketId, Category, ImageId, Score} -import org.typelevel.log4cats.LoggerFactory +import org.typelevel.log4cats.{Logger, LoggerFactory} -import java.nio.file.Paths +import java.nio.file.{Files, Paths} import scala.jdk.CollectionConverters._ trait NsfwDetection[F[_]] { @@ -24,37 +24,44 @@ trait NsfwDetection[F[_]] { object NsfwDetection { - private def buildTranslator[F[_]: Sync](synsetPath: String): F[Translator[Image, Classifications]] = Sync[F].delay { - // copypasted from here https://github.com/deepjavalibrary/djl/issues/1419 - ImageClassificationTranslator - .builder() - .optSynsetArtifactName(synsetPath) - .addTransform(new Resize(256)) - // from the model description it was trained on 224x224 images so looks like it fits - .addTransform(new CenterCrop(224, 224)) - .addTransform(new ToTensor()) - .addTransform( - new Normalize( - Array( - 0.485f, - 0.456f, - 0.406f - ), - Array( - 0.229f, - 0.224f, - 0.225f + private def buildTranslator[F[_]: Sync](synsetUrl: String): F[Translator[Image, Classifications]] = + Sync[F].delay { + // copypasted from here https://github.com/deepjavalibrary/djl/issues/1419 + ImageClassificationTranslator + .builder() + .optSynsetUrl(synsetUrl) + .addTransform(new Resize(256)) + // from the model description it was trained on 224x224 images so looks like it fits + .addTransform(new CenterCrop(224, 224)) + .addTransform(new ToTensor()) + .addTransform( + new Normalize( + Array( + 0.485f, + 0.456f, + 0.406f + ), + Array( + 0.229f, + 0.224f, + 0.225f + ) ) ) - ) - .optApplySoftmax(true) - .build() - } + .optApplySoftmax(true) + .build() + } - private def acquireModelPredictor[F[_]: Sync](modelPath: String, synsetPath: String) = + private def acquireModelPredictor[F[_]: Sync: Logger](modelPath: String, synsetPath: String) = Resource.make { for { - translator <- buildTranslator(synsetPath) + lookup <- Sync[F].delay(Files.list(Paths.get("./")).toList) + _ <- Logger[F].info(s"Workdir absolute path: ${Paths.get("./").toAbsolutePath.toString}") + _ <- Logger[F].info(s"Lookup result: $lookup") + synsetUrl <- Sync[F].delay("file://" + Paths.get(synsetPath).toAbsolutePath.toString) + _ <- Logger[F].info(s"Synset constructed url: $synsetUrl") + + translator <- buildTranslator(synsetUrl) criteria <- Sync[F].delay { Criteria .builder() @@ -77,8 +84,8 @@ object NsfwDetection { def of[F[_]: Sync: LoggerFactory](modelPath: String, synsetPath: String): Resource[F, NsfwDetection[F]] = for { - logger <- Resource.eval(LoggerFactory[F].create) - (_, predictor) <- acquireModelPredictor[F](modelPath, synsetPath) + implicit0(logger: Logger[F]) <- Resource.eval(LoggerFactory[F].create) + (_, predictor) <- acquireModelPredictor[F](modelPath, synsetPath) } yield new NsfwDetection[F] { override def detect(image: Image, bucketId: BucketId, imageId: ImageId): F[Option[(Category, Score)]] =