IHP-24 - wip doesn't work
DenisNovac committed Mar 3, 2024
1 parent f56c4c8 commit 8bb624b
Showing 11 changed files with 127 additions and 65 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -2,6 +2,7 @@ params.conf
.idea
.bsp
converted-to-torchscript.pt
nsfw_model.pt

target/
!.mvn/wrapper/maven-wrapper.jar
1 change: 1 addition & 0 deletions alias.sbt
@@ -1 +1,2 @@
addCommandAlias("buildResizer", "project resizer;assembly;")
addCommandAlias("buildRecognizer", "project recognizer;assembly;")
81 changes: 52 additions & 29 deletions docker-compose.yml
@@ -15,30 +15,52 @@ services:
# MINIO_USER: minioadmin
# MINIO_PASSWORD: minioadmin

resizer1:
image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master
container_name: resizer1
depends_on:
- kafka-init
- minio
environment:
KAFKA_BOOTSTRAP_SERVERS: kafka:9092
CONSUMER_GROUP_ID: resizer-local-test
MINIO_HOST: "http://minio:9000"
MINIO_USER: minioadmin
MINIO_PASSWORD: minioadmin
# resizer1:
# image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master
# container_name: resizer1
# depends_on:
# - kafka-init
# - minio
# environment:
# KAFKA_BOOTSTRAP_SERVERS: kafka:9092
# CONSUMER_GROUP_ID: resizer-local-test
# MINIO_HOST: "http://minio:9000"
# MINIO_USER: minioadmin
# MINIO_PASSWORD: minioadmin
# NEW_IMAGES_TOPIC: "new-images.v1"
#
# resizer2:
# image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master
# container_name: resizer2
# depends_on:
# - kafka-init-new-images
# environment:
# KAFKA_BOOTSTRAP_SERVERS: kafka:9092
# CONSUMER_GROUP_ID: resizer-local-test
# MINIO_HOST: "http://minio:9000"
# MINIO_USER: minioadmin
# MINIO_PASSWORD: minioadmin
# NEW_IMAGES_TOPIC: "new-images.v1"

resizer2:
image: ghcr.io/baklanov-soft/image-hosting-processing-resizer:master
container_name: resizer2
recognizer1:
image: test/recognizer:latest
container_name: recognizer1
depends_on:
- kafka-init
- kafka-init-new-images
- kafka-init-categories
volumes:
- recognizer1-djl-cache:/root/.djl.ai
environment:
KAFKA_BOOTSTRAP_SERVERS: kafka:9092
CONSUMER_GROUP_ID: resizer-local-test
CONSUMER_GROUP_ID: recognizer-local-test
NEW_IMAGES_TOPIC: "new-images.v1"
CATEGORIES_TOPIC: "categories.v1"
NSFW_SYNSET_PATH: "synset.txt"
NSFW_MODEL_PATH: "nsfw_model.pt"
MINIO_HOST: "http://minio:9000"
MINIO_USER: minioadmin
MINIO_PASSWORD: minioadmin
DEBUG_CATEGORIES: false

kafka:
container_name: kafka
@@ -138,19 +160,20 @@ services:
"--topic", "categories.v1"
]

# kafka-ui:
# image: provectuslabs/kafka-ui
# container_name: kafka-ui
# ports:
# - "8000:8000"
# environment:
# SERVER_PORT: 8000
# KAFKA_CLUSTERS_0_NAME: image-hosting
# KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092
# KAFKA_CLUSTERS_0_READONLY: true
# depends_on:
# - kafka
# kafka-ui:
# image: provectuslabs/kafka-ui
# container_name: kafka-ui
# ports:
# - "8000:8000"
# environment:
# SERVER_PORT: 8000
# KAFKA_CLUSTERS_0_NAME: image-hosting
# KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092
# KAFKA_CLUSTERS_0_READONLY: true
# depends_on:
# - kafka

volumes:
minio-data:
db-data:
recognizer1-djl-cache:
9 changes: 9 additions & 0 deletions recognizer/Dockerfile
@@ -0,0 +1,9 @@
FROM eclipse-temurin:17.0.6_10-jre-jammy

WORKDIR /opt/app

COPY ./target/scala-2.13/image-hosting-processing-recognizer-assembly-0.1.0-SNAPSHOT.jar ./
COPY synset.txt ./
COPY nsfw_model.pt ./

ENTRYPOINT ["java", "-cp", "image-hosting-processing-recognizer-assembly-0.1.0-SNAPSHOT.jar", "com.github.baklanovsoft.imagehosting.recognizer.Main"]
3 changes: 3 additions & 0 deletions recognizer/build_local.sh
@@ -0,0 +1,3 @@
docker buildx build --platform linux/amd64 -t test/recognizer .

docker image ls | grep test/recognizer
18 changes: 18 additions & 0 deletions recognizer/convert.py
@@ -0,0 +1,18 @@
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch
from PIL import Image
from transformers import AutoTokenizer

model_name = "DenisNovac/nsfw_image_detection"

model = AutoModelForImageClassification.from_pretrained(model_name, torchscript=True, return_dict=False)

processor = AutoImageProcessor.from_pretrained(model_name)

image = Image.open("images/hentai.jpg")
image_inputs = processor(images=image, return_tensors="pt")

config = {'forward': [image_inputs['pixel_values']]}
converted = torch.jit.trace_module(model, config)

torch.jit.save(converted, "nsfw_model.pt")
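Not part of the commit: a minimal sanity check for the traced artifact, assuming the same model name and test image as convert.py above. It loads nsfw_model.pt back with torch.jit.load and compares its logits against the eager model on the same input.

# Sketch: verify the TorchScript export before baking it into the Docker image.
# Assumes the model name and test image from convert.py above.
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

model_name = "DenisNovac/nsfw_image_detection"

# Rebuild the exact input used for tracing
processor = AutoImageProcessor.from_pretrained(model_name)
image = Image.open("images/hentai.jpg")
pixel_values = processor(images=image, return_tensors="pt")["pixel_values"]

# Eager model; torchscript=True / return_dict=False makes forward return a tuple
model = AutoModelForImageClassification.from_pretrained(model_name, torchscript=True, return_dict=False)
model.eval()

# Traced artifact produced by convert.py
traced = torch.jit.load("nsfw_model.pt")
traced.eval()

with torch.no_grad():
    eager_logits = model(pixel_values)[0]
    traced_logits = traced(pixel_values)[0]

# The traced graph should reproduce the eager forward pass closely
print("outputs match:", torch.allclose(eager_logits, traced_logits, atol=1e-4))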
2 changes: 1 addition & 1 deletion recognizer/download-model.sh
@@ -1,3 +1,3 @@
# https://huggingface.co/DenisNovac/nsfw_image_detection
# fork of https://huggingface.co/Falconsai/nsfw_image_detection
wget -O converted-to-torchscript.pt https://huggingface.co/DenisNovac/nsfw_image_detection/resolve/main/converted-to-torchscript.pt?download=true
wget -O nsfw_model.pt https://huggingface.co/DenisNovac/nsfw_image_detection/resolve/main/converted-to-torchscript.pt?download=true
6 changes: 3 additions & 3 deletions recognizer/src/main/resources/application.conf
@@ -16,9 +16,9 @@ new-images-topic = ${?NEW_IMAGES_TOPIC}
debug-categories = false
debug-categories = ${?DEBUG_CATEGORIES}

nsfw-synset = "synset.txt"
nsfw-synset = ${?NSFW_SYNSET}
nsfw-model-path = "recognizer/converted-to-torchscript.pt"
nsfw-synset-path = "recognizer/synset.txt"
nsfw-synset-path = ${?NSFW_SYNSET_PATH}
nsfw-model-path = "recognizer/nsfw_model.pt"
nsfw-model-path = ${?NSFW_MODEL_PATH}

minio {
@@ -9,7 +9,7 @@ final case class AppConfig(
newImagesTopic: String,
categoriesTopic: String,
debugCategories: Boolean,
nsfwSynset: String,
nsfwSynsetPath: String,
nsfwModelPath: String,
minio: MinioCreds
)
@@ -32,7 +32,7 @@ object Main extends IOApp with KafkaJsonDeserializer {
resources = for {
detection <- if (config.debugCategories) ObjectDetection.debug[IO](minioClient)
else ObjectDetection.production[IO]
nsfw <- NsfwDetection.of[IO](config.nsfwModelPath, config.nsfwSynset)
nsfw <- NsfwDetection.of[IO](config.nsfwModelPath, config.nsfwSynsetPath)
categorization <- Resource.eval(
CategorizationStream
.of[IO](
@@ -10,9 +10,9 @@ import ai.djl.translate.Translator
import cats.effect.kernel.{Resource, Sync}
import cats.implicits._
import com.github.baklanovsoft.imagehosting.{BucketId, Category, ImageId, Score}
import org.typelevel.log4cats.LoggerFactory
import org.typelevel.log4cats.{Logger, LoggerFactory}

import java.nio.file.Paths
import java.nio.file.{Files, Paths}
import scala.jdk.CollectionConverters._

trait NsfwDetection[F[_]] {
@@ -24,37 +24,44 @@

object NsfwDetection {

private def buildTranslator[F[_]: Sync](synsetPath: String): F[Translator[Image, Classifications]] = Sync[F].delay {
// copypasted from here https://github.com/deepjavalibrary/djl/issues/1419
ImageClassificationTranslator
.builder()
.optSynsetArtifactName(synsetPath)
.addTransform(new Resize(256))
// from the model description it was trained on 224x224 images so looks like it fits
.addTransform(new CenterCrop(224, 224))
.addTransform(new ToTensor())
.addTransform(
new Normalize(
Array(
0.485f,
0.456f,
0.406f
),
Array(
0.229f,
0.224f,
0.225f
private def buildTranslator[F[_]: Sync](synsetUrl: String): F[Translator[Image, Classifications]] =
Sync[F].delay {
// copypasted from here https://github.com/deepjavalibrary/djl/issues/1419
ImageClassificationTranslator
.builder()
.optSynsetUrl(synsetUrl)
.addTransform(new Resize(256))
// from the model description it was trained on 224x224 images so looks like it fits
.addTransform(new CenterCrop(224, 224))
.addTransform(new ToTensor())
.addTransform(
new Normalize(
Array(
0.485f,
0.456f,
0.406f
),
Array(
0.229f,
0.224f,
0.225f
)
)
)
)
.optApplySoftmax(true)
.build()
}
.optApplySoftmax(true)
.build()
}

private def acquireModelPredictor[F[_]: Sync](modelPath: String, synsetPath: String) =
private def acquireModelPredictor[F[_]: Sync: Logger](modelPath: String, synsetPath: String) =
Resource.make {
for {
translator <- buildTranslator(synsetPath)
lookup <- Sync[F].delay(Files.list(Paths.get("./")).toList)
_ <- Logger[F].info(s"Workdir absolute path: ${Paths.get("./").toAbsolutePath.toString}")
_ <- Logger[F].info(s"Lookup result: $lookup")
synsetUrl <- Sync[F].delay("file://" + Paths.get(synsetPath).toAbsolutePath.toString)
_ <- Logger[F].info(s"Synset constructed url: $synsetUrl")

translator <- buildTranslator(synsetUrl)
criteria <- Sync[F].delay {
Criteria
.builder()
@@ -77,8 +84,8 @@

def of[F[_]: Sync: LoggerFactory](modelPath: String, synsetPath: String): Resource[F, NsfwDetection[F]] =
for {
logger <- Resource.eval(LoggerFactory[F].create)
(_, predictor) <- acquireModelPredictor[F](modelPath, synsetPath)
implicit0(logger: Logger[F]) <- Resource.eval(LoggerFactory[F].create)
(_, predictor) <- acquireModelPredictor[F](modelPath, synsetPath)
} yield new NsfwDetection[F] {

override def detect(image: Image, bucketId: BucketId, imageId: ImageId): F[Option[(Category, Score)]] =
