diff --git a/ci/k8s/htr.yaml b/ci/k8s/htr.yaml
new file mode 100644
index 0000000..de51390
--- /dev/null
+++ b/ci/k8s/htr.yaml
@@ -0,0 +1,58 @@
+---
+# Service in front of the openai-htr pods: port 8001 forwards to the
+# container's port 8080.
+apiVersion: v1
+kind: Service
+metadata:
+  name: islandora-openai-htr
+spec:
+  selector:
+    app: islandora-openai-htr
+  ports:
+    - protocol: TCP
+      port: 8001
+      targetPort: 8080
+---
+# Deployment running three replicas of the scyllaridae openai-htr image.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: islandora-openai-htr
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: islandora-openai-htr
+  template:
+    metadata:
+      labels:
+        app: islandora-openai-htr
+    spec:
+      containers:
+        - name: scyllaridae-openai-htr
+          image: lehighlts/scyllaridae-openai-htr:main
+          imagePullPolicy: IfNotPresent
+          env:
+            # Read from the "openai" secret, key "api-key".
+            - name: OPENAI_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: openai
+                  key: api-key
+          resources:
+            requests:
+              memory: "128Mi"
+              cpu: "250m"
+            limits:
+              memory: "1Gi"
+          ports:
+            # NOTE(review): hostPort pins each replica to a distinct node;
+            # confirm it is needed alongside the Service.
+            - hostPort: 8001
+              containerPort: 8080
+          readinessProbe:
+            httpGet:
+              path: /healthcheck
+              port: 8080
+            initialDelaySeconds: 5
+            periodSeconds: 10
diff --git a/ci/k8s/ingress.yaml b/ci/k8s/ingress.yaml
index 3388c11..20cc491 100644
--- a/ci/k8s/ingress.yaml
+++ b/ci/k8s/ingress.yaml
@@ -88,3 +88,11 @@ spec:
                 name: islandora-ocrpdf
                 port:
                   number: 8080
+          # Backend port must match the islandora-openai-htr Service port (8001).
+          - path: /openai\-htr(/|$)(.*)
+            pathType: Prefix
+            backend:
+              service:
+                name: islandora-openai-htr
+                port:
+                  number: 8001
diff --git a/examples/openai-htr/Dockerfile b/examples/openai-htr/Dockerfile
new file mode 100644
index 0000000..221f69e
--- /dev/null
+++ b/examples/openai-htr/Dockerfile
@@ -0,0 +1,15 @@
+ARG TAG=main
+ARG DOCKER_REPOSITORY=lehighlts
+FROM ${DOCKER_REPOSITORY}/scyllaridae:${TAG}
+
+# Defaults read by /app/cmd.sh; override at runtime to change the model,
+# the transcription prompt, or the response token limit.
+ENV OPENAI_MODEL=gpt-4o-mini \
+    PROMPT="Transcribe this image that contains handwritten text. Include all text you see in the image. In your response, say absolutely nothing except the text from the image" \
+    MAX_TOKENS=300
+
+# scyllaridae.yml invokes /app/cmd.sh, so the script has to ship in the image.
+# NOTE(review): cmd.sh needs bash, curl, and jq at runtime — confirm the
+# base image provides them.
+COPY scyllaridae.yml /app/scyllaridae.yml
+COPY cmd.sh /app/cmd.sh
diff --git a/examples/openai-htr/README.md b/examples/openai-htr/README.md
new file mode 100644
index 0000000..400db9f
--- /dev/null
+++ b/examples/openai-htr/README.md
@@ -0,0 +1,14 @@
+# openai-htr
+
+Use OpenAI ChatGPT to transcribe handwritten text.
+
+## Secrets
+
+Requires an environment variable `OPENAI_API_KEY`.
+
+If deploying this in Kubernetes, you can create the secret via
+
+```bash
+kubectl create secret generic openai \
+  --from-literal=api-key="$OPENAI_API_KEY"
+```
diff --git a/examples/openai-htr/cmd.sh b/examples/openai-htr/cmd.sh
new file mode 100755
index 0000000..37e16f3
--- /dev/null
+++ b/examples/openai-htr/cmd.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+#
+# Ask the OpenAI chat completions API to transcribe the handwritten text in
+# an image and print the transcription on stdout.
+#
+# Usage: cmd.sh IMAGE_URL
+# Environment: OPENAI_API_KEY, OPENAI_MODEL, PROMPT, MAX_TOKENS
+
+set -euo pipefail
+
+# Build the request body with jq so quotes, backslashes, and newlines in the
+# prompt or URL are JSON-escaped instead of corrupting the payload.
+payload="$(jq -n \
+  --arg model "$OPENAI_MODEL" \
+  --arg prompt "$PROMPT" \
+  --arg image_url "$1" \
+  --argjson max_tokens "$MAX_TOKENS" \
+  '{
+    model: $model,
+    messages: [
+      {
+        role: "user",
+        content: [
+          {type: "text", text: $prompt},
+          {type: "image_url", image_url: {url: $image_url}}
+        ]
+      }
+    ],
+    max_tokens: $max_tokens
+  }')"
+
+# --fail makes an HTTP error status abort the script (via pipefail) rather
+# than printing "null" as if it were a transcription.
+curl --fail --silent --show-error https://api.openai.com/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $OPENAI_API_KEY" \
+  -d "$payload" \
+  | jq -r '.choices[0].message.content'
diff --git a/examples/openai-htr/scyllaridae.yml b/examples/openai-htr/scyllaridae.yml
new file mode 100644
index 0000000..a527b7e
--- /dev/null
+++ b/examples/openai-htr/scyllaridae.yml
@@ -0,0 +1,7 @@
+allowedMimeTypes:
+  - "image/*"
+cmdByMimeType:
+  default:
+    cmd: /app/cmd.sh
+    args:
+      - "%source-uri"
diff --git a/examples/parry/scyllaridae.yml b/examples/parry/scyllaridae.yml
index 4b1c553..9a264f0 100644
--- a/examples/parry/scyllaridae.yml
+++ b/examples/parry/scyllaridae.yml
@@ -9,3 +9,6 @@ queueMiddlewares:
   - queueName: islandora-merge-pdf
     url: http://mergepdf:8080
     consumers: 3
+  - queueName: islandora-openai-htr
+    url: http://openai-htr:8080
+    consumers: 3