Skip to content

Commit

Permalink
Initial drop
Browse files Browse the repository at this point in the history
  • Loading branch information
dgrechka committed Aug 10, 2022
1 parent fe96829 commit 98badce
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 84 deletions.
35 changes: 35 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Stage "downloader": fetches the pretrained model weights from Zenodo.
# Only the unpacked weights under /app are carried over to the final image,
# so the packages installed here do not end up in it.
FROM ubuntu AS downloader
WORKDIR /work
RUN apt-get update && apt-get install --no-install-recommends -y ca-certificates wget unzip

RUN mkdir /app
# downloading pretrained weights from Zenodo
RUN wget https://zenodo.org/record/6663662/files/head_swin_bnneck.zip -O /app/head_swin_bnneck.zip
RUN unzip /app/head_swin_bnneck.zip -d /app/head_swin_bnneck
RUN rm /app/head_swin_bnneck.zip
# last.ckpt is the same as model.ckpt, so deleting it to save image space
RUN rm /app/head_swin_bnneck/last.ckpt

# Stage "FINAL": the runtime image of the embedding service.
FROM python:3.9-slim AS FINAL

# installing openCV dependencies
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY requirements.txt /requirements.txt

# --extra-index-url https://download.pytorch.org/whl/cpu avoids CUDA installation
# --no-cache-dir keeps pip's download cache out of the image layer
RUN python -m pip install --no-cache-dir --upgrade pip && pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /requirements.txt
# model weights prepared by the downloader stage
COPY --from=downloader /app .

# Defaults for the variables read by serve.py; override at container start.
ENV KAFKA_URL=kafka:9092
ENV INPUT_QUEUE=kashtanka_calvin_zhirui_yolov5_output
ENV OUTPUT_QUEUE=kashtanka_calvin_zhirui_embeddings_output
# Exec form: python runs as PID 1 and receives the stop signal directly
# instead of being hidden behind an intermediate /bin/sh.
CMD ["python3", "serve.py"]
# Service sources are copied last so that code changes do not invalidate
# the dependency and weights layers above.
COPY code .

# Stage "TESTS": runs the unit tests at build time against the example snapshots.
FROM FINAL AS TESTS
COPY example /app/example
RUN python -m unittest discover -v

31 changes: 19 additions & 12 deletions code/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import numpy as np
from easydict import EasyDict

import copy

from model import LitModule

import kafkajobs
Expand Down Expand Up @@ -49,33 +51,38 @@ def load_pretrained_model(ckpt, device="cuda:0"):

# get_embedding: run `model` on an already preprocessed `image` and return the
# result as a NumPy array.
#   model -- callable applied directly to `image`
#   image -- input passed to the model unchanged; it must expose `.shape`
#            (presumably a batched tensor -- TODO confirm against the caller)
# The whole call executes under torch.inference_mode() (decorator below).
# NOTE(review): this span comes from a rendered diff, so each print appears
# both active and commented out; presumably one line of each pair is the
# removed version -- confirm against the repository.
@torch.inference_mode()
def get_embedding(model, image):
# image = cv2.cvtColor(cv2.imread(self.image_path[item]), cv2.COLOR_BGR2RGB)
# if self.transform:
# get_infer_transform...
# image = self.transform(image=image)['image']
# rst["images"]
# batch = {k: torch.tensor(v).to(device) for k, v in data.items()}
print(f"image shape: {image.shape}")
#print(f"image shape: {image.shape}")

# The model output is moved to the CPU before the conversion to NumPy.
embedding = model(image).cpu().numpy()
# L2 normalisation is commented out, so the model output is returned as is.
#embeddings = np.vstack(embeddings)
# embeddings = normalize(embeddings, axis=1, norm="l2")

print(f"embedding size {embedding.shape}")
#print(f"embedding size {embedding.shape}")
return embedding

def get_embedding_for_json(model, preproc_transform, serialized_image):
    """Return the embedding of one serialized image.

    The image is decoded with ``kafkajobs.serialization.imagesFieldToNp``,
    passed through ``preproc_transform`` (called with ``image=...``; its
    ``'image'`` entry is used), given a leading batch axis and handed to
    ``get_embedding``.

    Args:
        model: model forwarded to ``get_embedding``.
        preproc_transform: callable returning a mapping with an ``'image'`` key.
        serialized_image: a single image in the kafkajobs serialized form.

    Returns:
        Whatever ``get_embedding`` returns for the one-image batch.
    """
    # TODO: avoid wrapping with list
    decoded_images = kafkajobs.serialization.imagesFieldToNp([serialized_image])
    transformed = preproc_transform(image=decoded_images[0])['image']

    # adding batch dimension
    single_image_batch = transformed[np.newaxis, ...]
    return get_embedding(model, single_image_batch)

def process_job(model, preproc_transform, job):
    """Turn a detector job into an embeddings job.

    Works on a deep copy, so ``job`` itself is left untouched. In the copy the
    ``"yolo5_output"`` list is moved to ``"image_embeddings"``; every entry of
    that list loses its ``"head"`` and ``"annotated"`` fields and gains an
    ``"embedding"`` field holding the base64 string produced by
    ``kafkajobs.serialization.npArrayToBase64str`` for the head image.

    Args:
        model: model forwarded to ``get_embedding_for_json``.
        preproc_transform: transform forwarded to ``get_embedding_for_json``.
        job: mapping with a ``"yolo5_output"`` list of entries.

    Returns:
        The transformed copy of ``job``.

    Raises:
        KeyError: if ``"yolo5_output"``, ``"head"`` or ``"annotated"`` is missing.
    """
    result = copy.deepcopy(job)
    detections = result.pop("yolo5_output")

    for detection in detections:
        head_image = detection.pop("head")
        vector = get_embedding_for_json(model, preproc_transform, head_image)
        detection["embedding"] = kafkajobs.serialization.npArrayToBase64str(vector)
        del detection["annotated"]

    result["image_embeddings"] = detections
    return result




24 changes: 0 additions & 24 deletions code/self_test.py

This file was deleted.

36 changes: 36 additions & 0 deletions code/serve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import infer

import kafkajobs
from infer import process_job

# Service configuration is taken from the environment; a missing variable
# raises KeyError at start-up.
kafkaUrl = os.environ['KAFKA_URL']
inputQueueName = os.environ['INPUT_QUEUE']
outputQueueName = os.environ['OUTPUT_QUEUE']

# Name passed to both the job consumer and the result producer below.
appName = "zhiru-calvin-head-swin-bnneck-feature-extractor"

# Consumer of incoming jobs and producer of results; both use the same Kafka URL.
worker = kafkajobs.jobqueue.JobQueueWorker(appName, kafkaBootstrapUrl=kafkaUrl, topicName=inputQueueName, appName=appName)
resultQueue = kafkajobs.jobqueue.JobQueueProducer(kafkaUrl, outputQueueName, appName)


# The model is loaded once at import time from the ./head_swin_bnneck
# directory (relative to the working directory) onto the CPU.
device = 'cpu'
model,config = infer.load_pretrained_model("./head_swin_bnneck", device)
# Preprocessing transform built from the image size stored in the loaded config.
preproc_transform = infer.get_infer_transform(config.image_size)
print("model loaded")

def work():
    """Serve jobs forever: fetch, compute embeddings, publish, commit.

    Each job taken from ``worker`` is transformed by ``process_job`` and the
    result is enqueued on ``resultQueue`` under the job's ``"uid"``. The
    commit on ``worker`` happens only after the result has been enqueued.
    The function never returns.
    """
    print("Service started. Pooling for a job")
    while True:
        # NOTE(review): 5000 is presumably a polling interval in ms -- confirm
        # against the kafkajobs API.
        job = worker.GetNextJob(5000)
        job_id = job["uid"]
        print("Got job {0}".format(job_id))

        resultQueue.Enqueue(job_id, process_job(model, preproc_transform, job))
        worker.Commit()
        print("{0}: Job processed successfully, results are submited to kafka".format(job_id))

# Enter the endless job loop only when this file is executed as a script
# (e.g. `python3 serve.py`); importing the module must not start it.
if __name__ == "__main__":
    work()
43 changes: 43 additions & 0 deletions code/test_embeddings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@

import unittest
import json

import infer

# The tests run the model on the CPU.
device = 'cpu'

# Model and config are loaded once at import time from ./head_swin_bnneck
# (relative to the working directory) and used by every test below.
model,config = infer.load_pretrained_model("./head_swin_bnneck", device)
#print("Config")
#print(config)
# Preprocessing transform built from the image size stored in the loaded config.
preproc_transform = infer.get_infer_transform(config.image_size)

class EmbeddingsTest(unittest.TestCase):
    """Checks the embedding extractor against the snapshots in ./example."""

    @staticmethod
    def _load_json(path):
        """Parse the JSON file at *path*, closing the file handle afterwards."""
        with open(path) as snapshot_file:
            return json.load(snapshot_file)

    def test_embedding_produced(self):
        """The first head image of the input snapshot yields a 1 x 1024 embedding."""
        input_snapshot = self._load_json("./example/input_snapshot.json")

        input_image = input_snapshot["yolo5_output"][0]["head"]

        embedding = infer.get_embedding_for_json(model, preproc_transform, input_image)

        print(embedding)
        # unittest assertions are not stripped under -O and report both values.
        self.assertEqual(embedding.shape[0], 1)
        self.assertEqual(embedding.shape[1], 1024)

    def test_process_job(self):
        """process_job reproduces the stored expected output snapshot exactly."""
        job = self._load_json("./example/input_snapshot.json")
        expected_output = self._load_json("./example/expected_output_snapshot.json")

        output_job = infer.process_job(model, preproc_transform, job)

        # Compared as serialized JSON, so key order is part of the check.
        self.assertEqual(json.dumps(expected_output), json.dumps(output_job))
        self.assertNotIn("yolo5_output", output_job)



if __name__ == "__main__":
    import sys

    # unittest.main() calls sys.exit() by default, which made the success
    # message unreachable. Run with exit=False and propagate a failure as a
    # non-zero exit status explicitly.
    program = unittest.main(exit=False)
    if not program.result.wasSuccessful():
        sys.exit(1)
    print("Self test success")
44 changes: 44 additions & 0 deletions example/expected_output_snapshot.json

Large diffs are not rendered by default.

49 changes: 1 addition & 48 deletions example/input_snapshot.json

Large diffs are not rendered by default.

0 comments on commit 98badce

Please sign in to comment.