Skip to content

Commit

Permalink
create image_sscd.py and corresponding changes in Dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmednasserswe committed Oct 30, 2023
1 parent cae6e6e commit 2dad0b6
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ EXPOSE ${PRESTO_PORT}
# All following relative paths (clone target, models_files, ...) resolve under /app.
WORKDIR /app
# Stop apt/dpkg from asking interactive questions during the image build.
ENV DEBIAN_FRONTEND=noninteractive

# Fetch the SSCD copy-detection sources into /app/sscd-copy-detection.
# NOTE(review): git is used here, before the apt-get line below that installs it —
# presumably the base image already ships git; confirm.
RUN git clone https://github.com/facebookresearch/sscd-copy-detection.git
# Install the SSCD Python requirements; the extra index adds the PyTorch CUDA 11.3 wheels.
RUN cd sscd-copy-detection && python -m pip install -r ./requirements.txt --extra-index-url https://download.pytorch.org/whl/cu113
# NOTE(review): nothing in the visible lines writes into models_files — confirm it is still needed.
RUN mkdir models_files
# Download the SSCD TorchScript checkpoint into /app/sscd-copy-detection/.
# NOTE(review): lib/model/image_sscd.py loads "sscd_disc_mixup.torchscript.pt" by bare
# file name, i.e. relative to the process working directory — confirm the two locations agree.
# NOTE(review): assumes wget is available in the base image — confirm.
RUN cd sscd-copy-detection && wget https://dl.fbaipublicfiles.com/sscd-copy-detection/sscd_disc_mixup.torchscript.pt

# System packages: ffmpeg plus build tooling (cmake, swig) and the libav development headers.
RUN apt-get update && apt-get install -y ffmpeg cmake swig libavcodec-dev libavformat-dev git
# Expose the apt-installed ffmpeg binary under /usr/local/bin as well.
RUN ln -s /usr/bin/ffmpeg /usr/local/bin/ffmpeg

Expand Down
70 changes: 70 additions & 0 deletions lib/model/image_sscd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from typing import Dict
import io
import urllib.request

from lib.model.model import Model

from pdqhashing.hasher.pdq_hasher import PDQHasher
from lib import schemas
from torchvision import transforms
from PIL import Image
import torch
from lib.logger import logger
import requests
import numpy as np

class Model(Model):
    """Image model that turns an image URL into an SSCD embedding.

    The class deliberately keeps the name ``Model`` (shadowing the imported
    base class) because that is the name this module exposes to its callers.

    The TorchScript network and the preprocessing pipeline are created on
    first use and then reused, so repeated calls do not reload the checkpoint
    from disk.
    """

    # Checkpoint file name, resolved against the process working directory.
    SSCD_MODEL_PATH = "sscd_disc_mixup.torchscript.pt"
    # Seconds to wait for the remote host before a download is abandoned.
    DOWNLOAD_TIMEOUT_SECONDS = 30

    # Lazily-filled per-instance caches (see the two ``_get_*`` helpers).
    _sscd_model = None
    _sscd_preprocess = None

    def _get_sscd_model(self):
        """Return the TorchScript SSCD model, loading it on first access."""
        if self._sscd_model is None:
            self._sscd_model = torch.jit.load(self.SSCD_MODEL_PATH)
        return self._sscd_model

    def _get_sscd_preprocess(self):
        """Return the resize / to-tensor / normalize pipeline, built once."""
        if self._sscd_preprocess is None:
            normalize = transforms.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
            )
            self._sscd_preprocess = transforms.Compose([
                transforms.Resize(288),
                transforms.ToTensor(),
                normalize,
            ])
        return self._sscd_preprocess

    def compute_sscd(self, image_url: str) -> list:
        """Download an image and compute its SSCD embedding.

        :param image_url: HTTP(S) URL of the image to embed.
        :returns: The embedding as a flat list of Python floats.
        :raises requests.RequestException: if the download fails, times out
            or the server answers with an error status.
        """
        response = requests.get(
            image_url, timeout=self.DOWNLOAD_TIMEOUT_SECONDS
        )
        # Fail with a clear HTTP error instead of handing an error page to PIL.
        response.raise_for_status()

        # The normalization above is defined for exactly three channels, so
        # greyscale, palette and RGBA images must be converted first.
        img = Image.open(io.BytesIO(response.content)).convert("RGB")

        batch = self._get_sscd_preprocess()(img).unsqueeze(0)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            embedding = self._get_sscd_model()(batch)[0, :]
        return embedding.detach().numpy().tolist()

    def get_iobytes_for_image(self, image: schemas.Message) -> io.BytesIO:
        """Fetch ``image.body.url`` and return its content as a byte buffer.

        A browser-like ``User-Agent`` header is sent with the request.

        :param image: Message whose ``body.url`` points at the image.
        :returns: In-memory buffer holding the downloaded bytes.
        """
        request = urllib.request.Request(
            image.body.url,
            headers={'User-Agent': 'Mozilla/5.0'}
        )
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(
            request, timeout=self.DOWNLOAD_TIMEOUT_SECONDS
        ) as response:
            return io.BytesIO(response.read())

    def process(self, image: schemas.Message) -> schemas.ImageOutput:
        """Generic entry point returning the model response for a message.

        :param image: Message whose ``body.url`` points at the image.
        :returns: Mapping with a single ``"embeddings"`` key holding the
            SSCD embedding of the image.
        """
        return {"embeddings": self.compute_sscd(image.body.url)}

0 comments on commit 2dad0b6

Please sign in to comment.