[Dev] Add clip and stable diffusion example (#28)
lshmouse authored Nov 12, 2024
1 parent d11142e commit a1d3ccb
Showing 4 changed files with 48 additions and 0 deletions.
10 changes: 10 additions & 0 deletions experimental/clip_example/README.md
@@ -0,0 +1,10 @@
## CLIP

### Env
```
conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.6 -c pytorch -c nvidia
```

### References
- https://github.com/openai/CLIP
- https://github.com/tensorchord/envd
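
The example script below also imports transformers, Pillow, and requests, which the conda command above does not install; a minimal way to pull them in (assuming pip is available in the same environment):
```
pip install transformers pillow requests
```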
18 changes: 18 additions & 0 deletions experimental/clip_example/clip_example.py
@@ -0,0 +1,18 @@
import torch
from PIL import Image
import requests

from transformers import CLIPProcessor, CLIPModel

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)

outputs = model(**inputs)
logits_per_image = outputs.logits_per_image # this is the image-text similarity score
probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
print(probs)
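
The COCO image at that URL shows cats, so the first label should receive the higher probability. A small sketch continuing from the variables defined above, wrapping inference in torch.no_grad() and printing the best-matching label (the labels list and variable names here are illustrative):
```
import torch

labels = ["a photo of a cat", "a photo of a dog"]
with torch.no_grad():
    outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=1)
# pick the label with the highest image-text similarity
best = labels[probs.argmax(dim=1).item()]
print(f"best match: {best} ({probs.max().item():.3f})")
```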
4 changes: 4 additions & 0 deletions experimental/stable_diffusion_example/README.md
@@ -0,0 +1,4 @@
## stable-diffusion

### References
- https://zhuanlan.zhihu.com/p/557182648
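
### Env
The example script loads the weights with use_auth_token=True, so a Hugging Face account token is required; assuming pip in the active environment, a minimal setup looks like:
```
pip install diffusers transformers
huggingface-cli login
```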
16 changes: 16 additions & 0 deletions experimental/stable_diffusion_example/sd_example.py
@@ -0,0 +1,16 @@
import torch
from torch import autocast
from diffusers import StableDiffusionPipeline

model_id = "CompVis/stable-diffusion-v1-1"
device = "cuda"

pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=True)
pipe = pipe.to(device)

# prompt = "a photo of an astronaut riding a horse on mars"
prompt = "The child who suddenly appeared in front of the car from a driver view"
with autocast("cuda"):
    image = pipe(prompt, guidance_scale=7.5)["sample"][0]

image.save("sd_output.png")
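
Note: the ["sample"][0] indexing matches older diffusers releases; in more recent versions the pipeline output exposes the generated images through the .images attribute, along the lines of:
```
# newer diffusers API: results are returned on the .images list
image = pipe(prompt, guidance_scale=7.5).images[0]
```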
