[Dev] Add clip and stable diffusion example (#28)

lshmouse · Nov 12, 2024 · a1d3ccb · a1d3ccb
1 parent d11142e
commit a1d3ccb
Show file tree

Hide file tree

Showing 4 changed files with 48 additions and 0 deletions.
diff --git a/experimental/clip_example/README.md b/experimental/clip_example/README.md
@@ -0,0 +1,10 @@
+## CLIP
+
+### Env
+```
+conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.6 -c pytorch -c nvidia
+```
+
+### References
+- https://github.com/openai/CLIP
+- https://github.com/tensorchord/envd
diff --git a/experimental/clip_example/clip_example.py b/experimental/clip_example/clip_example.py
@@ -0,0 +1,18 @@
+import torch
+from PIL import Image
+import requests
+
+from transformers import CLIPProcessor, CLIPModel
+
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+image = Image.open(requests.get(url, stream=True).raw)
+
+inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)
+
+outputs = model(**inputs)
+logits_per_image = outputs.logits_per_image # this is the image-text similarity score
+probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
+print(probs)
diff --git a/experimental/stable_diffusion_example/README.md b/experimental/stable_diffusion_example/README.md
@@ -0,0 +1,4 @@
+## stable-diffusion
+
+### References
+- https://zhuanlan.zhihu.com/p/557182648
diff --git a/experimental/stable_diffusion_example/sd_example.py b/experimental/stable_diffusion_example/sd_example.py
@@ -0,0 +1,16 @@
+import torch
+from torch import autocast
+from diffusers import StableDiffusionPipeline
+
+model_id = "CompVis/stable-diffusion-v1-1"
+device = "cuda"
+
+pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=True)
+pipe = pipe.to(device)
+
+# prompt = "a photo of an astronaut riding a horse on mars"
+prompt = "The child who suddenly appeared in front of the car from a driver view"
+with autocast("cuda"):
+    image = pipe(prompt, guidance_scale=7.5)["sample"][0]
+
+image.save("astronaut_rides_horse.png")