From a1d3ccbbb36449dc69108088bed432e50cccc265 Mon Sep 17 00:00:00 2001
From: Liu Shaohui
Date: Tue, 12 Nov 2024 15:05:41 +0800
Subject: [PATCH] [Dev] Add CLIP and Stable Diffusion examples (#28)

---
 experimental/clip_example/README.md        | 10 ++++++++++
 experimental/clip_example/clip_example.py  | 18 ++++++++++++++++++
 .../stable_diffusion_example/README.md     |  4 ++++
 .../stable_diffusion_example/sd_example.py | 16 ++++++++++++++++
 4 files changed, 48 insertions(+)
 create mode 100644 experimental/clip_example/README.md
 create mode 100644 experimental/clip_example/clip_example.py
 create mode 100644 experimental/stable_diffusion_example/README.md
 create mode 100644 experimental/stable_diffusion_example/sd_example.py

diff --git a/experimental/clip_example/README.md b/experimental/clip_example/README.md
new file mode 100644
index 0000000..8382f5b
--- /dev/null
+++ b/experimental/clip_example/README.md
@@ -0,0 +1,10 @@
+## CLIP
+
+### Env
+```
+conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.6 -c pytorch -c nvidia
+```
+
+### References
+- https://github.com/openai/CLIP
+- https://github.com/tensorchord/envd
diff --git a/experimental/clip_example/clip_example.py b/experimental/clip_example/clip_example.py
new file mode 100644
index 0000000..bddccda
--- /dev/null
+++ b/experimental/clip_example/clip_example.py
@@ -0,0 +1,18 @@
+import torch
+from PIL import Image
+import requests
+
+from transformers import CLIPProcessor, CLIPModel
+
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+image = Image.open(requests.get(url, stream=True).raw)
+
+inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)
+
+outputs = model(**inputs)
+logits_per_image = outputs.logits_per_image  # image-text similarity scores, one column per caption
+probs = logits_per_image.softmax(dim=1)  # softmax over the captions gives label probabilities
+print(probs)
diff --git a/experimental/stable_diffusion_example/README.md b/experimental/stable_diffusion_example/README.md
new file mode 100644
index 0000000..810517d
--- /dev/null
+++ b/experimental/stable_diffusion_example/README.md
@@ -0,0 +1,4 @@
+## Stable Diffusion
+
+### References
+- https://zhuanlan.zhihu.com/p/557182648
diff --git a/experimental/stable_diffusion_example/sd_example.py b/experimental/stable_diffusion_example/sd_example.py
new file mode 100644
index 0000000..420f6d4
--- /dev/null
+++ b/experimental/stable_diffusion_example/sd_example.py
@@ -0,0 +1,16 @@
+import torch
+from torch import autocast
+from diffusers import StableDiffusionPipeline
+
+model_id = "CompVis/stable-diffusion-v1-1"
+device = "cuda"
+
+pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=True)
+pipe = pipe.to(device)
+
+# prompt = "a photo of an astronaut riding a horse on mars"
+prompt = "The child who suddenly appeared in front of the car, from a driver's view"
+with autocast("cuda"):
+    image = pipe(prompt, guidance_scale=7.5).images[0]
+
+image.save("sd_example.png")
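
As a usage note beyond the patch itself: the score-then-softmax step in clip_example.py generalizes to ranking any set of candidate captions for an image. Below is a minimal sketch reusing the same checkpoint and transformers API; the caption list and variable names are illustrative, not part of the patch.

```python
# Sketch: rank candidate captions for one image with the same CLIP checkpoint
# as clip_example.py. Captions here are illustrative assumptions.
import requests
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

captions = ["a photo of two cats", "a photo of a dog", "a photo of a remote control"]
inputs = processor(text=captions, images=image, return_tensors="pt", padding=True)

with torch.no_grad():
    # logits_per_image has shape (num_images, num_captions)
    logits = model(**inputs).logits_per_image
probs = logits.softmax(dim=1)
best = probs.argmax(dim=1).item()
print(f"best caption: {captions[best]!r} (p={probs[0, best].item():.3f})")
```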
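Similarly for sd_example.py: on memory-constrained GPUs the autocast context is commonly replaced by loading fp16 weights directly. A minimal sketch, assuming a recent diffusers release where `from_pretrained` accepts `torch_dtype` and pipeline calls return `.images`; the prompt and output filename are illustrative.

```python
# Sketch: half-precision variant of sd_example.py (assumes a recent diffusers release).
import torch
from diffusers import StableDiffusionPipeline

# Loading fp16 weights roughly halves GPU memory; autocast is then unnecessary.
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-1", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")

prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt, guidance_scale=7.5).images[0]
image.save("astronaut_rides_horse.png")
```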