Commit
[WWB]: Add ImageText-to-Image pipeline validation (#1373)
CVS-159223

---------

Co-authored-by: Ilya Lavrenov <[email protected]>
AlexKoff88 and ilya-lavrenov authored Dec 27, 2024
1 parent 842c99e commit c9d63b2
Showing 6 changed files with 464 additions and 238 deletions.
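This commit teaches who_what_benchmark (wwb) to validate image-to-image pipelines and caps test runtime with a new `--num-inference-steps` flag. Based on the flags exercised in the updated tests below, a minimal end-to-end check of the new mode might look like this sketch; it assumes the `wwb` entry point is installed, and the paths are illustrative, not from the commit:

```python
# Sketch: drive wwb the way the updated tests do (via subprocess).
# All flags below appear in the tests; gt.csv path is an example only.
import subprocess

wwb_args = [
    "wwb",
    "--base-model", "OpenVINO/LCM_Dreamshaper_v7-int8-ov",
    "--gt-data", "gt.csv",               # reference data is written here
    "--model-type", "image-to-image",    # the mode added by this commit
    "--num-inference-steps", "2",        # keep runs short, as the tests do
    "--num-samples", "1",
    "--device", "CPU",
]
result = subprocess.run(wwb_args, capture_output=True, text=True)
assert result.returncode == 0, result.stderr
```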
24 changes: 20 additions & 4 deletions tools/who_what_benchmark/tests/test_cli_image.py
@@ -20,6 +20,8 @@ def run_wwb(args):
 @pytest.mark.parametrize(
     ("model_id", "model_type", "backend"),
     [
+        ("hf-internal-testing/tiny-stable-diffusion-torch", "image-to-image", "hf"),
+        ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "image-to-image", "hf"),
         ("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "hf"),
         ("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "openvino"),
         ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "text-to-image", "hf"),
@@ -40,6 +42,8 @@ def test_image_model_types(model_id, model_type, backend):
         "CPU",
         "--model-type",
         model_type,
+        "--num-inference-steps",
+        "2",
     ]
     if backend == "hf":
         wwb_args.append("--hf")
@@ -65,23 +69,24 @@ def test_image_model_types(model_id, model_type, backend):
 @pytest.mark.parametrize(
     ("model_id", "model_type"),
     [
-        ("echarlaix/tiny-random-stable-diffusion-xl", "text-to-image"),
+        ("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "image-to-image"),
+        ("OpenVINO/LCM_Dreamshaper_v7-int8-ov", "text-to-image"),
     ],
 )
 def test_image_model_genai(model_id, model_type):
     with tempfile.TemporaryDirectory() as temp_dir:
         GT_FILE = os.path.join(temp_dir, "gt.csv")
         MODEL_PATH = os.path.join(temp_dir, model_id.replace("/", "--"))

-        result = subprocess.run(["optimum-cli", "export",
-                                 "openvino", "-m", model_id,
+        result = subprocess.run(["huggingface-cli", "download",
+                                 model_id, "--local-dir",
                                  MODEL_PATH],
                                 capture_output=True, text=True)
         assert result.returncode == 0

         wwb_args = [
             "--base-model",
-            MODEL_PATH,
+            model_id,
             "--num-samples",
             "1",
             "--gt-data",
@@ -90,6 +95,8 @@ def test_image_model_genai(model_id, model_type):
             "CPU",
             "--model-type",
             model_type,
+            "--num-inference-steps",
+            "2",
         ]
         result = run_wwb(wwb_args)
         assert result.returncode == 0
@@ -108,6 +115,8 @@ def test_image_model_genai(model_id, model_type):
             "--model-type",
             model_type,
             "--genai",
+            "--num-inference-steps",
+            "2",
         ]
         result = run_wwb(wwb_args)

@@ -131,6 +140,9 @@ def test_image_model_genai(model_id, model_type):
             model_type,
             "--output",
             output_dir,
+            "--genai",
+            "--num-inference-steps",
+            "2",
         ]
         result = run_wwb(wwb_args)
         assert result.returncode == 0
@@ -149,6 +161,8 @@ def test_image_model_genai(model_id, model_type):
             "CPU",
             "--model-type",
             model_type,
+            "--num-inference-steps",
+            "2",
         ]
         result = run_wwb(wwb_args)
         assert result.returncode == 0
@@ -182,6 +196,8 @@ def test_image_custom_dataset(model_id, model_type, backend):
         "google-research-datasets/conceptual_captions",
         "--dataset-field",
         "caption",
+        "--num-inference-steps",
+        "2",
     ]
     if backend == "hf":
         wwb_args.append("--hf")
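Note the swap in test_image_model_genai: instead of exporting with optimum-cli, the test now downloads a pre-exported OpenVINO model from the Hub. Programmatically, the equivalent of that huggingface-cli call could be done with huggingface_hub; a sketch, not part of the commit, with an illustrative local path:

```python
# Sketch: download the pre-exported OpenVINO model, mirroring the
# "huggingface-cli download ... --local-dir ..." call in the test.
from huggingface_hub import snapshot_download

model_path = snapshot_download(
    repo_id="OpenVINO/LCM_Dreamshaper_v7-int8-ov",
    local_dir="models/OpenVINO--LCM_Dreamshaper_v7-int8-ov",
)
```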
2 changes: 2 additions & 0 deletions tools/who_what_benchmark/whowhatbench/__init__.py
@@ -3,6 +3,7 @@
 from .text_evaluator import TextEvaluator as Evaluator
 from .text2image_evaluator import Text2ImageEvaluator
 from .visualtext_evaluator import VisualTextEvaluator
+from .image2image import Image2ImageEvaluator


 __all__ = [
Expand All @@ -11,5 +12,6 @@
"TextEvaluator",
"Text2ImageEvaluator",
"VisualTextEvaluator",
"Image2ImageEvaluator",
"EVALUATOR_REGISTRY",
]
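With the new import in place, the evaluator is reachable both directly and through the registry that @register_evaluator populates. A usage sketch, assuming EVALUATOR_REGISTRY maps the model-type string to the class (as the decorator in image2image.py below suggests):

```python
# Sketch: both access paths to the new evaluator. The registry key
# "image-to-image" comes from the @register_evaluator call below.
import whowhatbench

evaluator_cls = whowhatbench.EVALUATOR_REGISTRY["image-to-image"]
assert evaluator_cls is whowhatbench.Image2ImageEvaluator
```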
129 changes: 129 additions & 0 deletions tools/who_what_benchmark/whowhatbench/image2image.py
@@ -0,0 +1,129 @@
import os
from typing import Any, Union

import datasets
import pandas as pd
from tqdm import tqdm
from transformers import set_seed
import torch
import openvino_genai

from .registry import register_evaluator
from .text2image_evaluator import Text2ImageEvaluator

from .whowhat_metrics import ImageSimilarity


def preprocess_fn(example):
    return {
        "prompts": example["Instruction_VLM-LLM"],
        "images": example["source_img"],
    }


def prepare_default_data(num_samples=None):
    DATASET_NAME = "paint-by-inpaint/PIPE"
    NUM_SAMPLES = 10 if num_samples is None else num_samples
    set_seed(42)
    default_dataset = datasets.load_dataset(
        DATASET_NAME, split="test", streaming=True
    ).filter(lambda example: example["Instruction_VLM-LLM"] != "").take(NUM_SAMPLES)
    return default_dataset.map(
        lambda x: preprocess_fn(x), remove_columns=default_dataset.column_names
    )


@register_evaluator("image-to-image")
class Image2ImageEvaluator(Text2ImageEvaluator):
    def __init__(
        self,
        base_model: Any = None,
        gt_data: str = None,
        test_data: Union[str, list] = None,
        metrics="similarity",
        similarity_model_id: str = "openai/clip-vit-large-patch14",
        num_inference_steps=4,
        crop_prompts=True,
        num_samples=None,
        gen_image_fn=None,
        seed=42,
        is_genai=False,
    ) -> None:
        assert (
            base_model is not None or gt_data is not None
        ), "Image generation pipeline for evaluation or ground truth data must be defined"

        self.test_data = test_data
        self.metrics = metrics
        self.crop_prompt = crop_prompts
        self.num_samples = num_samples
        self.num_inference_steps = num_inference_steps
        self.seed = seed
        self.similarity = ImageSimilarity(similarity_model_id)
        self.last_cmp = None
        self.gt_dir = os.path.dirname(gt_data)
        self.generation_fn = gen_image_fn
        self.is_genai = is_genai
        self.resolution = None

        if base_model:
            self.gt_data = self._generate_data(
                base_model, gen_image_fn, os.path.join(self.gt_dir, "reference")
            )
        else:
            self.gt_data = pd.read_csv(gt_data, keep_default_na=False)

    def _generate_data(self, model, gen_image_fn=None, image_dir="reference"):
        def default_gen_image_fn(model, prompt, image, num_inference_steps, generator=None):
            with torch.no_grad():
                output = model(
                    prompt,
                    image=image,
                    num_inference_steps=num_inference_steps,
                    output_type="pil",
                    strength=0.8,
                    generator=generator,
                )
            return output.images[0]

        generation_fn = gen_image_fn or default_gen_image_fn

        if self.test_data:
            if isinstance(self.test_data, str):
                data = pd.read_csv(self.test_data)
            else:
                if isinstance(self.test_data, dict):
                    assert "prompts" in self.test_data
                    assert "images" in self.test_data
                    data = dict(self.test_data)
                data = pd.DataFrame.from_dict(data)
        else:
            data = pd.DataFrame.from_dict(prepare_default_data(self.num_samples))

        prompts = data["prompts"]
        images = data["images"]
        output_images = []
        rng = torch.Generator(device="cpu")

        if not os.path.exists(image_dir):
            os.makedirs(image_dir)

        for i, (prompt, image) in tqdm(enumerate(zip(prompts, images)), desc="Evaluate pipeline"):
            set_seed(self.seed)
            rng = rng.manual_seed(self.seed)
            output = generation_fn(
                model,
                prompt,
                image=image,
                num_inference_steps=self.num_inference_steps,
                generator=openvino_genai.TorchGenerator(self.seed) if self.is_genai else rng,
            )
            image_path = os.path.join(image_dir, f"{i}.png")
            output.save(image_path)
            output_images.append(image_path)

        res_data = {"prompts": list(prompts), "images": output_images}
        df = pd.DataFrame(res_data)

        return df
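Constructing the evaluator with a base model is enough to materialize the reference set, since __init__ calls _generate_data. A minimal sketch with an HF diffusers pipeline; the checkpoint id reuses one of the tiny test models above, and the gt-data path is illustrative:

```python
# Sketch: generate ground-truth images with a diffusers pipeline. Any
# image-to-image-capable pipeline matching the call in default_gen_image_fn
# (prompt, image=..., strength, num_inference_steps, generator) should fit.
from diffusers import AutoPipelineForImage2Image
from whowhatbench import Image2ImageEvaluator

pipe = AutoPipelineForImage2Image.from_pretrained(
    "hf-internal-testing/tiny-stable-diffusion-torch"
)
evaluator = Image2ImageEvaluator(
    base_model=pipe,
    gt_data="i2i_ref/gt.csv",   # reference images land in i2i_ref/reference/
    num_samples=2,              # take 2 samples from paint-by-inpaint/PIPE
    num_inference_steps=2,
)
```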