Skip to content

Commit

Permalink
Bump Optimum to 1.22 + Adapt to the SD task refactoring in Optimum main (#686)
Browse files Browse the repository at this point in the history

* update for sentence transformers

* fix sd export and inference

* remove useless

* fix CIS

* restore
  • Loading branch information
JingyaHuang authored Sep 20, 2024
1 parent 39ace4d commit 8445656
Show file tree
Hide file tree
Showing 13 changed files with 60 additions and 79 deletions.
6 changes: 2 additions & 4 deletions docs/source/inference_tutorials/stable_diffusion.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ Here is an example of exporting stable diffusion components with `Optimum` CLI:

```bash
optimum-cli export neuron --model stabilityai/stable-diffusion-2-1-base \
--task stable-diffusion \
--batch_size 1 \
--height 512 `# height in pixels of generated image, e.g. 512, 768` \
--width 512 `# width in pixels of generated image, e.g. 512, 768` \
Expand Down Expand Up @@ -229,7 +228,6 @@ Here is an example of exporting SDXL components with `Optimum` CLI:

```bash
optimum-cli export neuron --model stabilityai/stable-diffusion-xl-base-1.0 \
--task stable-diffusion-xl \
--batch_size 1 \
--height 1024 `# height in pixels of generated image, e.g. 768, 1024` \
--width 1024 `# width in pixels of generated image, e.g. 768, 1024` \
Expand Down Expand Up @@ -481,7 +479,7 @@ Here we will compile the [`stabilityai/sdxl-turbo`](https://huggingface.co/stabi
### Compile SDXL Turbo

```bash
optimum-cli export neuron --model stabilityai/sdxl-turbo --task stable-diffusion-xl --batch_size 1 --height 512 --width 512 --auto_cast matmul --auto_cast_type bf16 sdxl_turbo_neuron/
optimum-cli export neuron --model stabilityai/sdxl-turbo --batch_size 1 --height 512 --width 512 --auto_cast matmul --auto_cast_type bf16 sdxl_turbo_neuron/
```

### Text-to-Image
Expand Down Expand Up @@ -562,7 +560,7 @@ We can either compile one or multiple ControlNet via the Optimum CLI or programa
* Export via the Optimum CLI

```bash
optimum-cli export neuron -m runwayml/stable-diffusion-v1-5 --task stable-diffusion --batch_size 1 --height 512 --width 512 --controlnet_ids lllyasviel/sd-controlnet-canny --num_images_per_prompt 1 sd_neuron_controlnet/
optimum-cli export neuron -m runwayml/stable-diffusion-v1-5 --batch_size 1 --height 512 --width 512 --controlnet_ids lllyasviel/sd-controlnet-canny --num_images_per_prompt 1 sd_neuron_controlnet/
```

* Export via Python API
Expand Down
7 changes: 0 additions & 7 deletions optimum/commands/export/neuron.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ def parse_args_neuron(parser: "ArgumentParser"):
f" {str(list(TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS.keys()) + list(TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS.keys()))}."
),
)
optional_group.add_argument(
"--library-name",
type=str,
choices=["transformers", "sentence_transformers"],
default=None,
help=("The library on the model. If not provided, will attempt to infer the local checkpoint's library."),
)
optional_group.add_argument(
"--subfolder",
type=str,
Expand Down
7 changes: 0 additions & 7 deletions optimum/commands/export/neuronx.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,6 @@ def parse_args_neuronx(parser: "ArgumentParser"):
f" {str(list(TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS.keys()) + list(TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS.keys()))}."
),
)
optional_group.add_argument(
"--library-name",
type=str,
choices=["transformers", "diffusers", "sentence_transformers"],
default=None,
help=("The library of the model." " If not provided, will attempt to infer the local checkpoint's library."),
)
optional_group.add_argument(
"--subfolder",
type=str,
Expand Down
32 changes: 13 additions & 19 deletions optimum/exporters/neuron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def get_submodels_and_neuron_configs(
input_shapes: Dict[str, int],
task: str,
output: Path,
library_name: Optional[str] = None,
library_name: str,
subfolder: str = "",
dynamic_batch_size: bool = False,
model_name_or_path: Optional[Union[str, Path]] = None,
Expand All @@ -276,19 +276,17 @@ def get_submodels_and_neuron_configs(
lora_scales: Optional[Union[float, List[float]]] = None,
controlnet_ids: Optional[Union[str, List[str]]] = None,
):
is_stable_diffusion = "stable-diffusion" in task
is_encoder_decoder = (
getattr(model.config, "is_encoder_decoder", False) if isinstance(model.config, PretrainedConfig) else False
)

if is_stable_diffusion:
if library_name == "diffusers":
# TODO: Enable optional outputs for Stable Diffusion
if output_attentions:
raise ValueError(f"`output_attentions`is not supported by the {task} task yet.")
models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs_for_stable_diffusion(
model=model,
input_shapes=input_shapes,
task=task,
output=output,
dynamic_batch_size=dynamic_batch_size,
submodels=submodels,
Expand Down Expand Up @@ -349,7 +347,6 @@ def _normalize_lora_params(lora_model_ids, lora_weight_names, lora_adapter_names
def _get_submodels_and_neuron_configs_for_stable_diffusion(
model: Union["PreTrainedModel", "DiffusionPipeline"],
input_shapes: Dict[str, int],
task: str,
output: Path,
dynamic_batch_size: bool = False,
submodels: Optional[Dict[str, Union[Path, str]]] = None,
Expand Down Expand Up @@ -387,7 +384,6 @@ def _get_submodels_and_neuron_configs_for_stable_diffusion(
)
models_and_neuron_configs = get_stable_diffusion_models_for_export(
pipeline=model,
task=task,
text_encoder_input_shapes=input_shapes["text_encoder"],
unet_input_shapes=input_shapes["unet"],
vae_encoder_input_shapes=input_shapes["vae_encoder"],
Expand Down Expand Up @@ -470,6 +466,7 @@ def load_models_and_neuron_configs(
trust_remote_code: bool,
subfolder: str,
revision: str,
library_name: str,
force_download: bool,
local_files_only: bool,
token: Optional[Union[bool, str]],
Expand All @@ -481,13 +478,8 @@ def load_models_and_neuron_configs(
controlnet_ids: Optional[Union[str, List[str]]] = None,
output_attentions: bool = False,
output_hidden_states: bool = False,
library_name: Optional[str] = None,
**input_shapes,
):
library_name = TasksManager.infer_library_from_model(
model_name_or_path, subfolder=subfolder, library_name=library_name
)

model_kwargs = {
"task": task,
"model_name_or_path": model_name_or_path,
Expand Down Expand Up @@ -562,6 +554,10 @@ def main_export(
output.parent.mkdir(parents=True)

task = TasksManager.map_from_synonym(task)
if library_name is None:
library_name = TasksManager.infer_library_from_model(
model_name_or_path, revision=revision, cache_dir=cache_dir, token=token
)

models_and_neuron_configs, output_model_names = load_models_and_neuron_configs(
model_name_or_path=model_name_or_path,
Expand All @@ -573,13 +569,13 @@ def main_export(
trust_remote_code=trust_remote_code,
subfolder=subfolder,
revision=revision,
library_name=library_name,
force_download=force_download,
local_files_only=local_files_only,
token=token,
submodels=submodels,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
library_name=library_name,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
Expand All @@ -602,8 +598,7 @@ def main_export(

# Validate compiled model
if do_validation is True:
is_stable_diffusion = "stable-diffusion" in task
if is_stable_diffusion:
if library_name == "diffusers":
# Do not validate vae encoder due to the sampling randomness
neuron_outputs.pop("vae_encoder")
models_and_neuron_configs.pop("vae_encoder", None)
Expand Down Expand Up @@ -672,13 +667,12 @@ def main():
args = parser.parse_args()

task = infer_task(args.task, args.model)
is_stable_diffusion = "stable-diffusion" in task
is_sentence_transformers = args.library_name == "sentence_transformers"
library_name = TasksManager.infer_library_from_model(args.model, cache_dir=args.cache_dir)

if is_stable_diffusion:
if library_name == "diffusers":
input_shapes = normalize_stable_diffusion_input_shapes(args)
submodels = {"unet": args.unet}
elif is_sentence_transformers:
elif library_name == "sentence_transformers":
input_shapes = normalize_sentence_transformers_input_shapes(args)
submodels = None
else:
Expand Down Expand Up @@ -722,7 +716,7 @@ def main():
subfolder=args.subfolder,
do_validation=not args.disable_validation,
submodels=submodels,
library_name=args.library_name,
library_name=library_name,
lora_model_ids=getattr(args, "lora_model_ids", None),
lora_weight_names=getattr(args, "lora_weight_names", None),
lora_adapter_names=getattr(args, "lora_adapter_names", None),
Expand Down
2 changes: 1 addition & 1 deletion optimum/exporters/neuron/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ def add_stable_diffusion_compiler_args(config, compiler_args):
compiler_args.append("--enable-fast-loading-neuron-binaries")
# unet or controlnet
if "unet" in identifier or "controlnet" in identifier:
# SDXL unet doesn't support fast loading neuron binaries
# SDXL unet doesn't support fast loading neuron binaries (sdk 2.19.1)
if not getattr(config, "is_sdxl", False):
compiler_args.append("--enable-fast-loading-neuron-binaries")
compiler_args.append("--model-type=unet-inference")
Expand Down
8 changes: 6 additions & 2 deletions optimum/exporters/neuron/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,12 +383,16 @@ class LevitNeuronConfig(ViTNeuronConfig):
pass


@register_in_tasks_manager("mobilenet-v2", *["feature-extraction", "image-classification", "semantic-segmentation"])
@register_in_tasks_manager(
"mobilenet-v2", *["feature-extraction", "image-classification", "semantic-segmentation", "image-segmentation"]
)
class MobileNetV2NeuronConfig(ViTNeuronConfig):
pass


@register_in_tasks_manager("mobilevit", *["feature-extraction", "image-classification", "semantic-segmentation"])
@register_in_tasks_manager(
"mobilevit", *["feature-extraction", "image-classification", "semantic-segmentation", "image-segmentation"]
)
class MobileViTNeuronConfig(ViTNeuronConfig):
pass

Expand Down
40 changes: 22 additions & 18 deletions optimum/exporters/neuron/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,15 @@
f"We found an older version of diffusers {_diffusers_version} but we require diffusers to be >= {DIFFUSERS_MINIMUM_VERSION}. "
"Please update diffusers by running `pip install --upgrade diffusers`"
)
from diffusers import ControlNetModel, UNet2DConditionModel
from diffusers import (
ControlNetModel,
ModelMixin,
StableDiffusionPipeline,
StableDiffusionXLImg2ImgPipeline,
StableDiffusionXLInpaintPipeline,
StableDiffusionXLPipeline,
UNet2DConditionModel,
)
from diffusers.models.attention_processor import Attention


Expand All @@ -61,9 +69,6 @@

from .base import NeuronDefaultConfig

if is_diffusers_available():
from diffusers import ModelMixin, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline


def build_stable_diffusion_components_mandatory_shapes(
batch_size: Optional[int] = None,
Expand Down Expand Up @@ -107,8 +112,7 @@ def build_stable_diffusion_components_mandatory_shapes(


def get_stable_diffusion_models_for_export(
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"],
task: str,
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
text_encoder_input_shapes: Dict[str, int],
unet_input_shapes: Dict[str, int],
vae_encoder_input_shapes: Dict[str, int],
Expand All @@ -129,10 +133,8 @@ def get_stable_diffusion_models_for_export(
performance benefit (CLIP text encoder, VAE encoder, VAE decoder, Unet).
Args:
pipeline ([`Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"]`]):
pipeline ([`Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"]`]):
The model to export.
task (`str`):
Task name, should be either "stable-diffusion" or "stable-diffusion-xl".
text_encoder_input_shapes (`Dict[str, int]`):
Static shapes used for compiling text encoder.
unet_input_shapes (`Dict[str, int]`):
Expand Down Expand Up @@ -165,7 +167,6 @@ def get_stable_diffusion_models_for_export(
"""
models_for_export = get_submodels_for_export_stable_diffusion(
pipeline=pipeline,
task=task,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
Expand Down Expand Up @@ -225,8 +226,10 @@ def get_stable_diffusion_models_for_export(
dynamic_batch_size=dynamic_batch_size,
**unet_input_shapes,
)
if task == "stable-diffusion-xl":
unet_neuron_config.is_sdxl = True
is_stable_diffusion_xl = isinstance(
pipeline, (StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline, StableDiffusionXLPipeline)
)
unet_neuron_config.is_sdxl = is_stable_diffusion_xl

unet_neuron_config.with_controlnet = True if controlnet_ids else False

Expand Down Expand Up @@ -295,7 +298,7 @@ def get_stable_diffusion_models_for_export(


def _load_lora_weights_to_pipeline(
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"],
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
lora_model_ids: Optional[Union[str, List[str]]] = None,
weight_names: Optional[Union[str, List[str]]] = None,
adapter_names: Optional[Union[str, List[str]]] = None,
Expand Down Expand Up @@ -349,8 +352,7 @@ def load_controlnets(controlnet_ids: Optional[Union[str, List[str]]] = None):


def get_submodels_for_export_stable_diffusion(
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"],
task: str,
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
output_hidden_states: bool = False,
lora_model_ids: Optional[Union[str, List[str]]] = None,
lora_weight_names: Optional[Union[str, List[str]]] = None,
Expand All @@ -361,7 +363,9 @@ def get_submodels_for_export_stable_diffusion(
"""
Returns the components of a Stable Diffusion model.
"""
is_sdxl = "xl" in task
is_stable_diffusion_xl = isinstance(
pipeline, (StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline, StableDiffusionXLPipeline)
)

# Lora
pipeline = _load_lora_weights_to_pipeline(
Expand All @@ -380,7 +384,7 @@ def get_submodels_for_export_stable_diffusion(

# Text encoders
if pipeline.text_encoder is not None:
if is_sdxl or output_hidden_states:
if is_stable_diffusion_xl or output_hidden_states:
pipeline.text_encoder.config.output_hidden_states = True
models_for_export.append((DIFFUSION_MODEL_TEXT_ENCODER_NAME, copy.deepcopy(pipeline.text_encoder)))

Expand All @@ -399,7 +403,7 @@ def get_submodels_for_export_stable_diffusion(
# Replace original cross-attention module with custom cross-attention module for better performance
# For applying optimized attention score, we need to set env variable `NEURON_FUSE_SOFTMAX=1`
if os.environ.get("NEURON_FUSE_SOFTMAX") == "1":
if is_sdxl:
if is_stable_diffusion_xl:
logger.info("Applying optimized attention score computation for sdxl.")
Attention.get_attention_scores = get_attention_scores_sdxl
else:
Expand Down
1 change: 1 addition & 0 deletions optimum/neuron/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,6 +852,7 @@ def _export(
trust_remote_code=trust_remote_code,
subfolder=subfolder,
revision=revision,
library_name=cls.library_name,
force_download=force_download,
local_files_only=local_files_only,
token=token,
Expand Down
6 changes: 1 addition & 5 deletions optimum/neuron/modeling_traced.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ def _export(
config: "PretrainedConfig",
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
library_name: Optional[str] = None,
force_download: bool = False,
cache_dir: Optional[str] = None,
compiler_workdir: Optional[Union[str, Path]] = None,
Expand Down Expand Up @@ -275,7 +274,6 @@ def _export(
if task is None:
task = TasksManager.infer_task_from_model(cls.auto_model_class)
task = TasksManager.map_from_synonym(task)
library_name = TasksManager.infer_library_from_model(model_id, subfolder=subfolder, library_name=library_name)

# Get compilation arguments
if is_neuron_available() and dynamic_batch_size is True and "batch_size" in kwargs_shapes:
Expand Down Expand Up @@ -320,10 +318,9 @@ def _export(
model_name_or_path=model_id,
subfolder=subfolder,
revision=revision,
framework="pt",
library_name=library_name,
cache_dir=cache_dir,
token=token,
framework="pt",
local_files_only=local_files_only,
force_download=force_download,
trust_remote_code=trust_remote_code,
Expand Down Expand Up @@ -361,7 +358,6 @@ def _export(
local_files_only=local_files_only,
token=token,
do_validation=False,
library_name=library_name,
**kwargs_shapes,
)
config = AutoConfig.from_pretrained(save_dir_path)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
INSTALL_REQUIRES = [
"transformers == 4.43.2",
"accelerate == 0.29.2",
"optimum ~= 1.21.0",
"optimum ~= 1.22.0",
"huggingface_hub >= 0.20.1",
"numpy>=1.22.2, <=1.25.2",
"protobuf<4",
Expand Down
Loading

0 comments on commit 8445656

Please sign in to comment.