diff --git a/fooocus_api_version.py b/fooocus_api_version.py index 5ff2b60..9adc13c 100644 --- a/fooocus_api_version.py +++ b/fooocus_api_version.py @@ -1 +1 @@ -version = '0.4.0.7' +version = '0.4.1.0' diff --git a/fooocusapi/models/common/base.py b/fooocusapi/models/common/base.py index 46fefbf..1bb6031 100644 --- a/fooocusapi/models/common/base.py +++ b/fooocusapi/models/common/base.py @@ -21,6 +21,7 @@ class PerformanceSelection(str, Enum): quality = 'Quality' extreme_speed = 'Extreme Speed' lightning = 'Lightning' + hyper_sd = 'Hyper-SD' class Lora(BaseModel): diff --git a/fooocusapi/models/common/image_meta.py b/fooocusapi/models/common/image_meta.py index f5bdc38..750cc32 100644 --- a/fooocusapi/models/common/image_meta.py +++ b/fooocusapi/models/common/image_meta.py @@ -42,6 +42,7 @@ class ImageMeta(BaseModel): guidance_scale: float sharpness: float steps: int + vae_name: str version: str = version @@ -101,6 +102,7 @@ def image_parse( guidance_scale=req_param.guidance_scale, sharpness=req_param.sharpness, steps=-1, + vae_name=req_param.advanced_params.vae_name, version=version ) if meta.metadata_scheme not in ["fooocus", "a111"]: diff --git a/fooocusapi/models/common/requests.py b/fooocusapi/models/common/requests.py index 6b1560c..a7e5821 100644 --- a/fooocusapi/models/common/requests.py +++ b/fooocusapi/models/common/requests.py @@ -21,9 +21,13 @@ default_inpaint_engine_version, default_overwrite_switch, default_cfg_tsnr, - default_sample_sharpness + default_sample_sharpness, + default_vae, + default_clip_skip ) +from modules.flags import clip_skip_max + from fooocusapi.models.common.base import ( PerformanceSelection, Lora, @@ -50,6 +54,7 @@ class AdvancedParams(BaseModel): adm_scaler_negative: float = Field(0.8, description="Negative ADM Guidance Scaler", ge=0.1, le=3.0) adm_scaler_end: float = Field(0.3, description="ADM Guidance End At Step", ge=0.0, le=1.0) adaptive_cfg: float = Field(default_cfg_tsnr, description="CFG Mimicking from TSNR", ge=1.0, le=30.0) + clip_skip: int = Field(default_clip_skip, description="Clip Skip", ge=1, le=clip_skip_max) sampler_name: str = Field(default_sampler, description="Sampler") scheduler_name: str = Field(default_scheduler, description="Scheduler") overwrite_step: int = Field(default_overwrite_step, description="Forced Overwrite of Sampling Step", ge=-1, le=200) @@ -79,6 +84,8 @@ class AdvancedParams(BaseModel): inpaint_mask_upload_checkbox: bool = Field(False, description="Upload Mask") invert_mask_checkbox: bool = Field(False, description="Invert Mask") inpaint_erode_or_dilate: int = Field(0, description="Mask Erode or Dilate", ge=-64, le=64) + black_out_nsfw: bool = Field(False, description="Block out NSFW") + vae_name: str = Field(default_vae, description="VAE name") class CommonRequest(BaseModel): diff --git a/fooocusapi/worker.py b/fooocusapi/worker.py index 580f2db..c1d6d37 100644 --- a/fooocusapi/worker.py +++ b/fooocusapi/worker.py @@ -12,7 +12,6 @@ from fooocusapi.models.common.image_meta import image_parse from modules.patch import PatchSettings, patch_settings, patch_all -from modules.sdxl_styles import apply_arrays from modules.flags import Performance from fooocusapi.utils.file_utils import save_output_file @@ -100,10 +99,20 @@ def process_generate(async_task: QueueTask): import extras.ip_adapter as ip_adapter import extras.face_crop as face_crop import ldm_patched.modules.model_management as model_management - from modules.util import remove_empty_str, resize_image, HWC3, set_image_shape_ceil, get_image_shape_ceil, get_shape_ceil, resample_image, erode_or_dilate + from modules.util import ( + remove_empty_str, HWC3, resize_image, + get_image_shape_ceil, set_image_shape_ceil, + get_shape_ceil, resample_image, erode_or_dilate, + get_enabled_loras, parse_lora_references_from_prompt, apply_wildcards + ) + from modules.upscaler import perform_upscale from extras.expansion import safe_str - from modules.sdxl_styles import apply_style, fooocus_expansion, apply_wildcards + from extras.censor import default_censor + from modules.sdxl_styles import ( + apply_style, get_random_style, + fooocus_expansion, apply_arrays, random_style_name + ) pid = os.getpid() @@ -132,18 +141,24 @@ def progressbar(_, number, text): logger.std_info(f'[Fooocus] {text}') outputs.append(['preview', (number, text, None)]) - def yield_result(_, images, tasks, extension='png'): + def yield_result(_, images, tasks, extension='png', + blockout_nsfw=False, censor=True): """ Yield result :param _: async task object :param images: list for generated image :param tasks: the image was generated one by one, when image number is not one, it will be a task list :param extension: extension for saved image + :param blockout_nsfw: blockout nsfw image + :param censor: censor image :return: """ if not isinstance(images, list): images = [images] + if censor and (config.default_black_out_nsfw or black_out_nsfw): + images = default_censor(images) + results = [] for index, im in enumerate(images): if async_task.req_param.save_name == '': @@ -262,6 +277,9 @@ def yield_result(_, images, tasks, extension='png'): inpaint_mask_upload_checkbox = adp.inpaint_mask_upload_checkbox invert_mask_checkbox = adp.invert_mask_checkbox inpaint_erode_or_dilate = adp.inpaint_erode_or_dilate + black_out_nsfw = adp.black_out_nsfw + vae_name = adp.vae_name + clip_skip = adp.clip_skip cn_tasks = {x: [] for x in flags.ip_list} for img_prompt in params.image_prompts: @@ -299,10 +317,12 @@ def yield_result(_, images, tasks, extension='png'): steps = performance_selection.steps() + performance_loras = [] + if performance_selection == Performance.EXTREME_SPEED: logger.std_warn('[Fooocus] Enter LCM mode.') progressbar(async_task, 1, 'Downloading LCM components ...') - loras += [(config.downloading_sdxl_lcm_lora(), 1.0)] + performance_loras += [(config.downloading_sdxl_lcm_lora(), 1.0)] if refiner_model_name != 'None': logger.std_info('[Fooocus] Refiner disabled in LCM mode.') @@ -321,7 +341,7 @@ def yield_result(_, images, tasks, extension='png'): elif performance_selection == Performance.LIGHTNING: logger.std_info('[Fooocus] Enter Lightning mode.') progressbar(async_task, 1, 'Downloading Lightning components ...') - loras += [(config.downloading_sdxl_lightning_lora(), 1.0)] + performance_loras += [(config.downloading_sdxl_lightning_lora(), 1.0)] if refiner_model_name != 'None': logger.std_info('[Fooocus] Refiner disabled in Lightning mode.') @@ -337,7 +357,27 @@ def yield_result(_, images, tasks, extension='png'): adm_scaler_negative = 1.0 adm_scaler_end = 0.0 + elif performance_selection == Performance.HYPER_SD: + print('Enter Hyper-SD mode.') + progressbar(async_task, 1, 'Downloading Hyper-SD components ...') + performance_loras += [(config.downloading_sdxl_hyper_sd_lora(), 0.8)] + + if refiner_model_name != 'None': + logger.std_info('[Fooocus] Refiner disabled in Hyper-SD mode.') + + refiner_model_name = 'None' + sampler_name = 'dpmpp_sde_gpu' + scheduler_name = 'karras' + sharpness = 0.0 + guidance_scale = 1.0 + adaptive_cfg = 1.0 + refiner_switch = 1.0 + adm_scaler_positive = 1.0 + adm_scaler_negative = 1.0 + adm_scaler_end = 0.0 + logger.std_info(f'[Parameters] Adaptive CFG = {adaptive_cfg}') + logger.std_info(f'[Parameters] CLIP Skip = {clip_skip}') logger.std_info(f'[Parameters] Sharpness = {sharpness}') logger.std_info(f'[Parameters] ControlNet Softness = {controlnet_softness}') logger.std_info(f'[Parameters] ADM Scale = ' @@ -500,6 +540,8 @@ def yield_result(_, images, tasks, extension='png'): extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else [] progressbar(async_task, 3, 'Loading models ...') + loras, prompt = parse_lora_references_from_prompt(prompt, loras, config.default_max_lora_number) + loras += performance_loras pipeline.refresh_everything( refiner_model_name=refiner_model_name, base_model_name=base_model_name, @@ -507,6 +549,8 @@ def yield_result(_, images, tasks, extension='png'): base_model_additional_loras=base_model_additional_loras, use_synthetic_refiner=use_synthetic_refiner) + pipeline.set_clip_skip(clip_skip) + progressbar(async_task, 3, 'Processing prompts ...') tasks = [] @@ -520,15 +564,21 @@ def yield_result(_, images, tasks, extension='png'): task_prompt = apply_wildcards(prompt, task_rng, i, read_wildcards_in_order) task_prompt = apply_arrays(task_prompt, i) task_negative_prompt = apply_wildcards(negative_prompt, task_rng, i, read_wildcards_in_order) - task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in extra_positive_prompts] - task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in extra_negative_prompts] + task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in + extra_positive_prompts] + task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in + extra_negative_prompts] positive_basic_workloads = [] negative_basic_workloads = [] + task_styles = style_selections.copy() if use_style: - for s in style_selections: - p, n = apply_style(s, positive=task_prompt) + for index, style in enumerate(task_styles): + if style == random_style_name: + style = get_random_style(task_rng) + task_styles[index] = style + p, n = apply_style(style, positive=task_prompt) positive_basic_workloads = positive_basic_workloads + p negative_basic_workloads = negative_basic_workloads + n else: @@ -555,29 +605,30 @@ def yield_result(_, images, tasks, extension='png'): negative_top_k=len(negative_basic_workloads), log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts), log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts), + styles=task_styles )) if use_expansion: for i, t in enumerate(tasks): - progressbar(async_task, 5, f'Preparing Fooocus text #{i + 1} ...') + progressbar(async_task, 4, f'Preparing Fooocus text #{i + 1} ...') expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed']) logger.std_info(f'[Prompt Expansion] {expansion}') t['expansion'] = expansion t['positive'] = copy.deepcopy(t['positive']) + [expansion] # Deep copy. for i, t in enumerate(tasks): - progressbar(async_task, 7, f'Encoding positive #{i + 1} ...') + progressbar(async_task, 5, f'Encoding positive #{i + 1} ...') t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k']) for i, t in enumerate(tasks): if abs(float(cfg_scale) - 1.0) < 1e-4: t['uc'] = pipeline.clone_cond(t['c']) else: - progressbar(async_task, 10, f'Encoding negative #{i + 1} ...') + progressbar(async_task, 6, f'Encoding negative #{i + 1} ...') t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k']) if len(goals) > 0: - progressbar(async_task, 13, 'Image processing ...') + progressbar(async_task, 7, 'Image processing ...') if 'vary' in goals: if 'subtle' in uov_method: @@ -598,7 +649,7 @@ def yield_result(_, images, tasks, extension='png'): uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil) initial_pixels = core.numpy_to_pytorch(uov_input_image) - progressbar(async_task, 13, 'VAE encoding ...') + progressbar(async_task, 8, 'VAE encoding ...') candidate_vae, _ = pipeline.get_candidate_vae( steps=steps, @@ -615,7 +666,7 @@ def yield_result(_, images, tasks, extension='png'): if 'upscale' in goals: H, W, C = uov_input_image.shape - progressbar(async_task, 13, f'Upscaling image from {str((H, W))} ...') + progressbar(async_task, 9, f'Upscaling image from {str((H, W))} ...') uov_input_image = perform_upscale(uov_input_image) logger.std_info('[Upscale] Image upscale.') @@ -653,7 +704,9 @@ def yield_result(_, images, tasks, extension='png'): if direct_return: # d = [('Upscale (Fast)', '2x')] # log(uov_input_image, d, output_format=save_extension) - yield_result(async_task, uov_input_image, tasks, save_extension) + if config.default_black_out_nsfw or black_out_nsfw: + uov_input_image = default_censor(uov_input_image) + yield_result(async_task, uov_input_image, tasks, save_extension, False, False) return tiled = True @@ -663,7 +716,7 @@ def yield_result(_, images, tasks, extension='png'): denoising_strength = overwrite_upscale_strength initial_pixels = core.numpy_to_pytorch(uov_input_image) - progressbar(async_task, 13, 'VAE encoding ...') + progressbar(async_task, 10, 'VAE encoding ...') candidate_vae, _ = pipeline.get_candidate_vae( steps=steps, @@ -735,10 +788,11 @@ def yield_result(_, images, tasks, extension='png'): ) if debugging_inpaint_preprocessor: - yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), tasks) + yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), tasks, + black_out_nsfw) return - progressbar(async_task, 13, 'VAE Inpaint encoding ...') + progressbar(async_task, 11, 'VAE Inpaint encoding ...') inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill) inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image) @@ -758,7 +812,7 @@ def yield_result(_, images, tasks, extension='png'): latent_swap = None if candidate_vae_swap is not None: - progressbar(async_task, 13, 'VAE SD15 encoding ...') + progressbar(async_task, 12, 'VAE SD15 encoding ...') latent_swap = core.encode_vae( vae=candidate_vae_swap, pixels=inpaint_pixel_fill)['samples'] @@ -798,7 +852,7 @@ def yield_result(_, images, tasks, extension='png'): cn_img = HWC3(cn_img) task[0] = core.numpy_to_pytorch(cn_img) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, tasks, save_extension) + yield_result(async_task, cn_img, tasks, save_extension, black_out_nsfw) return for task in cn_tasks[flags.cn_cpds]: cn_img, cn_stop, cn_weight = task @@ -810,7 +864,7 @@ def yield_result(_, images, tasks, extension='png'): cn_img = HWC3(cn_img) task[0] = core.numpy_to_pytorch(cn_img) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, tasks, save_extension) + yield_result(async_task, cn_img, tasks, save_extension, black_out_nsfw) return for task in cn_tasks[flags.cn_ip]: cn_img, cn_stop, cn_weight = task @@ -821,7 +875,7 @@ def yield_result(_, images, tasks, extension='png'): task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, tasks, save_extension) + yield_result(async_task, cn_img, tasks, save_extension, black_out_nsfw) return for task in cn_tasks[flags.cn_ip_face]: cn_img, cn_stop, cn_weight = task @@ -835,7 +889,7 @@ def yield_result(_, images, tasks, extension='png'): task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, tasks, save_extension) + yield_result(async_task, cn_img, tasks, save_extension, black_out_nsfw) return all_ip_tasks = cn_tasks[flags.cn_ip] + cn_tasks[flags.cn_ip_face] @@ -870,19 +924,19 @@ def yield_result(_, images, tasks, extension='png'): final_sampler_name = sampler_name final_scheduler_name = scheduler_name - if scheduler_name == 'lcm': + if scheduler_name in ['lcm', 'tcd']: final_scheduler_name = 'sgm_uniform' if pipeline.final_unet is not None: pipeline.final_unet = core.opModelSamplingDiscrete.patch( pipeline.final_unet, - sampling='lcm', + sampling=scheduler_name, zsnr=False)[0] if pipeline.final_refiner_unet is not None: pipeline.final_refiner_unet = core.opModelSamplingDiscrete.patch( pipeline.final_refiner_unet, - sampling='lcm', + sampling=scheduler_name, zsnr=False)[0] - logger.std_info('[Fooocus] Using lcm scheduler.') + logger.std_info(f'[Fooocus] Using {scheduler_name} scheduler.') outputs.append(['preview', (13, 'Moving model to GPU ...', None)]) @@ -957,7 +1011,7 @@ def callback(step, x0, x, total_steps, y): if async_task.finish_with_error: worker_queue.finish_task(async_task.job_id) return async_task.task_result - yield_result(None, results, tasks, save_extension) + yield_result(None, results, tasks, save_extension, black_out_nsfw) return except Exception as e: logger.std_error(f'[Fooocus] Worker error: {e}') diff --git a/repositories/Fooocus/args_manager.py b/repositories/Fooocus/args_manager.py index 6a3ae9d..e023da2 100644 --- a/repositories/Fooocus/args_manager.py +++ b/repositories/Fooocus/args_manager.py @@ -31,6 +31,9 @@ args_parser.parser.add_argument("--disable-preset-download", action='store_true', help="Disables downloading models for presets", default=False) +args_parser.parser.add_argument("--enable-describe-uov-image", action='store_true', + help="Disables automatic description of uov images when prompt is empty", default=False) + args_parser.parser.add_argument("--always-download-new-model", action='store_true', help="Always download newer models ", default=False) diff --git a/repositories/Fooocus/extras/censor.py b/repositories/Fooocus/extras/censor.py new file mode 100644 index 0000000..45617fd --- /dev/null +++ b/repositories/Fooocus/extras/censor.py @@ -0,0 +1,60 @@ +import os + +import numpy as np +import torch +from transformers import CLIPConfig, CLIPImageProcessor + +import ldm_patched.modules.model_management as model_management +import modules.config +from extras.safety_checker.models.safety_checker import StableDiffusionSafetyChecker +from ldm_patched.modules.model_patcher import ModelPatcher + +safety_checker_repo_root = os.path.join(os.path.dirname(__file__), 'safety_checker') +config_path = os.path.join(safety_checker_repo_root, "configs", "config.json") +preprocessor_config_path = os.path.join(safety_checker_repo_root, "configs", "preprocessor_config.json") + + +class Censor: + def __init__(self): + self.safety_checker_model: ModelPatcher | None = None + self.clip_image_processor: CLIPImageProcessor | None = None + self.load_device = torch.device('cpu') + self.offload_device = torch.device('cpu') + + def init(self): + if self.safety_checker_model is None and self.clip_image_processor is None: + safety_checker_model = modules.config.downloading_safety_checker_model() + self.clip_image_processor = CLIPImageProcessor.from_json_file(preprocessor_config_path) + clip_config = CLIPConfig.from_json_file(config_path) + model = StableDiffusionSafetyChecker.from_pretrained(safety_checker_model, config=clip_config) + model.eval() + + self.load_device = model_management.text_encoder_device() + self.offload_device = model_management.text_encoder_offload_device() + + model.to(self.offload_device) + + self.safety_checker_model = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device) + + def censor(self, images: list | np.ndarray) -> list | np.ndarray: + self.init() + model_management.load_model_gpu(self.safety_checker_model) + + single = False + if not isinstance(images, list) or isinstance(images, np.ndarray): + images = [images] + single = True + + safety_checker_input = self.clip_image_processor(images, return_tensors="pt") + safety_checker_input.to(device=self.load_device) + checked_images, has_nsfw_concept = self.safety_checker_model.model(images=images, + clip_input=safety_checker_input.pixel_values) + checked_images = [image.astype(np.uint8) for image in checked_images] + + if single: + checked_images = checked_images[0] + + return checked_images + + +default_censor = Censor().censor diff --git a/repositories/Fooocus/extras/safety_checker/configs/config.json b/repositories/Fooocus/extras/safety_checker/configs/config.json new file mode 100644 index 0000000..aa454d2 --- /dev/null +++ b/repositories/Fooocus/extras/safety_checker/configs/config.json @@ -0,0 +1,171 @@ +{ + "_name_or_path": "clip-vit-large-patch14/", + "architectures": [ + "SafetyChecker" + ], + "initializer_factor": 1.0, + "logit_scale_init_value": 2.6592, + "model_type": "clip", + "projection_dim": 768, + "text_config": { + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "bos_token_id": 0, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "dropout": 0.0, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": 2, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "quick_gelu", + "hidden_size": 768, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 3072, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "layer_norm_eps": 1e-05, + "length_penalty": 1.0, + "max_length": 20, + "max_position_embeddings": 77, + "min_length": 0, + "model_type": "clip_text_model", + "no_repeat_ngram_size": 0, + "num_attention_heads": 12, + "num_beam_groups": 1, + "num_beams": 1, + "num_hidden_layers": 12, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": 1, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "task_specific_params": null, + "temperature": 1.0, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.21.0.dev0", + "typical_p": 1.0, + "use_bfloat16": false, + "vocab_size": 49408 + }, + "text_config_dict": { + "hidden_size": 768, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12 + }, + "torch_dtype": "float32", + "transformers_version": null, + "vision_config": { + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "dropout": 0.0, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "quick_gelu", + "hidden_size": 1024, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "image_size": 224, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 4096, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "layer_norm_eps": 1e-05, + "length_penalty": 1.0, + "max_length": 20, + "min_length": 0, + "model_type": "clip_vision_model", + "no_repeat_ngram_size": 0, + "num_attention_heads": 16, + "num_beam_groups": 1, + "num_beams": 1, + "num_hidden_layers": 24, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "patch_size": 14, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "task_specific_params": null, + "temperature": 1.0, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.21.0.dev0", + "typical_p": 1.0, + "use_bfloat16": false + }, + "vision_config_dict": { + "hidden_size": 1024, + "intermediate_size": 4096, + "num_attention_heads": 16, + "num_hidden_layers": 24, + "patch_size": 14 + } +} diff --git a/repositories/Fooocus/extras/safety_checker/configs/preprocessor_config.json b/repositories/Fooocus/extras/safety_checker/configs/preprocessor_config.json new file mode 100644 index 0000000..5294955 --- /dev/null +++ b/repositories/Fooocus/extras/safety_checker/configs/preprocessor_config.json @@ -0,0 +1,20 @@ +{ + "crop_size": 224, + "do_center_crop": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_resize": true, + "feature_extractor_type": "CLIPFeatureExtractor", + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "resample": 3, + "size": 224 +} diff --git a/repositories/Fooocus/extras/safety_checker/models/safety_checker.py b/repositories/Fooocus/extras/safety_checker/models/safety_checker.py new file mode 100644 index 0000000..ea38bf0 --- /dev/null +++ b/repositories/Fooocus/extras/safety_checker/models/safety_checker.py @@ -0,0 +1,126 @@ +# from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/safety_checker.py + +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import torch +import torch.nn as nn +from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel +from transformers.utils import logging + +logger = logging.get_logger(__name__) + + +def cosine_distance(image_embeds, text_embeds): + normalized_image_embeds = nn.functional.normalize(image_embeds) + normalized_text_embeds = nn.functional.normalize(text_embeds) + return torch.mm(normalized_image_embeds, normalized_text_embeds.t()) + + +class StableDiffusionSafetyChecker(PreTrainedModel): + config_class = CLIPConfig + main_input_name = "clip_input" + + _no_split_modules = ["CLIPEncoderLayer"] + + def __init__(self, config: CLIPConfig): + super().__init__(config) + + self.vision_model = CLIPVisionModel(config.vision_config) + self.visual_projection = nn.Linear(config.vision_config.hidden_size, config.projection_dim, bias=False) + + self.concept_embeds = nn.Parameter(torch.ones(17, config.projection_dim), requires_grad=False) + self.special_care_embeds = nn.Parameter(torch.ones(3, config.projection_dim), requires_grad=False) + + self.concept_embeds_weights = nn.Parameter(torch.ones(17), requires_grad=False) + self.special_care_embeds_weights = nn.Parameter(torch.ones(3), requires_grad=False) + + @torch.no_grad() + def forward(self, clip_input, images): + pooled_output = self.vision_model(clip_input)[1] # pooled_output + image_embeds = self.visual_projection(pooled_output) + + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 + special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds).cpu().float().numpy() + cos_dist = cosine_distance(image_embeds, self.concept_embeds).cpu().float().numpy() + + result = [] + batch_size = image_embeds.shape[0] + for i in range(batch_size): + result_img = {"special_scores": {}, "special_care": [], "concept_scores": {}, "bad_concepts": []} + + # increase this value to create a stronger `nfsw` filter + # at the cost of increasing the possibility of filtering benign images + adjustment = 0.0 + + for concept_idx in range(len(special_cos_dist[0])): + concept_cos = special_cos_dist[i][concept_idx] + concept_threshold = self.special_care_embeds_weights[concept_idx].item() + result_img["special_scores"][concept_idx] = round(concept_cos - concept_threshold + adjustment, 3) + if result_img["special_scores"][concept_idx] > 0: + result_img["special_care"].append({concept_idx, result_img["special_scores"][concept_idx]}) + adjustment = 0.01 + + for concept_idx in range(len(cos_dist[0])): + concept_cos = cos_dist[i][concept_idx] + concept_threshold = self.concept_embeds_weights[concept_idx].item() + result_img["concept_scores"][concept_idx] = round(concept_cos - concept_threshold + adjustment, 3) + if result_img["concept_scores"][concept_idx] > 0: + result_img["bad_concepts"].append(concept_idx) + + result.append(result_img) + + has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result] + + for idx, has_nsfw_concept in enumerate(has_nsfw_concepts): + if has_nsfw_concept: + if torch.is_tensor(images) or torch.is_tensor(images[0]): + images[idx] = torch.zeros_like(images[idx]) # black image + else: + images[idx] = np.zeros(images[idx].shape) # black image + + if any(has_nsfw_concepts): + logger.warning( + "Potential NSFW content was detected in one or more images. A black image will be returned instead." + " Try again with a different prompt and/or seed." + ) + + return images, has_nsfw_concepts + + @torch.no_grad() + def forward_onnx(self, clip_input: torch.Tensor, images: torch.Tensor): + pooled_output = self.vision_model(clip_input)[1] # pooled_output + image_embeds = self.visual_projection(pooled_output) + + special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds) + cos_dist = cosine_distance(image_embeds, self.concept_embeds) + + # increase this value to create a stronger `nsfw` filter + # at the cost of increasing the possibility of filtering benign images + adjustment = 0.0 + + special_scores = special_cos_dist - self.special_care_embeds_weights + adjustment + # special_scores = special_scores.round(decimals=3) + special_care = torch.any(special_scores > 0, dim=1) + special_adjustment = special_care * 0.01 + special_adjustment = special_adjustment.unsqueeze(1).expand(-1, cos_dist.shape[1]) + + concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment + # concept_scores = concept_scores.round(decimals=3) + has_nsfw_concepts = torch.any(concept_scores > 0, dim=1) + + images[has_nsfw_concepts] = 0.0 # black image + + return images, has_nsfw_concepts diff --git a/repositories/Fooocus/extras/vae_interpose.py b/repositories/Fooocus/extras/vae_interpose.py index 72fb09a..d407ca8 100644 --- a/repositories/Fooocus/extras/vae_interpose.py +++ b/repositories/Fooocus/extras/vae_interpose.py @@ -1,69 +1,85 @@ # https://github.com/city96/SD-Latent-Interposer/blob/main/interposer.py import os -import torch + import safetensors.torch as sf +import torch import torch.nn as nn -import ldm_patched.modules.model_management +import ldm_patched.modules.model_management from ldm_patched.modules.model_patcher import ModelPatcher from modules.config import path_vae_approx -class Block(nn.Module): - def __init__(self, size): +class ResBlock(nn.Module): + """Block with residuals""" + + def __init__(self, ch): super().__init__() self.join = nn.ReLU() + self.norm = nn.BatchNorm2d(ch) self.long = nn.Sequential( - nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.1), - nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.1), - nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), + nn.Conv2d(ch, ch, kernel_size=3, stride=1, padding=1), + nn.SiLU(), + nn.Conv2d(ch, ch, kernel_size=3, stride=1, padding=1), + nn.SiLU(), + nn.Conv2d(ch, ch, kernel_size=3, stride=1, padding=1), + nn.Dropout(0.1) ) def forward(self, x): - y = self.long(x) - z = self.join(y + x) - return z + x = self.norm(x) + return self.join(self.long(x) + x) + +class ExtractBlock(nn.Module): + """Increase no. of channels by [out/in]""" -class Interposer(nn.Module): - def __init__(self): + def __init__(self, ch_in, ch_out): super().__init__() - self.chan = 4 - self.hid = 128 - - self.head_join = nn.ReLU() - self.head_short = nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1) - self.head_long = nn.Sequential( - nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.1), - nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.1), - nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), + self.join = nn.ReLU() + self.short = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1) + self.long = nn.Sequential( + nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1), + nn.SiLU(), + nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1), + nn.SiLU(), + nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1), + nn.Dropout(0.1) ) + + def forward(self, x): + return self.join(self.long(x) + self.short(x)) + + +class InterposerModel(nn.Module): + """Main neural network""" + + def __init__(self, ch_in=4, ch_out=4, ch_mid=64, scale=1.0, blocks=12): + super().__init__() + self.ch_in = ch_in + self.ch_out = ch_out + self.ch_mid = ch_mid + self.blocks = blocks + self.scale = scale + + self.head = ExtractBlock(self.ch_in, self.ch_mid) self.core = nn.Sequential( - Block(self.hid), - Block(self.hid), - Block(self.hid), - ) - self.tail = nn.Sequential( - nn.ReLU(), - nn.Conv2d(self.hid, self.chan, kernel_size=3, stride=1, padding=1) + nn.Upsample(scale_factor=self.scale, mode="nearest"), + *[ResBlock(self.ch_mid) for _ in range(blocks)], + nn.BatchNorm2d(self.ch_mid), + nn.SiLU(), ) + self.tail = nn.Conv2d(self.ch_mid, self.ch_out, kernel_size=3, stride=1, padding=1) def forward(self, x): - y = self.head_join( - self.head_long(x) + - self.head_short(x) - ) + y = self.head(x) z = self.core(y) return self.tail(z) vae_approx_model = None -vae_approx_filename = os.path.join(path_vae_approx, 'xl-to-v1_interposer-v3.1.safetensors') +vae_approx_filename = os.path.join(path_vae_approx, 'xl-to-v1_interposer-v4.0.safetensors') def parse(x): @@ -72,7 +88,7 @@ def parse(x): x_origin = x.clone() if vae_approx_model is None: - model = Interposer() + model = InterposerModel() model.eval() sd = sf.load_file(vae_approx_filename) model.load_state_dict(sd) diff --git a/repositories/Fooocus/fooocus_version.py b/repositories/Fooocus/fooocus_version.py index b205019..7501145 100644 --- a/repositories/Fooocus/fooocus_version.py +++ b/repositories/Fooocus/fooocus_version.py @@ -1 +1 @@ -version = '2.3.1' +version = '2.4.1' diff --git a/repositories/Fooocus/ldm_patched/contrib/external_align_your_steps.py b/repositories/Fooocus/ldm_patched/contrib/external_align_your_steps.py new file mode 100644 index 0000000..624bbce --- /dev/null +++ b/repositories/Fooocus/ldm_patched/contrib/external_align_your_steps.py @@ -0,0 +1,55 @@ +# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py + +#from: https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html +import numpy as np +import torch + +def loglinear_interp(t_steps, num_steps): + """ + Performs log-linear interpolation of a given array of decreasing numbers. + """ + xs = np.linspace(0, 1, len(t_steps)) + ys = np.log(t_steps[::-1]) + + new_xs = np.linspace(0, 1, num_steps) + new_ys = np.interp(new_xs, xs, ys) + + interped_ys = np.exp(new_ys)[::-1].copy() + return interped_ys + +NOISE_LEVELS = {"SD1": [14.6146412293, 6.4745760956, 3.8636745985, 2.6946151520, 1.8841921177, 1.3943805092, 0.9642583904, 0.6523686016, 0.3977456272, 0.1515232662, 0.0291671582], + "SDXL":[14.6146412293, 6.3184485287, 3.7681790315, 2.1811480769, 1.3405244945, 0.8620721141, 0.5550693289, 0.3798540708, 0.2332364134, 0.1114188177, 0.0291671582], + "SVD": [700.00, 54.5, 15.886, 7.977, 4.248, 1.789, 0.981, 0.403, 0.173, 0.034, 0.002]} + +class AlignYourStepsScheduler: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"model_type": (["SD1", "SDXL", "SVD"], ), + "steps": ("INT", {"default": 10, "min": 10, "max": 10000}), + "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/schedulers" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, model_type, steps, denoise): + total_steps = steps + if denoise < 1.0: + if denoise <= 0.0: + return (torch.FloatTensor([]),) + total_steps = round(steps * denoise) + + sigmas = NOISE_LEVELS[model_type][:] + if (steps + 1) != len(sigmas): + sigmas = loglinear_interp(sigmas, steps + 1) + + sigmas = sigmas[-(total_steps + 1):] + sigmas[-1] = 0 + return (torch.FloatTensor(sigmas), ) + +NODE_CLASS_MAPPINGS = { + "AlignYourStepsScheduler": AlignYourStepsScheduler, +} \ No newline at end of file diff --git a/repositories/Fooocus/ldm_patched/contrib/external_custom_sampler.py b/repositories/Fooocus/ldm_patched/contrib/external_custom_sampler.py index 8f92e84..985b03a 100644 --- a/repositories/Fooocus/ldm_patched/contrib/external_custom_sampler.py +++ b/repositories/Fooocus/ldm_patched/contrib/external_custom_sampler.py @@ -230,6 +230,25 @@ def get_sampler(self, eta, s_noise, r, noise_device): sampler = ldm_patched.modules.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise, "r": r}) return (sampler, ) + +class SamplerTCD: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "eta": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.01}), + } + } + RETURN_TYPES = ("SAMPLER",) + CATEGORY = "sampling/custom_sampling/samplers" + + FUNCTION = "get_sampler" + + def get_sampler(self, eta=0.3): + sampler = ldm_patched.modules.samplers.ksampler("tcd", {"eta": eta}) + return (sampler, ) + + class SamplerCustom: @classmethod def INPUT_TYPES(s): @@ -292,6 +311,7 @@ def sample(self, model, add_noise, noise_seed, cfg, positive, negative, sampler, "KSamplerSelect": KSamplerSelect, "SamplerDPMPP_2M_SDE": SamplerDPMPP_2M_SDE, "SamplerDPMPP_SDE": SamplerDPMPP_SDE, + "SamplerTCD": SamplerTCD, "SplitSigmas": SplitSigmas, "FlipSigmas": FlipSigmas, } diff --git a/repositories/Fooocus/ldm_patched/contrib/external_model_advanced.py b/repositories/Fooocus/ldm_patched/contrib/external_model_advanced.py index 03a2f04..9b52c36 100644 --- a/repositories/Fooocus/ldm_patched/contrib/external_model_advanced.py +++ b/repositories/Fooocus/ldm_patched/contrib/external_model_advanced.py @@ -70,7 +70,7 @@ class ModelSamplingDiscrete: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), - "sampling": (["eps", "v_prediction", "lcm"],), + "sampling": (["eps", "v_prediction", "lcm", "tcd"]), "zsnr": ("BOOLEAN", {"default": False}), }} @@ -90,6 +90,9 @@ def patch(self, model, sampling, zsnr): elif sampling == "lcm": sampling_type = LCM sampling_base = ModelSamplingDiscreteDistilled + elif sampling == "tcd": + sampling_type = ldm_patched.modules.model_sampling.EPS + sampling_base = ModelSamplingDiscreteDistilled class ModelSamplingAdvanced(sampling_base, sampling_type): pass diff --git a/repositories/Fooocus/ldm_patched/k_diffusion/sampling.py b/repositories/Fooocus/ldm_patched/k_diffusion/sampling.py index 761c2e0..d1bc1e4 100644 --- a/repositories/Fooocus/ldm_patched/k_diffusion/sampling.py +++ b/repositories/Fooocus/ldm_patched/k_diffusion/sampling.py @@ -752,7 +752,6 @@ def sample_lcm(model, x, sigmas, extra_args=None, callback=None, disable=None, n return x - @torch.no_grad() def sample_heunpp2(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.): # From MIT licensed: https://github.com/Carzit/sd-webui-samplers-scheduler/ @@ -808,3 +807,30 @@ def sample_heunpp2(model, x, sigmas, extra_args=None, callback=None, disable=Non d_prime = w1 * d + w2 * d_2 + w3 * d_3 x = x + d_prime * dt return x + + +@torch.no_grad() +def sample_tcd(model, x, sigmas, extra_args=None, callback=None, disable=None, noise_sampler=None, eta=0.3): + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + + model_sampling = model.inner_model.inner_model.model_sampling + timesteps_s = torch.floor((1 - eta) * model_sampling.timestep(sigmas)).to(dtype=torch.long).detach().cpu() + timesteps_s[-1] = 0 + alpha_prod_s = model_sampling.alphas_cumprod[timesteps_s] + beta_prod_s = 1 - alpha_prod_s + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) # predicted_original_sample + eps = (x - denoised) / sigmas[i] + denoised = alpha_prod_s[i + 1].sqrt() * denoised + beta_prod_s[i + 1].sqrt() * eps + + if callback is not None: + callback({"x": x, "i": i, "sigma": sigmas[i], "sigma_hat": sigmas[i], "denoised": denoised}) + + x = denoised + if eta > 0 and sigmas[i + 1] > 0: + noise = noise_sampler(sigmas[i], sigmas[i + 1]) + x = x / alpha_prod_s[i+1].sqrt() + noise * (sigmas[i+1]**2 + 1 - 1/alpha_prod_s[i+1]).sqrt() + + return x \ No newline at end of file diff --git a/repositories/Fooocus/ldm_patched/modules/args_parser.py b/repositories/Fooocus/ldm_patched/modules/args_parser.py index 0c6165a..bf87378 100644 --- a/repositories/Fooocus/ldm_patched/modules/args_parser.py +++ b/repositories/Fooocus/ldm_patched/modules/args_parser.py @@ -37,6 +37,7 @@ def __call__(self, parser, namespace, values, option_string=None): parser.add_argument("--port", type=int, default=8188) parser.add_argument("--disable-header-check", type=str, default=None, metavar="ORIGIN", nargs="?", const="*") parser.add_argument("--web-upload-size", type=float, default=100) +parser.add_argument("--hf-mirror", type=str, default=None) parser.add_argument("--external-working-path", type=str, default=None, metavar="PATH", nargs='+', action='append') parser.add_argument("--output-path", type=str, default=None) diff --git a/repositories/Fooocus/ldm_patched/modules/model_sampling.py b/repositories/Fooocus/ldm_patched/modules/model_sampling.py index f39e275..57f51a0 100644 --- a/repositories/Fooocus/ldm_patched/modules/model_sampling.py +++ b/repositories/Fooocus/ldm_patched/modules/model_sampling.py @@ -50,17 +50,17 @@ def _register_schedule(self, given_betas=None, beta_schedule="linear", timesteps self.linear_start = linear_start self.linear_end = linear_end - # self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32)) - # self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32)) - # self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32)) - sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5 self.set_sigmas(sigmas) + self.set_alphas_cumprod(alphas_cumprod.float()) def set_sigmas(self, sigmas): self.register_buffer('sigmas', sigmas) self.register_buffer('log_sigmas', sigmas.log()) + def set_alphas_cumprod(self, alphas_cumprod): + self.register_buffer("alphas_cumprod", alphas_cumprod.float()) + @property def sigma_min(self): return self.sigmas[0] diff --git a/repositories/Fooocus/ldm_patched/modules/samplers.py b/repositories/Fooocus/ldm_patched/modules/samplers.py index 1f69d2b..35cb3d7 100644 --- a/repositories/Fooocus/ldm_patched/modules/samplers.py +++ b/repositories/Fooocus/ldm_patched/modules/samplers.py @@ -523,7 +523,7 @@ def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=N KSAMPLER_NAMES = ["euler", "euler_ancestral", "heun", "heunpp2","dpm_2", "dpm_2_ancestral", "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", - "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm"] + "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm", "tcd"] class KSAMPLER(Sampler): def __init__(self, sampler_function, extra_options={}, inpaint_options={}): diff --git a/repositories/Fooocus/ldm_patched/modules/sd.py b/repositories/Fooocus/ldm_patched/modules/sd.py index e197c39..282f255 100644 --- a/repositories/Fooocus/ldm_patched/modules/sd.py +++ b/repositories/Fooocus/ldm_patched/modules/sd.py @@ -427,12 +427,13 @@ class EmptyClass: return (ldm_patched.modules.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae) -def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True): +def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, vae_filename_param=None): sd = ldm_patched.modules.utils.load_torch_file(ckpt_path) sd_keys = sd.keys() clip = None clipvision = None vae = None + vae_filename = None model = None model_patcher = None clip_target = None @@ -462,8 +463,12 @@ class WeightsLoader(torch.nn.Module): model.load_model_weights(sd, "model.diffusion_model.") if output_vae: - vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"first_stage_model.": ""}, filter_keys=True) - vae_sd = model_config.process_vae_state_dict(vae_sd) + if vae_filename_param is None: + vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"first_stage_model.": ""}, filter_keys=True) + vae_sd = model_config.process_vae_state_dict(vae_sd) + else: + vae_sd = ldm_patched.modules.utils.load_torch_file(vae_filename_param) + vae_filename = vae_filename_param vae = VAE(sd=vae_sd) if output_clip: @@ -485,7 +490,7 @@ class WeightsLoader(torch.nn.Module): print("loaded straight to GPU") model_management.load_model_gpu(model_patcher) - return (model_patcher, clip, vae, clipvision) + return model_patcher, clip, vae, vae_filename, clipvision def load_unet_state_dict(sd): #load unet in diffusers format diff --git a/repositories/Fooocus/models/safety_checker/put_safety_checker_models_here b/repositories/Fooocus/models/safety_checker/put_safety_checker_models_here new file mode 100644 index 0000000..e69de29 diff --git a/repositories/Fooocus/modules/__init__.py b/repositories/Fooocus/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/repositories/Fooocus/modules/async_worker.py b/repositories/Fooocus/modules/async_worker.py index d8a1e07..d7d9b9f 100644 --- a/repositories/Fooocus/modules/async_worker.py +++ b/repositories/Fooocus/modules/async_worker.py @@ -4,6 +4,7 @@ patch_all() + class AsyncTask: def __init__(self, args): self.args = args @@ -43,11 +44,13 @@ def worker(): import fooocus_version import args_manager - from modules.sdxl_styles import apply_style, apply_wildcards, fooocus_expansion, apply_arrays + from extras.censor import default_censor + from modules.sdxl_styles import apply_style, get_random_style, fooocus_expansion, apply_arrays, random_style_name from modules.private_logger import log from extras.expansion import safe_str - from modules.util import remove_empty_str, HWC3, resize_image, get_image_shape_ceil, set_image_shape_ceil, \ - get_shape_ceil, resample_image, erode_or_dilate, ordinal_suffix, get_enabled_loras + from modules.util import (remove_empty_str, HWC3, resize_image, get_image_shape_ceil, set_image_shape_ceil, + get_shape_ceil, resample_image, erode_or_dilate, get_enabled_loras, + parse_lora_references_from_prompt, apply_wildcards) from modules.upscaler import perform_upscale from modules.flags import Performance from modules.meta_parser import get_metadata_parser, MetadataScheme @@ -68,10 +71,15 @@ def progressbar(async_task, number, text): print(f'[Fooocus] {text}') async_task.yields.append(['preview', (number, text, None)]) - def yield_result(async_task, imgs, do_not_show_finished_images=False): + def yield_result(async_task, imgs, black_out_nsfw, censor=True, do_not_show_finished_images=False, + progressbar_index=flags.preparation_step_count): if not isinstance(imgs, list): imgs = [imgs] + if censor and (modules.config.default_black_out_nsfw or black_out_nsfw): + progressbar(async_task, progressbar_index, 'Checking for NSFW content ...') + imgs = default_censor(imgs) + async_task.results = async_task.results + imgs if do_not_show_finished_images: @@ -147,7 +155,8 @@ def handler(async_task): base_model_name = args.pop() refiner_model_name = args.pop() refiner_switch = args.pop() - loras = get_enabled_loras([[bool(args.pop()), str(args.pop()), float(args.pop())] for _ in range(modules.config.default_max_lora_number)]) + loras = get_enabled_loras([(bool(args.pop()), str(args.pop()), float(args.pop())) for _ in + range(modules.config.default_max_lora_number)]) input_image_checkbox = args.pop() current_tab = args.pop() uov_method = args.pop() @@ -160,12 +169,15 @@ def handler(async_task): disable_preview = args.pop() disable_intermediate_results = args.pop() disable_seed_increment = args.pop() + black_out_nsfw = args.pop() adm_scaler_positive = args.pop() adm_scaler_negative = args.pop() adm_scaler_end = args.pop() adaptive_cfg = args.pop() + clip_skip = args.pop() sampler_name = args.pop() scheduler_name = args.pop() + vae_name = args.pop() overwrite_step = args.pop() overwrite_switch = args.pop() overwrite_width = args.pop() @@ -195,7 +207,8 @@ def handler(async_task): inpaint_erode_or_dilate = args.pop() save_metadata_to_images = args.pop() if not args_manager.args.disable_metadata else False - metadata_scheme = MetadataScheme(args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS + metadata_scheme = MetadataScheme( + args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS cn_tasks = {x: [] for x in flags.ip_list} for _ in range(flags.controlnet_image_count): @@ -225,10 +238,12 @@ def handler(async_task): steps = performance_selection.steps() + performance_loras = [] + if performance_selection == Performance.EXTREME_SPEED: print('Enter LCM mode.') progressbar(async_task, 1, 'Downloading LCM components ...') - loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)] + performance_loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)] if refiner_model_name != 'None': print(f'Refiner disabled in LCM mode.') @@ -247,7 +262,7 @@ def handler(async_task): elif performance_selection == Performance.LIGHTNING: print('Enter Lightning mode.') progressbar(async_task, 1, 'Downloading Lightning components ...') - loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)] + performance_loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)] if refiner_model_name != 'None': print(f'Refiner disabled in Lightning mode.') @@ -263,7 +278,27 @@ def handler(async_task): adm_scaler_negative = 1.0 adm_scaler_end = 0.0 + elif performance_selection == Performance.HYPER_SD: + print('Enter Hyper-SD mode.') + progressbar(async_task, 1, 'Downloading Hyper-SD components ...') + performance_loras += [(modules.config.downloading_sdxl_hyper_sd_lora(), 0.8)] + + if refiner_model_name != 'None': + print(f'Refiner disabled in Hyper-SD mode.') + + refiner_model_name = 'None' + sampler_name = 'dpmpp_sde_gpu' + scheduler_name = 'karras' + sharpness = 0.0 + guidance_scale = 1.0 + adaptive_cfg = 1.0 + refiner_switch = 1.0 + adm_scaler_positive = 1.0 + adm_scaler_negative = 1.0 + adm_scaler_end = 0.0 + print(f'[Parameters] Adaptive CFG = {adaptive_cfg}') + print(f'[Parameters] CLIP Skip = {clip_skip}') print(f'[Parameters] Sharpness = {sharpness}') print(f'[Parameters] ControlNet Softness = {controlnet_softness}') print(f'[Parameters] ADM Scale = ' @@ -425,14 +460,19 @@ def handler(async_task): extra_positive_prompts = prompts[1:] if len(prompts) > 1 else [] extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else [] - progressbar(async_task, 3, 'Loading models ...') + progressbar(async_task, 2, 'Loading models ...') + + loras, prompt = parse_lora_references_from_prompt(prompt, loras, modules.config.default_max_lora_number) + loras += performance_loras pipeline.refresh_everything(refiner_model_name=refiner_model_name, base_model_name=base_model_name, loras=loras, base_model_additional_loras=base_model_additional_loras, - use_synthetic_refiner=use_synthetic_refiner) + use_synthetic_refiner=use_synthetic_refiner, vae_name=vae_name) + + pipeline.set_clip_skip(clip_skip) progressbar(async_task, 3, 'Processing prompts ...') tasks = [] - + for i in range(image_number): if disable_seed_increment: task_seed = seed % (constants.MAX_SEED + 1) @@ -443,14 +483,20 @@ def handler(async_task): task_prompt = apply_wildcards(prompt, task_rng, i, read_wildcards_in_order) task_prompt = apply_arrays(task_prompt, i) task_negative_prompt = apply_wildcards(negative_prompt, task_rng, i, read_wildcards_in_order) - task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in extra_positive_prompts] - task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in extra_negative_prompts] + task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in + extra_positive_prompts] + task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in + extra_negative_prompts] positive_basic_workloads = [] negative_basic_workloads = [] + task_styles = style_selections.copy() if use_style: - for s in style_selections: + for i, s in enumerate(task_styles): + if s == random_style_name: + s = get_random_style(task_rng) + task_styles[i] = s p, n = apply_style(s, positive=task_prompt) positive_basic_workloads = positive_basic_workloads + p negative_basic_workloads = negative_basic_workloads + n @@ -478,29 +524,30 @@ def handler(async_task): negative_top_k=len(negative_basic_workloads), log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts), log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts), + styles=task_styles )) if use_expansion: for i, t in enumerate(tasks): - progressbar(async_task, 5, f'Preparing Fooocus text #{i + 1} ...') + progressbar(async_task, 4, f'Preparing Fooocus text #{i + 1} ...') expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed']) print(f'[Prompt Expansion] {expansion}') t['expansion'] = expansion t['positive'] = copy.deepcopy(t['positive']) + [expansion] # Deep copy. for i, t in enumerate(tasks): - progressbar(async_task, 7, f'Encoding positive #{i + 1} ...') + progressbar(async_task, 5, f'Encoding positive #{i + 1} ...') t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k']) for i, t in enumerate(tasks): if abs(float(cfg_scale) - 1.0) < 1e-4: t['uc'] = pipeline.clone_cond(t['c']) else: - progressbar(async_task, 10, f'Encoding negative #{i + 1} ...') + progressbar(async_task, 6, f'Encoding negative #{i + 1} ...') t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k']) if len(goals) > 0: - progressbar(async_task, 13, 'Image processing ...') + progressbar(async_task, 7, 'Image processing ...') if 'vary' in goals: if 'subtle' in uov_method: @@ -521,7 +568,7 @@ def handler(async_task): uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil) initial_pixels = core.numpy_to_pytorch(uov_input_image) - progressbar(async_task, 13, 'VAE encoding ...') + progressbar(async_task, 8, 'VAE encoding ...') candidate_vae, _ = pipeline.get_candidate_vae( steps=steps, @@ -538,7 +585,7 @@ def handler(async_task): if 'upscale' in goals: H, W, C = uov_input_image.shape - progressbar(async_task, 13, f'Upscaling image from {str((H, W))} ...') + progressbar(async_task, 9, f'Upscaling image from {str((H, W))} ...') uov_input_image = perform_upscale(uov_input_image) print(f'Image upscaled.') @@ -572,8 +619,12 @@ def handler(async_task): if direct_return: d = [('Upscale (Fast)', 'upscale_fast', '2x')] + if modules.config.default_black_out_nsfw or black_out_nsfw: + progressbar(async_task, 100, 'Checking for NSFW content ...') + uov_input_image = default_censor(uov_input_image) + progressbar(async_task, 100, 'Saving image to system ...') uov_input_image_path = log(uov_input_image, d, output_format=output_format) - yield_result(async_task, uov_input_image_path, do_not_show_finished_images=True) + yield_result(async_task, uov_input_image_path, black_out_nsfw, False, do_not_show_finished_images=True) return tiled = True @@ -583,7 +634,7 @@ def handler(async_task): denoising_strength = overwrite_upscale_strength initial_pixels = core.numpy_to_pytorch(uov_input_image) - progressbar(async_task, 13, 'VAE encoding ...') + progressbar(async_task, 10, 'VAE encoding ...') candidate_vae, _ = pipeline.get_candidate_vae( steps=steps, @@ -637,11 +688,11 @@ def handler(async_task): ) if debugging_inpaint_preprocessor: - yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), + yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), black_out_nsfw, do_not_show_finished_images=True) return - progressbar(async_task, 13, 'VAE Inpaint encoding ...') + progressbar(async_task, 11, 'VAE Inpaint encoding ...') inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill) inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image) @@ -661,7 +712,7 @@ def handler(async_task): latent_swap = None if candidate_vae_swap is not None: - progressbar(async_task, 13, 'VAE SD15 encoding ...') + progressbar(async_task, 12, 'VAE SD15 encoding ...') latent_swap = core.encode_vae( vae=candidate_vae_swap, pixels=inpaint_pixel_fill)['samples'] @@ -701,7 +752,7 @@ def handler(async_task): cn_img = HWC3(cn_img) task[0] = core.numpy_to_pytorch(cn_img) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, do_not_show_finished_images=True) + yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) return for task in cn_tasks[flags.cn_cpds]: cn_img, cn_stop, cn_weight = task @@ -713,7 +764,7 @@ def handler(async_task): cn_img = HWC3(cn_img) task[0] = core.numpy_to_pytorch(cn_img) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, do_not_show_finished_images=True) + yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) return for task in cn_tasks[flags.cn_ip]: cn_img, cn_stop, cn_weight = task @@ -724,7 +775,7 @@ def handler(async_task): task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, do_not_show_finished_images=True) + yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) return for task in cn_tasks[flags.cn_ip_face]: cn_img, cn_stop, cn_weight = task @@ -738,7 +789,7 @@ def handler(async_task): task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, do_not_show_finished_images=True) + yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) return all_ip_tasks = cn_tasks[flags.cn_ip] + cn_tasks[flags.cn_ip_face] @@ -773,29 +824,31 @@ def handler(async_task): final_sampler_name = sampler_name final_scheduler_name = scheduler_name - if scheduler_name == 'lcm': + if scheduler_name in ['lcm', 'tcd']: final_scheduler_name = 'sgm_uniform' if pipeline.final_unet is not None: pipeline.final_unet = core.opModelSamplingDiscrete.patch( pipeline.final_unet, - sampling='lcm', + sampling=scheduler_name, zsnr=False)[0] if pipeline.final_refiner_unet is not None: pipeline.final_refiner_unet = core.opModelSamplingDiscrete.patch( pipeline.final_refiner_unet, - sampling='lcm', + sampling=scheduler_name, zsnr=False)[0] - print('Using lcm scheduler.') + print(f'Using {scheduler_name} scheduler.') - async_task.yields.append(['preview', (13, 'Moving model to GPU ...', None)]) + async_task.yields.append(['preview', (flags.preparation_step_count, 'Moving model to GPU ...', None)]) def callback(step, x0, x, total_steps, y): done_steps = current_task_id * steps + step async_task.yields.append(['preview', ( - int(15.0 + 85.0 * float(done_steps) / float(all_steps)), - f'Step {step}/{total_steps} in the {current_task_id + 1}{ordinal_suffix(current_task_id + 1)} Sampling', y)]) + int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)), + f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{image_number} ...', y)]) for current_task_id, task in enumerate(tasks): + current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(current_task_id * steps) / float(all_steps)) + progressbar(async_task, current_progress, f'Preparing task {current_task_id + 1}/{image_number} ...') execution_start_time = time.perf_counter() try: @@ -838,11 +891,18 @@ def callback(step, x0, x, total_steps, y): imgs = [inpaint_worker.current_task.post_process(x) for x in imgs] img_paths = [] + current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float((current_task_id + 1) * steps) / float(all_steps)) + if modules.config.default_black_out_nsfw or black_out_nsfw: + progressbar(async_task, current_progress, 'Checking for NSFW content ...') + imgs = default_censor(imgs) + + progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{image_number} to system ...') for x in imgs: d = [('Prompt', 'prompt', task['log_positive_prompt']), ('Negative Prompt', 'negative_prompt', task['log_negative_prompt']), ('Fooocus V2 Expansion', 'prompt_expansion', task['expansion']), - ('Styles', 'styles', str(raw_style_selections)), + ('Styles', 'styles', + str(task['styles'] if not use_expansion else [fooocus_expansion] + task['styles'])), ('Performance', 'performance', performance_selection.value)] if performance_selection.steps() != steps: @@ -865,10 +925,14 @@ def callback(step, x0, x, total_steps, y): if refiner_swap_method != flags.refiner_swap_method: d.append(('Refiner Swap Method', 'refiner_swap_method', refiner_swap_method)) if modules.patch.patch_settings[pid].adaptive_cfg != modules.config.default_cfg_tsnr: - d.append(('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg)) + d.append( + ('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg)) + if clip_skip > 1: + d.append(('CLIP Skip', 'clip_skip', clip_skip)) d.append(('Sampler', 'sampler', sampler_name)) d.append(('Scheduler', 'scheduler', scheduler_name)) + d.append(('VAE', 'vae', vae_name)) d.append(('Seed', 'seed', str(task['task_seed']))) if freeu_enabled: @@ -883,12 +947,14 @@ def callback(step, x0, x, total_steps, y): metadata_parser = modules.meta_parser.get_metadata_parser(metadata_scheme) metadata_parser.set_data(task['log_positive_prompt'], task['positive'], task['log_negative_prompt'], task['negative'], - steps, base_model_name, refiner_model_name, loras) - d.append(('Metadata Scheme', 'metadata_scheme', metadata_scheme.value if save_metadata_to_images else save_metadata_to_images)) + steps, base_model_name, refiner_model_name, loras, vae_name) + d.append(('Metadata Scheme', 'metadata_scheme', + metadata_scheme.value if save_metadata_to_images else save_metadata_to_images)) d.append(('Version', 'version', 'Fooocus v' + fooocus_version.version)) - img_paths.append(log(x, d, metadata_parser, output_format)) + img_paths.append(log(x, d, metadata_parser, output_format, task)) - yield_result(async_task, img_paths, do_not_show_finished_images=len(tasks) == 1 or disable_intermediate_results) + yield_result(async_task, img_paths, black_out_nsfw, False, + do_not_show_finished_images=len(tasks) == 1 or disable_intermediate_results) except ldm_patched.modules.model_management.InterruptProcessingException as e: if async_task.last_stop == 'skip': print('User skipped') diff --git a/repositories/Fooocus/modules/config.py b/repositories/Fooocus/modules/config.py index b81e218..cb651c5 100644 --- a/repositories/Fooocus/modules/config.py +++ b/repositories/Fooocus/modules/config.py @@ -8,7 +8,7 @@ import modules.sdxl_styles from modules.model_loader import load_file_from_url -from modules.util import get_files_from_folder, makedirs_with_log +from modules.extra_utils import makedirs_with_log, get_files_from_folder from modules.flags import OutputFormat, Performance, MetadataScheme @@ -20,7 +20,7 @@ def get_config_path(key, default_value): else: return os.path.abspath(default_value) - +wildcards_max_bfs_depth = 64 config_path = get_config_path('config_path', "./config.txt") config_example_path = get_config_path('config_example_path', "config_modification_tutorial.txt") config_dict = {} @@ -189,12 +189,14 @@ def get_dir_or_set_default(key, default_value, as_array=False, make_directory=Fa paths_loras = get_dir_or_set_default('path_loras', ['../models/loras/'], True) path_embeddings = get_dir_or_set_default('path_embeddings', '../models/embeddings/') path_vae_approx = get_dir_or_set_default('path_vae_approx', '../models/vae_approx/') +path_vae = get_dir_or_set_default('path_vae', '../models/vae/') path_upscale_models = get_dir_or_set_default('path_upscale_models', '../models/upscale_models/') path_inpaint = get_dir_or_set_default('path_inpaint', '../models/inpaint/') path_controlnet = get_dir_or_set_default('path_controlnet', '../models/controlnet/') path_clip_vision = get_dir_or_set_default('path_clip_vision', '../models/clip_vision/') path_fooocus_expansion = get_dir_or_set_default('path_fooocus_expansion', '../models/prompt_expansion/fooocus_expansion') path_wildcards = get_dir_or_set_default('path_wildcards', '../wildcards/') +path_safety_checker = get_dir_or_set_default('path_safety_checker', '../models/safety_checker/') path_outputs = get_path_output() @@ -346,6 +348,11 @@ def init_temp_path(path: str | None, default_path: str) -> str: default_value='karras', validator=lambda x: x in modules.flags.scheduler_list ) +default_vae = get_config_item_or_set_default( + key='default_vae', + default_value=modules.flags.default_vae, + validator=lambda x: isinstance(x, str) +) default_styles = get_config_item_or_set_default( key='default_styles', default_value=[ @@ -409,13 +416,7 @@ def init_temp_path(path: str | None, default_path: str) -> str: ) available_aspect_ratios = get_config_item_or_set_default( key='available_aspect_ratios', - default_value=[ - '704*1408', '704*1344', '768*1344', '768*1280', '832*1216', '832*1152', - '896*1152', '896*1088', '960*1088', '960*1024', '1024*1024', '1024*960', - '1088*960', '1088*896', '1152*896', '1152*832', '1216*832', '1280*768', - '1344*768', '1344*704', '1408*704', '1472*704', '1536*640', '1600*640', - '1664*576', '1728*576' - ], + default_value=modules.flags.sdxl_aspect_ratios, validator=lambda x: isinstance(x, list) and all('*' in v for v in x) and len(x) > 1 ) default_aspect_ratio = get_config_item_or_set_default( @@ -433,6 +434,11 @@ def init_temp_path(path: str | None, default_path: str) -> str: default_value=7.0, validator=lambda x: isinstance(x, numbers.Number) ) +default_clip_skip = get_config_item_or_set_default( + key='default_clip_skip', + default_value=2, + validator=lambda x: isinstance(x, int) and 1 <= x <= modules.flags.clip_skip_max +) default_overwrite_step = get_config_item_or_set_default( key='default_overwrite_step', default_value=-1, @@ -450,6 +456,11 @@ def init_temp_path(path: str | None, default_path: str) -> str: ], validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x) ) +default_black_out_nsfw = get_config_item_or_set_default( + key='default_black_out_nsfw', + default_value=False, + validator=lambda x: isinstance(x, bool) +) default_save_metadata_to_images = get_config_item_or_set_default( key='default_save_metadata_to_images', default_value=False, @@ -481,6 +492,8 @@ def init_temp_path(path: str | None, default_path: str) -> str: "default_loras": "", "default_cfg_scale": "guidance_scale", "default_sample_sharpness": "sharpness", + "default_cfg_tsnr": "adaptive_cfg", + "default_clip_skip": "clip_skip", "default_sampler": "sampler", "default_scheduler": "scheduler", "default_overwrite_step": "steps", @@ -514,7 +527,7 @@ def add_ratio(x): default_aspect_ratio = add_ratio(default_aspect_ratio) -available_aspect_ratios = [add_ratio(x) for x in available_aspect_ratios] +available_aspect_ratios_labels = [add_ratio(x) for x in available_aspect_ratios] # Only write config in the first launch. @@ -535,26 +548,45 @@ def add_ratio(x): model_filenames = [] lora_filenames = [] +lora_filenames_no_special = [] +vae_filenames = [] wildcard_filenames = [] sdxl_lcm_lora = 'sdxl_lcm_lora.safetensors' sdxl_lightning_lora = 'sdxl_lightning_4step_lora.safetensors' -loras_metadata_remove = [sdxl_lcm_lora, sdxl_lightning_lora] +sdxl_hyper_sd_lora = 'sdxl_hyper_sd_4step_lora.safetensors' +loras_metadata_remove = [sdxl_lcm_lora, sdxl_lightning_lora, sdxl_hyper_sd_lora] + + +def remove_special_loras(lora_filenames): + global loras_metadata_remove + + loras_no_special = lora_filenames.copy() + for lora_to_remove in loras_metadata_remove: + if lora_to_remove in loras_no_special: + loras_no_special.remove(lora_to_remove) + return loras_no_special def get_model_filenames(folder_paths, extensions=None, name_filter=None): if extensions is None: extensions = ['.pth', '.ckpt', '.bin', '.safetensors', '.fooocus.patch'] files = [] + + if not isinstance(folder_paths, list): + folder_paths = [folder_paths] for folder in folder_paths: files += get_files_from_folder(folder, extensions, name_filter) + return files def update_files(): - global model_filenames, lora_filenames, wildcard_filenames, available_presets + global model_filenames, lora_filenames, lora_filenames_no_special, vae_filenames, wildcard_filenames, available_presets model_filenames = get_model_filenames(paths_checkpoints) lora_filenames = get_model_filenames(paths_loras) + lora_filenames_no_special = remove_special_loras(lora_filenames) + vae_filenames = get_model_filenames(path_vae) wildcard_filenames = get_files_from_folder(path_wildcards, ['.txt']) available_presets = get_presets() return @@ -608,13 +640,22 @@ def downloading_sdxl_lcm_lora(): def downloading_sdxl_lightning_lora(): load_file_from_url( - url='https://huggingface.co/ByteDance/SDXL-Lightning/resolve/main/sdxl_lightning_4step_lora.safetensors', + url='https://huggingface.co/mashb1t/misc/resolve/main/sdxl_lightning_4step_lora.safetensors', model_dir=paths_loras[0], file_name=sdxl_lightning_lora ) return sdxl_lightning_lora +def downloading_sdxl_hyper_sd_lora(): + load_file_from_url( + url='https://huggingface.co/mashb1t/misc/resolve/main/sdxl_hyper_sd_4step_lora.safetensors', + model_dir=paths_loras[0], + file_name=sdxl_hyper_sd_lora + ) + return sdxl_hyper_sd_lora + + def downloading_controlnet_canny(): load_file_from_url( url='https://huggingface.co/lllyasviel/misc/resolve/main/control-lora-canny-rank128.safetensors', @@ -679,5 +720,13 @@ def downloading_upscale_model(): ) return os.path.join(path_upscale_models, 'fooocus_upscaler_s409985e5.bin') +def downloading_safety_checker_model(): + load_file_from_url( + url='https://huggingface.co/mashb1t/misc/resolve/main/stable-diffusion-safety-checker.bin', + model_dir=path_safety_checker, + file_name='stable-diffusion-safety-checker.bin' + ) + return os.path.join(path_safety_checker, 'stable-diffusion-safety-checker.bin') + update_files() diff --git a/repositories/Fooocus/modules/core.py b/repositories/Fooocus/modules/core.py index 38ee8e8..3ca4cc5 100644 --- a/repositories/Fooocus/modules/core.py +++ b/repositories/Fooocus/modules/core.py @@ -35,12 +35,13 @@ class StableDiffusionModel: - def __init__(self, unet=None, vae=None, clip=None, clip_vision=None, filename=None): + def __init__(self, unet=None, vae=None, clip=None, clip_vision=None, filename=None, vae_filename=None): self.unet = unet self.vae = vae self.clip = clip self.clip_vision = clip_vision self.filename = filename + self.vae_filename = vae_filename self.unet_with_lora = unet self.clip_with_lora = clip self.visited_loras = '' @@ -142,9 +143,10 @@ def apply_controlnet(positive, negative, control_net, image, strength, start_per @torch.no_grad() @torch.inference_mode() -def load_model(ckpt_filename): - unet, clip, vae, clip_vision = load_checkpoint_guess_config(ckpt_filename, embedding_directory=path_embeddings) - return StableDiffusionModel(unet=unet, clip=clip, vae=vae, clip_vision=clip_vision, filename=ckpt_filename) +def load_model(ckpt_filename, vae_filename=None): + unet, clip, vae, vae_filename, clip_vision = load_checkpoint_guess_config(ckpt_filename, embedding_directory=path_embeddings, + vae_filename_param=vae_filename) + return StableDiffusionModel(unet=unet, clip=clip, vae=vae, clip_vision=clip_vision, filename=ckpt_filename, vae_filename=vae_filename) @torch.no_grad() diff --git a/repositories/Fooocus/modules/default_pipeline.py b/repositories/Fooocus/modules/default_pipeline.py index 190601e..494644d 100644 --- a/repositories/Fooocus/modules/default_pipeline.py +++ b/repositories/Fooocus/modules/default_pipeline.py @@ -3,6 +3,7 @@ import torch import modules.patch import modules.config +import modules.flags import ldm_patched.modules.model_management import ldm_patched.modules.latent_formats import modules.inpaint_worker @@ -58,17 +59,21 @@ def assert_model_integrity(): @torch.no_grad() @torch.inference_mode() -def refresh_base_model(name): +def refresh_base_model(name, vae_name=None): global model_base filename = get_file_from_folder_list(name, modules.config.paths_checkpoints) - if model_base.filename == filename: + vae_filename = None + if vae_name is not None and vae_name != modules.flags.default_vae: + vae_filename = get_file_from_folder_list(vae_name, modules.config.path_vae) + + if model_base.filename == filename and model_base.vae_filename == vae_filename: return - model_base = core.StableDiffusionModel() - model_base = core.load_model(filename) + model_base = core.load_model(filename, vae_filename) print(f'Base model loaded: {model_base.filename}') + print(f'VAE loaded: {model_base.vae_filename}') return @@ -196,6 +201,17 @@ def clip_encode(texts, pool_top_k=1): return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]] +@torch.no_grad() +@torch.inference_mode() +def set_clip_skip(clip_skip: int): + global final_clip + + if final_clip is None: + return + + final_clip.clip_layer(-abs(clip_skip)) + return + @torch.no_grad() @torch.inference_mode() def clear_all_caches(): @@ -216,7 +232,7 @@ def prepare_text_encoder(async_call=True): @torch.no_grad() @torch.inference_mode() def refresh_everything(refiner_model_name, base_model_name, loras, - base_model_additional_loras=None, use_synthetic_refiner=False): + base_model_additional_loras=None, use_synthetic_refiner=False, vae_name=None): global final_unet, final_clip, final_vae, final_refiner_unet, final_refiner_vae, final_expansion final_unet = None @@ -227,11 +243,11 @@ def refresh_everything(refiner_model_name, base_model_name, loras, if use_synthetic_refiner and refiner_model_name == 'None': print('Synthetic Refiner Activated') - refresh_base_model(base_model_name) + refresh_base_model(base_model_name, vae_name) synthesize_refiner_model() else: refresh_refiner_model(refiner_model_name) - refresh_base_model(base_model_name) + refresh_base_model(base_model_name, vae_name) refresh_loras(loras, base_model_additional_loras=base_model_additional_loras) assert_model_integrity() @@ -254,7 +270,8 @@ def refresh_everything(refiner_model_name, base_model_name, loras, refresh_everything( refiner_model_name=modules.config.default_refiner_model_name, base_model_name=modules.config.default_base_model_name, - loras=get_enabled_loras(modules.config.default_loras) + loras=get_enabled_loras(modules.config.default_loras), + vae_name=modules.config.default_vae, ) diff --git a/repositories/Fooocus/modules/extra_utils.py b/repositories/Fooocus/modules/extra_utils.py new file mode 100644 index 0000000..9906c82 --- /dev/null +++ b/repositories/Fooocus/modules/extra_utils.py @@ -0,0 +1,26 @@ +import os + +def makedirs_with_log(path): + try: + os.makedirs(path, exist_ok=True) + except OSError as error: + print(f'Directory {path} could not be created, reason: {error}') + + +def get_files_from_folder(folder_path, extensions=None, name_filter=None): + if not os.path.isdir(folder_path): + raise ValueError("Folder path is not a valid directory.") + + filenames = [] + + for root, _, files in os.walk(folder_path, topdown=False): + relative_path = os.path.relpath(root, folder_path) + if relative_path == ".": + relative_path = "" + for filename in sorted(files, key=lambda s: s.casefold()): + _, file_extension = os.path.splitext(filename) + if (extensions is None or file_extension.lower() in extensions) and (name_filter is None or name_filter in _): + path = os.path.join(relative_path, filename) + filenames.append(path) + + return filenames diff --git a/repositories/Fooocus/modules/flags.py b/repositories/Fooocus/modules/flags.py index c9d13fd..e48052e 100644 --- a/repositories/Fooocus/modules/flags.py +++ b/repositories/Fooocus/modules/flags.py @@ -34,7 +34,8 @@ "dpmpp_3m_sde": "", "dpmpp_3m_sde_gpu": "", "ddpm": "", - "lcm": "LCM" + "lcm": "LCM", + "tcd": "TCD" } SAMPLER_EXTRA = { @@ -47,12 +48,16 @@ KSAMPLER_NAMES = list(KSAMPLER.keys()) -SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "lcm", "turbo"] +SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "lcm", "turbo", "align_your_steps", "tcd"] SAMPLER_NAMES = KSAMPLER_NAMES + list(SAMPLER_EXTRA.keys()) sampler_list = SAMPLER_NAMES scheduler_list = SCHEDULER_NAMES +clip_skip_max = 12 + +default_vae = 'Default (model)' + refiner_swap_method = 'joint' cn_ip = "ImagePrompt" @@ -78,6 +83,13 @@ desc_type_photo = 'Photograph' desc_type_anime = 'Art/Anime' +sdxl_aspect_ratios = [ + '704*1408', '704*1344', '768*1344', '768*1280', '832*1216', '832*1152', + '896*1152', '896*1088', '960*1088', '960*1024', '1024*1024', '1024*960', + '1088*960', '1088*896', '1152*896', '1152*832', '1216*832', '1280*768', + '1344*768', '1344*704', '1408*704', '1472*704', '1536*640', '1600*640', + '1664*576', '1728*576' +] class MetadataScheme(Enum): FOOOCUS = 'fooocus' @@ -90,6 +102,7 @@ class MetadataScheme(Enum): ] controlnet_image_count = 4 +preparation_step_count = 13 class OutputFormat(Enum): @@ -107,6 +120,7 @@ class Steps(IntEnum): SPEED = 30 EXTREME_SPEED = 8 LIGHTNING = 4 + HYPER_SD = 4 class StepsUOV(IntEnum): @@ -114,6 +128,7 @@ class StepsUOV(IntEnum): SPEED = 18 EXTREME_SPEED = 8 LIGHTNING = 4 + HYPER_SD = 4 class Performance(Enum): @@ -121,6 +136,7 @@ class Performance(Enum): SPEED = 'Speed' EXTREME_SPEED = 'Extreme Speed' LIGHTNING = 'Lightning' + HYPER_SD = 'Hyper-SD' @classmethod def list(cls) -> list: @@ -130,7 +146,7 @@ def list(cls) -> list: def has_restricted_features(cls, x) -> bool: if isinstance(x, Performance): x = x.value - return x in [cls.EXTREME_SPEED.value, cls.LIGHTNING.value] + return x in [cls.EXTREME_SPEED.value, cls.LIGHTNING.value, cls.HYPER_SD.value] def steps(self) -> int | None: return Steps[self.name].value if Steps[self.name] else None diff --git a/repositories/Fooocus/modules/meta_parser.py b/repositories/Fooocus/modules/meta_parser.py index 70ab886..586e62d 100644 --- a/repositories/Fooocus/modules/meta_parser.py +++ b/repositories/Fooocus/modules/meta_parser.py @@ -34,18 +34,20 @@ def load_parameter_button_click(raw_metadata: dict | str, is_generating: bool): get_list('styles', 'Styles', loaded_parameter_dict, results) get_str('performance', 'Performance', loaded_parameter_dict, results) get_steps('steps', 'Steps', loaded_parameter_dict, results) - get_float('overwrite_switch', 'Overwrite Switch', loaded_parameter_dict, results) + get_number('overwrite_switch', 'Overwrite Switch', loaded_parameter_dict, results) get_resolution('resolution', 'Resolution', loaded_parameter_dict, results) - get_float('guidance_scale', 'Guidance Scale', loaded_parameter_dict, results) - get_float('sharpness', 'Sharpness', loaded_parameter_dict, results) + get_number('guidance_scale', 'Guidance Scale', loaded_parameter_dict, results) + get_number('sharpness', 'Sharpness', loaded_parameter_dict, results) get_adm_guidance('adm_guidance', 'ADM Guidance', loaded_parameter_dict, results) get_str('refiner_swap_method', 'Refiner Swap Method', loaded_parameter_dict, results) - get_float('adaptive_cfg', 'CFG Mimicking from TSNR', loaded_parameter_dict, results) + get_number('adaptive_cfg', 'CFG Mimicking from TSNR', loaded_parameter_dict, results) + get_number('clip_skip', 'CLIP Skip', loaded_parameter_dict, results, cast_type=int) get_str('base_model', 'Base Model', loaded_parameter_dict, results) get_str('refiner_model', 'Refiner Model', loaded_parameter_dict, results) - get_float('refiner_switch', 'Refiner Switch', loaded_parameter_dict, results) + get_number('refiner_switch', 'Refiner Switch', loaded_parameter_dict, results) get_str('sampler', 'Sampler', loaded_parameter_dict, results) get_str('scheduler', 'Scheduler', loaded_parameter_dict, results) + get_str('vae', 'VAE', loaded_parameter_dict, results) get_seed('seed', 'Seed', loaded_parameter_dict, results) if is_generating: @@ -82,11 +84,11 @@ def get_list(key: str, fallback: str | None, source_dict: dict, results: list, d results.append(gr.update()) -def get_float(key: str, fallback: str | None, source_dict: dict, results: list, default=None): +def get_number(key: str, fallback: str | None, source_dict: dict, results: list, default=None, cast_type=float): try: h = source_dict.get(key, source_dict.get(fallback, default)) assert h is not None - h = float(h) + h = cast_type(h) results.append(h) except: results.append(gr.update()) @@ -123,7 +125,7 @@ def get_resolution(key: str, fallback: str | None, source_dict: dict, results: l h = source_dict.get(key, source_dict.get(fallback, default)) width, height = eval(h) formatted = modules.config.add_ratio(f'{width}*{height}') - if formatted in modules.config.available_aspect_ratios: + if formatted in modules.config.available_aspect_ratios_labels: results.append(formatted) results.append(-1) results.append(-1) @@ -204,7 +206,6 @@ def get_lora(key: str, fallback: str | None, source_dict: dict, results: list): def get_sha256(filepath): global hash_cache if filepath not in hash_cache: - # is_safetensors = os.path.splitext(filepath)[1].lower() == '.safetensors' hash_cache[filepath] = sha256(filepath) return hash_cache[filepath] @@ -253,6 +254,7 @@ def __init__(self): self.refiner_model_name: str = '' self.refiner_model_hash: str = '' self.loras: list = [] + self.vae_name: str = '' @abstractmethod def get_scheme(self) -> MetadataScheme: @@ -267,7 +269,7 @@ def parse_string(self, metadata: dict) -> str: raise NotImplementedError def set_data(self, raw_prompt, full_prompt, raw_negative_prompt, full_negative_prompt, steps, base_model_name, - refiner_model_name, loras): + refiner_model_name, loras, vae_name): self.raw_prompt = raw_prompt self.full_prompt = full_prompt self.raw_negative_prompt = raw_negative_prompt @@ -289,12 +291,7 @@ def set_data(self, raw_prompt, full_prompt, raw_negative_prompt, full_negative_p lora_path = get_file_from_folder_list(lora_name, modules.config.paths_loras) lora_hash = get_sha256(lora_path) self.loras.append((Path(lora_name).stem, lora_weight, lora_hash)) - - @staticmethod - def remove_special_loras(lora_filenames): - for lora_to_remove in modules.config.loras_metadata_remove: - if lora_to_remove in lora_filenames: - lora_filenames.remove(lora_to_remove) + self.vae_name = Path(vae_name).stem class A1111MetadataParser(MetadataParser): @@ -310,6 +307,7 @@ def get_scheme(self) -> MetadataScheme: 'steps': 'Steps', 'sampler': 'Sampler', 'scheduler': 'Scheduler', + 'vae': 'VAE', 'guidance_scale': 'CFG scale', 'seed': 'Seed', 'resolution': 'Size', @@ -317,6 +315,7 @@ def get_scheme(self) -> MetadataScheme: 'adm_guidance': 'ADM Guidance', 'refiner_swap_method': 'Refiner Swap Method', 'adaptive_cfg': 'Adaptive CFG', + 'clip_skip': 'Clip skip', 'overwrite_switch': 'Overwrite Switch', 'freeu': 'FreeU', 'base_model': 'Model', @@ -397,13 +396,12 @@ def parse_json(self, metadata: str) -> dict: data['sampler'] = k break - for key in ['base_model', 'refiner_model']: + for key in ['base_model', 'refiner_model', 'vae']: if key in data: - for filename in modules.config.model_filenames: - path = Path(filename) - if data[key] == path.stem: - data[key] = filename - break + if key == 'vae': + self.add_extension_to_filename(data, modules.config.vae_filenames, 'vae') + else: + self.add_extension_to_filename(data, modules.config.model_filenames, key) lora_data = '' if 'lora_weights' in data and data['lora_weights'] != '': @@ -412,13 +410,11 @@ def parse_json(self, metadata: str) -> dict: lora_data = data['lora_hashes'] if lora_data != '': - lora_filenames = modules.config.lora_filenames.copy() - self.remove_special_loras(lora_filenames) for li, lora in enumerate(lora_data.split(', ')): lora_split = lora.split(': ') lora_name = lora_split[0] lora_weight = lora_split[2] if len(lora_split) == 3 else lora_split[1] - for filename in lora_filenames: + for filename in modules.config.lora_filenames_no_special: path = Path(filename) if lora_name == path.stem: data[f'lora_combined_{li + 1}'] = f'{filename} : {lora_weight}' @@ -433,6 +429,7 @@ def parse_string(self, metadata: dict) -> str: sampler = data['sampler'] scheduler = data['scheduler'] + if sampler in SAMPLERS and SAMPLERS[sampler] != '': sampler = SAMPLERS[sampler] if sampler not in CIVITAI_NO_KARRAS and scheduler == 'karras': @@ -451,6 +448,7 @@ def parse_string(self, metadata: dict) -> str: self.fooocus_to_a1111['performance']: data['performance'], self.fooocus_to_a1111['scheduler']: scheduler, + self.fooocus_to_a1111['vae']: Path(data['vae']).stem, # workaround for multiline prompts self.fooocus_to_a1111['raw_prompt']: self.raw_prompt, self.fooocus_to_a1111['raw_negative_prompt']: self.raw_negative_prompt, @@ -462,7 +460,7 @@ def parse_string(self, metadata: dict) -> str: self.fooocus_to_a1111['refiner_model_hash']: self.refiner_model_hash } - for key in ['adaptive_cfg', 'overwrite_switch', 'refiner_swap_method', 'freeu']: + for key in ['adaptive_cfg', 'clip_skip', 'overwrite_switch', 'refiner_swap_method', 'freeu']: if key in data: generation_params[self.fooocus_to_a1111[key]] = data[key] @@ -491,22 +489,29 @@ def parse_string(self, metadata: dict) -> str: negative_prompt_text = f"\nNegative prompt: {negative_prompt_resolved}" if negative_prompt_resolved else "" return f"{positive_prompt_resolved}{negative_prompt_text}\n{generation_params_text}".strip() + @staticmethod + def add_extension_to_filename(data, filenames, key): + for filename in filenames: + path = Path(filename) + if data[key] == path.stem: + data[key] = filename + break + class FooocusMetadataParser(MetadataParser): def get_scheme(self) -> MetadataScheme: return MetadataScheme.FOOOCUS def parse_json(self, metadata: dict) -> dict: - model_filenames = modules.config.model_filenames.copy() - lora_filenames = modules.config.lora_filenames.copy() - self.remove_special_loras(lora_filenames) for key, value in metadata.items(): if value in ['', 'None']: continue if key in ['base_model', 'refiner_model']: - metadata[key] = self.replace_value_with_filename(key, value, model_filenames) + metadata[key] = self.replace_value_with_filename(key, value, modules.config.model_filenames) elif key.startswith('lora_combined_'): - metadata[key] = self.replace_value_with_filename(key, value, lora_filenames) + metadata[key] = self.replace_value_with_filename(key, value, modules.config.lora_filenames_no_special) + elif key == 'vae': + metadata[key] = self.replace_value_with_filename(key, value, modules.config.vae_filenames) else: continue @@ -533,6 +538,7 @@ def parse_string(self, metadata: list) -> str: res['refiner_model'] = self.refiner_model_name res['refiner_model_hash'] = self.refiner_model_hash + res['vae'] = self.vae_name res['loras'] = self.loras if modules.config.metadata_created_by != '': diff --git a/repositories/Fooocus/modules/model_loader.py b/repositories/Fooocus/modules/model_loader.py index 8ba336a..1143f75 100644 --- a/repositories/Fooocus/modules/model_loader.py +++ b/repositories/Fooocus/modules/model_loader.py @@ -14,6 +14,8 @@ def load_file_from_url( Returns the path to the downloaded file. """ + domain = os.environ.get("HF_MIRROR", "https://huggingface.co").rstrip('/') + url = str.replace(url, "https://huggingface.co", domain, 1) os.makedirs(model_dir, exist_ok=True) if not file_name: parts = urlparse(url) diff --git a/repositories/Fooocus/modules/patch_precision.py b/repositories/Fooocus/modules/patch_precision.py index 83569bd..22ffda0 100644 --- a/repositories/Fooocus/modules/patch_precision.py +++ b/repositories/Fooocus/modules/patch_precision.py @@ -51,6 +51,8 @@ def patched_register_schedule(self, given_betas=None, beta_schedule="linear", ti self.linear_end = linear_end sigmas = torch.tensor(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, dtype=torch.float32) self.set_sigmas(sigmas) + alphas_cumprod = torch.tensor(alphas_cumprod, dtype=torch.float32) + self.set_alphas_cumprod(alphas_cumprod) return diff --git a/repositories/Fooocus/modules/private_logger.py b/repositories/Fooocus/modules/private_logger.py index edd9457..eb8f0cc 100644 --- a/repositories/Fooocus/modules/private_logger.py +++ b/repositories/Fooocus/modules/private_logger.py @@ -21,7 +21,7 @@ def get_current_html_path(output_format=None): return html_name -def log(img, metadata, metadata_parser: MetadataParser | None = None, output_format=None) -> str: +def log(img, metadata, metadata_parser: MetadataParser | None = None, output_format=None, task=None) -> str: path_outputs = modules.config.temp_path if args_manager.args.disable_image_log else modules.config.path_outputs output_format = output_format if output_format else modules.config.default_output_format date_string, local_temp_filename, only_name = generate_temp_filename(folder=path_outputs, extension=output_format) @@ -111,9 +111,15 @@ def log(img, metadata, metadata_parser: MetadataParser | None = None, output_for for label, key, value in metadata: value_txt = str(value).replace('\n', '
') item += f"{label}{value_txt}\n" + + if task is not None and 'positive' in task and 'negative' in task: + full_prompt_details = f"""
Positive{', '.join(task['positive'])}
+
Negative{', '.join(task['negative'])}
""" + item += f"Full raw prompt{full_prompt_details}\n" + item += "" - js_txt = urllib.parse.quote(json.dumps({k: v for _, k, v in metadata}, indent=0), safe='') + js_txt = urllib.parse.quote(json.dumps({k: v for _, k, v, in metadata}, indent=0), safe='') item += f"
" item += "" diff --git a/repositories/Fooocus/modules/sample_hijack.py b/repositories/Fooocus/modules/sample_hijack.py index 5936a09..4ab3cbb 100644 --- a/repositories/Fooocus/modules/sample_hijack.py +++ b/repositories/Fooocus/modules/sample_hijack.py @@ -3,6 +3,7 @@ import ldm_patched.modules.model_management from collections import namedtuple +from ldm_patched.contrib.external_align_your_steps import AlignYourStepsScheduler from ldm_patched.contrib.external_custom_sampler import SDTurboScheduler from ldm_patched.k_diffusion import sampling as k_diffusion_sampling from ldm_patched.modules.samplers import normal_scheduler, simple_scheduler, ddim_scheduler @@ -175,6 +176,9 @@ def calculate_sigmas_scheduler_hacked(model, scheduler_name, steps): sigmas = normal_scheduler(model, steps, sgm=True) elif scheduler_name == "turbo": sigmas = SDTurboScheduler().get_sigmas(namedtuple('Patcher', ['model'])(model=model), steps=steps, denoise=1.0)[0] + elif scheduler_name == "align_your_steps": + model_type = 'SDXL' if isinstance(model.latent_format, ldm_patched.modules.latent_formats.SDXL) else 'SD1' + sigmas = AlignYourStepsScheduler().get_sigmas(model_type=model_type, steps=steps, denoise=1.0)[0] else: raise TypeError("error invalid scheduler") return sigmas diff --git a/repositories/Fooocus/modules/sdxl_styles.py b/repositories/Fooocus/modules/sdxl_styles.py index 77ad6b5..12ab6c5 100644 --- a/repositories/Fooocus/modules/sdxl_styles.py +++ b/repositories/Fooocus/modules/sdxl_styles.py @@ -2,13 +2,12 @@ import re import json import math -import modules.config -from modules.util import get_files_from_folder +from modules.extra_utils import get_files_from_folder +from random import Random # cannot use modules.config - validators causing circular imports styles_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../sdxl_styles/')) -wildcards_max_bfs_depth = 64 def normalize_key(k): @@ -24,7 +23,6 @@ def normalize_key(k): styles = {} - styles_files = get_files_from_folder(styles_path, ['.json']) for x in ['sdxl_styles_fooocus.json', @@ -50,8 +48,13 @@ def normalize_key(k): print(f'Failed to load style file {styles_file}') style_keys = list(styles.keys()) -fooocus_expansion = "Fooocus V2" -legal_style_names = [fooocus_expansion] + style_keys +fooocus_expansion = 'Fooocus V2' +random_style_name = 'Random Style' +legal_style_names = [fooocus_expansion, random_style_name] + style_keys + + +def get_random_style(rng: Random) -> str: + return rng.choice(list(styles.items()))[0] def apply_style(style, positive): @@ -59,34 +62,7 @@ def apply_style(style, positive): return p.replace('{prompt}', positive).splitlines(), n.splitlines() -def apply_wildcards(wildcard_text, rng, i, read_wildcards_in_order): - for _ in range(wildcards_max_bfs_depth): - placeholders = re.findall(r'__([\w-]+)__', wildcard_text) - if len(placeholders) == 0: - return wildcard_text - - print(f'[Wildcards] processing: {wildcard_text}') - for placeholder in placeholders: - try: - matches = [x for x in modules.config.wildcard_filenames if os.path.splitext(os.path.basename(x))[0] == placeholder] - words = open(os.path.join(modules.config.path_wildcards, matches[0]), encoding='utf-8').read().splitlines() - words = [x for x in words if x != ''] - assert len(words) > 0 - if read_wildcards_in_order: - wildcard_text = wildcard_text.replace(f'__{placeholder}__', words[i % len(words)], 1) - else: - wildcard_text = wildcard_text.replace(f'__{placeholder}__', rng.choice(words), 1) - except: - print(f'[Wildcards] Warning: {placeholder}.txt missing or empty. ' - f'Using "{placeholder}" as a normal word.') - wildcard_text = wildcard_text.replace(f'__{placeholder}__', placeholder) - print(f'[Wildcards] {wildcard_text}') - - print(f'[Wildcards] BFS stack overflow. Current text: {wildcard_text}') - return wildcard_text - - -def get_words(arrays, totalMult, index): +def get_words(arrays, total_mult, index): if len(arrays) == 1: return [arrays[0].split(',')[index]] else: @@ -95,7 +71,7 @@ def get_words(arrays, totalMult, index): index -= index % len(words) index /= len(words) index = math.floor(index) - return [word] + get_words(arrays[1:], math.floor(totalMult/len(words)), index) + return [word] + get_words(arrays[1:], math.floor(total_mult / len(words)), index) def apply_arrays(text, index): diff --git a/repositories/Fooocus/modules/ui_gradio_extensions.py b/repositories/Fooocus/modules/ui_gradio_extensions.py index bebf9f8..409c7e3 100644 --- a/repositories/Fooocus/modules/ui_gradio_extensions.py +++ b/repositories/Fooocus/modules/ui_gradio_extensions.py @@ -39,7 +39,7 @@ def javascript_html(): head += f'\n' head += f'\n' head += f'\n' - head += f'\n' + head += f'\n' if args_manager.args.theme: head += f'\n' diff --git a/repositories/Fooocus/modules/util.py b/repositories/Fooocus/modules/util.py index 9e0fb29..8317dd5 100644 --- a/repositories/Fooocus/modules/util.py +++ b/repositories/Fooocus/modules/util.py @@ -1,4 +1,4 @@ -import typing +from pathlib import Path import numpy as np import datetime @@ -6,16 +6,27 @@ import math import os import cv2 +import re +from typing import List, Tuple, AnyStr, NamedTuple + import json import hashlib from PIL import Image +import modules.config import modules.sdxl_styles LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS) + +# Regexp compiled once. Matches entries with the following pattern: +# +# +LORAS_PROMPT_PATTERN = re.compile(r"()", re.X) + HASH_SHA256_LENGTH = 10 + def erode_or_dilate(x, k): k = int(k) if k > 0: @@ -163,25 +174,6 @@ def generate_temp_filename(folder='./outputs/', extension='png'): return date_string, os.path.abspath(result), filename -def get_files_from_folder(folder_path, extensions=None, name_filter=None): - if not os.path.isdir(folder_path): - raise ValueError("Folder path is not a valid directory.") - - filenames = [] - - for root, dirs, files in os.walk(folder_path, topdown=False): - relative_path = os.path.relpath(root, folder_path) - if relative_path == ".": - relative_path = "" - for filename in sorted(files, key=lambda s: s.casefold()): - _, file_extension = os.path.splitext(filename) - if (extensions is None or file_extension.lower() in extensions) and (name_filter is None or name_filter in _): - path = os.path.join(relative_path, filename) - filenames.append(path) - - return filenames - - def sha256(filename, use_addnet_hash=False, length=HASH_SHA256_LENGTH): print(f"Calculating sha256 for {filename}: ", end='') if use_addnet_hash: @@ -355,7 +347,7 @@ def extract_styles_from_prompt(prompt, negative_prompt): return list(reversed(extracted)), real_prompt, negative_prompt -class PromptStyle(typing.NamedTuple): +class PromptStyle(NamedTuple): name: str prompt: str negative_prompt: str @@ -370,7 +362,18 @@ def is_json(data: str) -> bool: return True +def get_filname_by_stem(lora_name, filenames: List[str]) -> str | None: + for filename in filenames: + path = Path(filename) + if lora_name == path.stem: + return filename + return None + + def get_file_from_folder_list(name, folders): + if not isinstance(folders, list): + folders = [folders] + for folder in folders: filename = os.path.abspath(os.path.realpath(os.path.join(folder, name))) if os.path.isfile(filename): @@ -378,7 +381,6 @@ def get_file_from_folder_list(name, folders): return os.path.abspath(os.path.realpath(os.path.join(folders[0], name))) - def ordinal_suffix(number: int) -> str: return 'th' if 10 <= number % 100 <= 20 else {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th') @@ -390,5 +392,111 @@ def makedirs_with_log(path): print(f'Directory {path} could not be created, reason: {error}') -def get_enabled_loras(loras: list) -> list: - return [[lora[1], lora[2]] for lora in loras if lora[0]] +def get_enabled_loras(loras: list, remove_none=True) -> list: + return [(lora[1], lora[2]) for lora in loras if lora[0] and (lora[1] != 'None' if remove_none else True)] + + +def parse_lora_references_from_prompt(prompt: str, loras: List[Tuple[AnyStr, float]], loras_limit: int = 5, + skip_file_check=False, prompt_cleanup=True, deduplicate_loras=True) -> tuple[List[Tuple[AnyStr, float]], str]: + found_loras = [] + prompt_without_loras = '' + cleaned_prompt = '' + for token in prompt.split(','): + matches = LORAS_PROMPT_PATTERN.findall(token) + + if len(matches) == 0: + prompt_without_loras += token + ', ' + continue + for match in matches: + lora_name = match[1] + '.safetensors' + if not skip_file_check: + lora_name = get_filname_by_stem(match[1], modules.config.lora_filenames_no_special) + if lora_name is not None: + found_loras.append((lora_name, float(match[2]))) + token = token.replace(match[0], '') + prompt_without_loras += token + ', ' + + if prompt_without_loras != '': + cleaned_prompt = prompt_without_loras[:-2] + + if prompt_cleanup: + cleaned_prompt = cleanup_prompt(prompt_without_loras) + + new_loras = [] + lora_names = [lora[0] for lora in loras] + for found_lora in found_loras: + if deduplicate_loras and (found_lora[0] in lora_names or found_lora in new_loras): + continue + new_loras.append(found_lora) + + if len(new_loras) == 0: + return loras, cleaned_prompt + + updated_loras = [] + for lora in loras + new_loras: + if lora[0] != "None": + updated_loras.append(lora) + + return updated_loras[:loras_limit], cleaned_prompt + + +def cleanup_prompt(prompt): + prompt = re.sub(' +', ' ', prompt) + prompt = re.sub(',+', ',', prompt) + cleaned_prompt = '' + for token in prompt.split(','): + token = token.strip() + if token == '': + continue + cleaned_prompt += token + ', ' + return cleaned_prompt[:-2] + + +def apply_wildcards(wildcard_text, rng, i, read_wildcards_in_order) -> str: + for _ in range(modules.config.wildcards_max_bfs_depth): + placeholders = re.findall(r'__([\w-]+)__', wildcard_text) + if len(placeholders) == 0: + return wildcard_text + + print(f'[Wildcards] processing: {wildcard_text}') + for placeholder in placeholders: + try: + matches = [x for x in modules.config.wildcard_filenames if os.path.splitext(os.path.basename(x))[0] == placeholder] + words = open(os.path.join(modules.config.path_wildcards, matches[0]), encoding='utf-8').read().splitlines() + words = [x for x in words if x != ''] + assert len(words) > 0 + if read_wildcards_in_order: + wildcard_text = wildcard_text.replace(f'__{placeholder}__', words[i % len(words)], 1) + else: + wildcard_text = wildcard_text.replace(f'__{placeholder}__', rng.choice(words), 1) + except: + print(f'[Wildcards] Warning: {placeholder}.txt missing or empty. ' + f'Using "{placeholder}" as a normal word.') + wildcard_text = wildcard_text.replace(f'__{placeholder}__', placeholder) + print(f'[Wildcards] {wildcard_text}') + + print(f'[Wildcards] BFS stack overflow. Current text: {wildcard_text}') + return wildcard_text + + +def get_image_size_info(image: np.ndarray, aspect_ratios: list) -> str: + try: + image = Image.fromarray(np.uint8(image)) + width, height = image.size + ratio = round(width / height, 2) + gcd = math.gcd(width, height) + lcm_ratio = f'{width // gcd}:{height // gcd}' + size_info = f'Image Size: {width} x {height}, Ratio: {ratio}, {lcm_ratio}' + + closest_ratio = min(aspect_ratios, key=lambda x: abs(ratio - float(x.split('*')[0]) / float(x.split('*')[1]))) + recommended_width, recommended_height = map(int, closest_ratio.split('*')) + recommended_ratio = round(recommended_width / recommended_height, 2) + recommended_gcd = math.gcd(recommended_width, recommended_height) + recommended_lcm_ratio = f'{recommended_width // recommended_gcd}:{recommended_height // recommended_gcd}' + + size_info = f'{width} x {height}, {ratio}, {lcm_ratio}' + size_info += f'\n{recommended_width} x {recommended_height}, {recommended_ratio}, {recommended_lcm_ratio}' + + return size_info + except Exception as e: + return f'Error reading image: {e}' diff --git a/repositories/Fooocus/presets/anime.json b/repositories/Fooocus/presets/anime.json index 2610677..78607ed 100644 --- a/repositories/Fooocus/presets/anime.json +++ b/repositories/Fooocus/presets/anime.json @@ -1,5 +1,5 @@ { - "default_model": "animaPencilXL_v100.safetensors", + "default_model": "animaPencilXL_v310.safetensors", "default_refiner": "None", "default_refiner_switch": 0.5, "default_loras": [ @@ -29,7 +29,7 @@ 1.0 ] ], - "default_cfg_scale": 7.0, + "default_cfg_scale": 6.0, "default_sample_sharpness": 2.0, "default_sampler": "dpmpp_2m_sde_gpu", "default_scheduler": "karras", @@ -43,9 +43,15 @@ ], "default_aspect_ratio": "896*1152", "checkpoint_downloads": { - "animaPencilXL_v100.safetensors": "https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/animaPencilXL_v100.safetensors" + "animaPencilXL_v310.safetensors": "https://huggingface.co/mashb1t/fav_models/resolve/main/fav/animaPencilXL_v310.safetensors" }, "embeddings_downloads": {}, "lora_downloads": {}, - "previous_default_models": [] + "previous_default_models": [ + "animaPencilXL_v300.safetensors", + "animaPencilXL_v260.safetensors", + "animaPencilXL_v210.safetensors", + "animaPencilXL_v200.safetensors", + "animaPencilXL_v100.safetensors" + ] } \ No newline at end of file diff --git a/repositories/Fooocus/sdxl_styles/samples/random_style.jpg b/repositories/Fooocus/sdxl_styles/samples/random_style.jpg new file mode 100644 index 0000000..9f68510 Binary files /dev/null and b/repositories/Fooocus/sdxl_styles/samples/random_style.jpg differ diff --git a/repositories/Fooocus/wildcards/.gitignore b/repositories/Fooocus/wildcards/.gitignore new file mode 100644 index 0000000..7e4ac18 --- /dev/null +++ b/repositories/Fooocus/wildcards/.gitignore @@ -0,0 +1,8 @@ +*.txt +!animal.txt +!artist.txt +!color.txt +!color_flower.txt +!extended-color.txt +!flower.txt +!nationality.txt \ No newline at end of file