diff --git a/README.md b/README.md
index b4ab9fb..e57d2b8 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 FastAPI powered API for [Fooocus](https://github.com/lllyasviel/Fooocus)
 
-Currently loaded Fooocus version: 2.1.679
+Currently loaded Fooocus version: 2.1.728
 
 ### Run with Replicate
 You can now run Fooocus-API on Replicate; the model is available at [konieshadow/fooocus-api](https://replicate.com/konieshadow/fooocus-api).
diff --git a/fooocusapi/repositories_versions.py b/fooocusapi/repositories_versions.py
index 1916bb8..b13145c 100644
--- a/fooocusapi/repositories_versions.py
+++ b/fooocusapi/repositories_versions.py
@@ -1,5 +1,5 @@
 import os
-fooocus_version = '2.1.679'
+fooocus_version = '2.1.728'
 fooocus_commit_hash = os.environ.get(
-    'FOOOCUS_COMMIT_HASH', "589409e3f4832014b931884eac6963997de01739")
+    'FOOOCUS_COMMIT_HASH', "4cf0c778da33fb91d591771bbe90123a45c54d3c")
diff --git a/fooocusapi/worker.py b/fooocusapi/worker.py
index e3abcea..d4b46b0 100644
--- a/fooocusapi/worker.py
+++ b/fooocusapi/worker.py
@@ -24,11 +24,11 @@ def process_generate(params: ImageGenerationParams) -> List[ImageGenerationResul
     import modules.advanced_parameters as advanced_parameters
     import fooocus_extras.preprocessors as preprocessors
     import fooocus_extras.ip_adapter as ip_adapter
-    from modules.util import join_prompts, remove_empty_str, image_is_generated_in_current_ui, resize_image, HWC3, make_sure_that_image_is_not_too_large
+    from modules.util import join_prompts, remove_empty_str, resize_image, HWC3, set_image_shape_ceil, get_image_shape_ceil, get_shape_ceil
     from modules.private_logger import log
     from modules.upscaler import perform_upscale
     from modules.expansion import safe_str
-    from modules.sdxl_styles import apply_style, fooocus_expansion, aspect_ratios
+    from modules.sdxl_styles import apply_style, fooocus_expansion, aspect_ratios, apply_wildcards
 
     outputs = []
@@ -168,11 +168,8 @@ def build_advanced_parameters():
     skip_prompt_processing = False
     refiner_swap_method = advanced_parameters.refiner_swap_method
 
-    raw_prompt = prompt
-    raw_negative_prompt = negative_prompt
-
     inpaint_image = None
-    input_mask = None
+    inpaint_mask = None
     inpaint_head_model_path = None
     controlnet_canny_path = None
     controlnet_cpds_path = None
@@ -221,10 +218,10 @@ def build_advanced_parameters():
     if (current_tab == 'inpaint' or (current_tab == 'ip' and advanced_parameters.mixing_image_prompt_and_inpaint))\
             and isinstance(inpaint_input_image, dict):
         inpaint_image = inpaint_input_image['image']
-        input_mask = inpaint_input_image['mask'][:, :, 0]
+        inpaint_mask = inpaint_input_image['mask'][:, :, 0]
         inpaint_image = HWC3(inpaint_image)
-        if isinstance(inpaint_image, np.ndarray) and isinstance(input_mask, np.ndarray) \
-                and (np.any(input_mask > 127) or len(outpaint_selections) > 0):
+        if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
+                and (np.any(inpaint_mask > 127) or len(outpaint_selections) > 0):
             progressbar(1, 'Downloading inpainter ...')
             inpaint_head_model_path, inpaint_patch_model_path = path.downloading_inpaint_models(advanced_parameters.inpaint_engine)
             loras += [(inpaint_patch_model_path, 1.0)]
@@ -280,62 +277,70 @@ def build_advanced_parameters():
     pipeline.refresh_everything(refiner_model_name=refiner_model_name, base_model_name=base_model_name, loras=loras)
 
     progressbar(3, 'Processing prompts ...')
 
-    positive_basic_workloads = []
-    negative_basic_workloads = []
-
-    if use_style:
-        for s in style_selections:
-            p, n = apply_style(s, positive=prompt)
-            positive_basic_workloads.append(p)
-            negative_basic_workloads.append(n)
-    else:
-        positive_basic_workloads.append(prompt)
-
-    negative_basic_workloads.append(negative_prompt)  # Always use independent workload for negative.
-
-    positive_basic_workloads = positive_basic_workloads + extra_positive_prompts
-    negative_basic_workloads = negative_basic_workloads + extra_negative_prompts
-
-    positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=prompt)
-    negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=negative_prompt)
-
-    positive_top_k = len(positive_basic_workloads)
-    negative_top_k = len(negative_basic_workloads)
-
-    tasks = [dict(
-        task_seed=seed + i,
-        positive=positive_basic_workloads,
-        negative=negative_basic_workloads,
-        expansion='',
-        c=None,
-        uc=None,
-    ) for i in range(image_number)]
+    tasks = []
+    for i in range(image_number):
+        task_seed = seed + i
+        task_rng = random.Random(task_seed)  # may bind to inpaint noise in the future
+
+        task_prompt = apply_wildcards(prompt, task_rng)
+        task_negative_prompt = apply_wildcards(negative_prompt, task_rng)
+        task_extra_positive_prompts = [apply_wildcards(pmt, task_rng) for pmt in extra_positive_prompts]
+        task_extra_negative_prompts = [apply_wildcards(pmt, task_rng) for pmt in extra_negative_prompts]
+
+        positive_basic_workloads = []
+        negative_basic_workloads = []
+
+        if use_style:
+            for s in style_selections:
+                p, n = apply_style(s, positive=task_prompt)
+                positive_basic_workloads.append(p)
+                negative_basic_workloads.append(n)
+        else:
+            positive_basic_workloads.append(task_prompt)
+
+        negative_basic_workloads.append(task_negative_prompt)  # Always use independent workload for negative.
+
+        positive_basic_workloads = positive_basic_workloads + task_extra_positive_prompts
+        negative_basic_workloads = negative_basic_workloads + task_extra_negative_prompts
+
+        positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=task_prompt)
+        negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=task_negative_prompt)
+
+        tasks.append(dict(
+            task_seed=task_seed,
+            task_prompt=task_prompt,
+            task_negative_prompt=task_negative_prompt,
+            positive=positive_basic_workloads,
+            negative=negative_basic_workloads,
+            expansion='',
+            c=None,
+            uc=None,
+            positive_top_k=len(positive_basic_workloads),
+            negative_top_k=len(negative_basic_workloads),
+            log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts),
+            log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts),
+        ))
 
     if use_expansion:
         for i, t in enumerate(tasks):
             progressbar(5, f'Preparing Fooocus text #{i + 1} ...')
-            expansion = pipeline.final_expansion(prompt, t['task_seed'])
+            expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed'])
             print(f'[Prompt Expansion] New suffix: {expansion}')
             t['expansion'] = expansion
-            t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(prompt, expansion)]  # Deep copy.
+            t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(t['task_prompt'], expansion)]  # Deep copy.
 
     for i, t in enumerate(tasks):
         progressbar(7, f'Encoding positive #{i + 1} ...')
-        t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=positive_top_k)
+        t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k'])
 
     for i, t in enumerate(tasks):
         progressbar(10, f'Encoding negative #{i + 1} ...')
-        t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=negative_top_k)
+        t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k'])
 
     if len(goals) > 0:
         progressbar(13, 'Image processing ...')
 
     if 'vary' in goals:
-        if not image_is_generated_in_current_ui(uov_input_image, ui_width=width, ui_height=height):
-            uov_input_image = resize_image(uov_input_image, width=width, height=height)
-            print(f'Resolution corrected - users are uploading their own images.')
-        else:
-            print(f'Processing images generated by Fooocus.')
         if 'subtle' in uov_method:
             denoising_strength = 0.5
         if 'strong' in uov_method:
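The rewritten prompt loop above gives every image in a batch its own `random.Random(task_seed)`, so wildcard substitution is reproducible for a given seed yet differs across the batch, and per-task values (`positive_top_k`, the log prompts) now travel inside each task dict instead of module-level state such as the removed `raw_prompt`. A minimal sketch of that seeding idea, using a simplified stand-in for `apply_wildcards` (the real Fooocus helper reads wildcard files from disk; the table and function names here are hypothetical):

    import random
    import re

    # Hypothetical in-memory wildcard table; Fooocus loads these from text files.
    WILDCARDS = {'color': ['red', 'green', 'blue'], 'animal': ['cat', 'fox', 'owl']}

    def apply_wildcards_sketch(prompt: str, rng: random.Random) -> str:
        # Replace each __name__ token with a choice drawn from the task's own RNG.
        return re.sub(r'__([a-zA-Z0-9_-]+)__',
                      lambda m: rng.choice(WILDCARDS.get(m.group(1), [m.group(0)])),
                      prompt)

    seed = 12345
    for i in range(3):  # one RNG per task: the same seed always yields the same batch
        rng = random.Random(seed + i)
        print(apply_wildcards_sketch('a __color__ __animal__, cinematic', rng))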
@@ -343,7 +348,16 @@ def build_advanced_parameters():
         if advanced_parameters.overwrite_vary_strength > 0:
             denoising_strength = advanced_parameters.overwrite_vary_strength
 
-        uov_input_image = make_sure_that_image_is_not_too_large(uov_input_image)
+        shape_ceil = get_image_shape_ceil(uov_input_image)
+        if shape_ceil < 1024:
+            print(f'[Vary] Image is resized because it is too small.')
+            shape_ceil = 1024
+        elif shape_ceil > 2048:
+            print(f'[Vary] Image is resized because it is too big.')
+            shape_ceil = 2048
+
+        uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil)
+
         initial_pixels = core.numpy_to_pytorch(uov_input_image)
         progressbar(13, 'VAE encoding ...')
         initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=initial_pixels)
@@ -368,18 +382,12 @@ def build_advanced_parameters():
         else:
             f = 1.0
 
-        width_f = int(width * f)
-        height_f = int(height * f)
-
-        if image_is_generated_in_current_ui(uov_input_image, ui_width=width_f, ui_height=height_f):
-            uov_input_image = resize_image(uov_input_image, width=int(W * f), height=int(H * f))
-            print(f'Processing images generated by Fooocus.')
-        else:
-            uov_input_image = resize_image(uov_input_image, width=width_f, height=height_f)
-            print(f'Resolution corrected - users are uploading their own images.')
-
-        H, W, C = uov_input_image.shape
-        image_is_super_large = H * W > 2800 * 2800
+        shape_ceil = get_shape_ceil(H * f, W * f)
+        if shape_ceil < 1024:
+            print(f'[Upscale] Image is resized because it is too small.')
+            shape_ceil = 1024
+        uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil)
+        image_is_super_large = shape_ceil > 2800
 
         if 'fast' in uov_method:
             direct_return = True
@@ -393,12 +401,9 @@ def build_advanced_parameters():
 
         if direct_return:
             d = [('Upscale (Fast)', '2x')]
-            if save_log:
-                log(uov_input_image, d, single_line_number=1)
-            outputs.append(['results', [uov_input_image], -1 if len(tasks) == 0 else tasks[0]['task_seed']])
-            results = make_results_from_outputs()
-            task_queue.finish_task(task_seq, results, False)
-            return results * image_number
+            log(uov_input_image, d, single_line_number=1)
+            outputs.append(['results', [uov_input_image]])
+            return
 
         tiled = True
         denoising_strength = 0.382
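A note on the `shape_ceil` helpers used in the two hunks above: their definitions live in `modules/util.py` upstream, so the following is an assumption-labeled sketch rather than the actual implementation. The apparent idea is that the "ceil" is the side of a square with roughly the same area as the image, rounded up to a multiple of 64, and resizing to a new ceil rescales both sides by the same factor:

    import math
    import numpy as np
    import cv2  # assumed available; used here only for resizing

    def get_shape_ceil_sketch(h: float, w: float) -> float:
        # Side of the equivalent-area square, rounded up to a multiple of 64.
        return math.ceil(math.sqrt(h * w) / 64.0) * 64.0

    def set_image_shape_ceil_sketch(img: np.ndarray, target_ceil: float) -> np.ndarray:
        # Scale both sides by one factor so the equivalent-area square
        # reaches target_ceil; aspect ratio is preserved.
        h, w = img.shape[:2]
        k = target_ceil / math.sqrt(h * w)
        return cv2.resize(img, (int(round(w * k)), int(round(h * k))),
                          interpolation=cv2.INTER_LANCZOS4)

    # 'vary' clamps the ceil into [1024, 2048] before VAE encoding:
    img = np.zeros((600, 800, 3), dtype=np.uint8)
    ceil = min(max(get_shape_ceil_sketch(*img.shape[:2]), 1024), 2048)
    print(ceil, set_image_shape_ceil_sketch(img, ceil).shape)

Clamping the ceil into [1024, 2048] for 'vary' (and to at least 1024 for 'upscale') keeps the VAE input in the resolution band SDXL handles well, which is why the old `make_sure_that_image_is_not_too_large` call goes away.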
@@ -409,70 +414,82 @@ def build_advanced_parameters():
         initial_pixels = core.numpy_to_pytorch(uov_input_image)
         progressbar(13, 'VAE encoding ...')
-        initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=initial_pixels, tiled=True)
+        initial_latent = core.encode_vae(
+            vae=pipeline.final_vae if pipeline.final_refiner_vae is None else pipeline.final_refiner_vae,
+            pixels=initial_pixels, tiled=True)
         B, C, H, W = initial_latent['samples'].shape
         width = W * 8
         height = H * 8
         print(f'Final resolution is {str((height, width))}.')
+        refiner_swap_method = 'upscale'
 
     if 'inpaint' in goals:
         if len(outpaint_selections) > 0:
             H, W, C = inpaint_image.shape
             if 'top' in outpaint_selections:
                 inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge')
-                input_mask = np.pad(input_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant',
-                                    constant_values=255)
+                inpaint_mask = np.pad(inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant',
+                                      constant_values=255)
             if 'bottom' in outpaint_selections:
                 inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge')
-                input_mask = np.pad(input_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant',
-                                    constant_values=255)
+                inpaint_mask = np.pad(inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant',
+                                      constant_values=255)
 
             H, W, C = inpaint_image.shape
             if 'left' in outpaint_selections:
                 inpaint_image = np.pad(inpaint_image, [[0, 0], [int(H * 0.3), 0], [0, 0]], mode='edge')
-                input_mask = np.pad(input_mask, [[0, 0], [int(H * 0.3), 0]], mode='constant',
-                                    constant_values=255)
+                inpaint_mask = np.pad(inpaint_mask, [[0, 0], [int(H * 0.3), 0]], mode='constant',
+                                      constant_values=255)
             if 'right' in outpaint_selections:
                 inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(H * 0.3)], [0, 0]], mode='edge')
-                input_mask = np.pad(input_mask, [[0, 0], [0, int(H * 0.3)]], mode='constant',
-                                    constant_values=255)
+                inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int(H * 0.3)]], mode='constant',
+                                      constant_values=255)
 
             inpaint_image = np.ascontiguousarray(inpaint_image.copy())
-            input_mask = np.ascontiguousarray(input_mask.copy())
+            inpaint_mask = np.ascontiguousarray(inpaint_mask.copy())
 
-        inpaint_worker.current_task = inpaint_worker.InpaintWorker(image=inpaint_image, mask=input_mask,
+        inpaint_worker.current_task = inpaint_worker.InpaintWorker(image=inpaint_image, mask=inpaint_mask,
                                                                    is_outpaint=len(outpaint_selections) > 0)
 
-        # print(f'Inpaint task: {str((height, width))}')
-        # outputs.append(['results', inpaint_worker.current_task.visualize_mask_processing()])
-        # return
+        pipeline.final_unet.model.diffusion_model.in_inpaint = True
+
+        if advanced_parameters.debugging_cn_preprocessor:
+            outputs.append(['results', inpaint_worker.current_task.visualize_mask_processing()])
+            return
+
+        progressbar(13, 'VAE Inpaint encoding ...')
+
+        inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
+        inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image)
+        inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask)
+
+        latent_inpaint, latent_mask = core.encode_vae_inpaint(
+            mask=inpaint_pixel_mask,
+            vae=pipeline.final_vae,
+            pixels=inpaint_pixel_image)
+
+        latent_swap = None
+        if pipeline.final_refiner_vae is not None:
+            progressbar(13, 'VAE Inpaint SD15 encoding ...')
+            latent_swap = core.encode_vae(
+                vae=pipeline.final_refiner_vae,
+                pixels=inpaint_pixel_fill)['samples']
 
         progressbar(13, 'VAE encoding ...')
-        inpaint_pixels = core.numpy_to_pytorch(inpaint_worker.current_task.image_ready)
-        initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=inpaint_pixels)
-        inpaint_latent = initial_latent['samples']
-        B, C, H, W = inpaint_latent.shape
-        input_mask = core.numpy_to_pytorch(inpaint_worker.current_task.mask_ready[None])
-        input_mask = torch.nn.functional.avg_pool2d(input_mask, (8, 8))
-        input_mask = torch.nn.functional.interpolate(input_mask, (H, W), mode='bilinear')
-        inpaint_worker.current_task.load_latent(latent=inpaint_latent, mask=input_mask)
-
-        progressbar(13, 'VAE inpaint encoding ...')
-
-        input_mask = (inpaint_worker.current_task.mask_ready > 0).astype(np.float32)
-        input_mask = torch.tensor(input_mask).float()
-
-        vae_dict = core.encode_vae_inpaint(
-            mask=input_mask, vae=pipeline.final_vae, pixels=inpaint_pixels)
-
-        inpaint_latent = vae_dict['samples']
-        input_mask = vae_dict['noise_mask']
-        inpaint_worker.current_task.load_inpaint_guidance(latent=inpaint_latent, mask=input_mask,
-                                                          model_path=inpaint_head_model_path)
-
-        B, C, H, W = inpaint_latent.shape
-        final_height, final_width = inpaint_worker.current_task.image_raw.shape[:2]
+        latent_fill = core.encode_vae(
+            vae=pipeline.final_vae,
+            pixels=inpaint_pixel_fill)['samples']
+
+        inpaint_worker.current_task.load_latent(latent_fill=latent_fill,
+                                                latent_inpaint=latent_inpaint,
+                                                latent_mask=latent_mask,
+                                                latent_swap=latent_swap,
+                                                inpaint_head_model_path=inpaint_head_model_path)
+
+        B, C, H, W = latent_fill.shape
         height, width = H * 8, W * 8
+        final_height, final_width = inpaint_worker.current_task.image.shape[:2]
+        initial_latent = {'samples': latent_fill}
         print(f'Final resolution is {str((final_height, final_width))}, latent is {str((height, width))}.')
 
     if 'cn' in goals:
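The outpaint branch above grows the canvas by 30% per selected side: the image is padded in 'edge' mode (repeating border pixels gives the sampler coherent content to extend), while the mask is padded with 255 so the whole new band is repainted. Note that, as in the diff, left/right padding widths are computed from the re-read height via `int(H * 0.3)`. A self-contained sketch of the same padding (function name is illustrative):

    import numpy as np

    def outpaint_pad_sketch(image: np.ndarray, mask: np.ndarray, sides: set):
        # Mirrors the branch above: edge-pad the image, 255-pad the mask,
        # growing each selected side by 30% (left/right use the current height).
        H, W, C = image.shape
        if 'top' in sides:
            image = np.pad(image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge')
            mask = np.pad(mask, [[int(H * 0.3), 0], [0, 0]], mode='constant', constant_values=255)
        if 'bottom' in sides:
            image = np.pad(image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge')
            mask = np.pad(mask, [[0, int(H * 0.3)], [0, 0]], mode='constant', constant_values=255)
        H, W, C = image.shape  # re-read after vertical growth
        if 'left' in sides:
            image = np.pad(image, [[0, 0], [int(H * 0.3), 0], [0, 0]], mode='edge')
            mask = np.pad(mask, [[0, 0], [int(H * 0.3), 0]], mode='constant', constant_values=255)
        if 'right' in sides:
            image = np.pad(image, [[0, 0], [0, int(H * 0.3)], [0, 0]], mode='edge')
            mask = np.pad(mask, [[0, 0], [0, int(H * 0.3)]], mode='constant', constant_values=255)
        return np.ascontiguousarray(image), np.ascontiguousarray(mask)

    img = np.zeros((512, 512, 3), dtype=np.uint8)
    msk = np.zeros((512, 512), dtype=np.uint8)
    img2, msk2 = outpaint_pad_sketch(img, msk, {'bottom', 'right'})
    print(img2.shape, msk2.shape)  # (665, 711, 3) (665, 711)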
@@ -483,10 +500,8 @@ def build_advanced_parameters():
             cn_img = HWC3(cn_img)
             task[0] = core.numpy_to_pytorch(cn_img)
             if advanced_parameters.debugging_cn_preprocessor:
-                outputs.append(['results', [cn_img], task['task_seed']])
-                results = make_results_from_outputs()
-                task_queue.finish_task(task_seq, results, False)
-                return results
+                outputs.append(['results', [cn_img]])
+                return
         for task in cn_tasks[flags.cn_cpds]:
             cn_img, cn_stop, cn_weight = task
             cn_img = resize_image(HWC3(cn_img), width=width, height=height)
@@ -494,10 +509,8 @@ def build_advanced_parameters():
             cn_img = HWC3(cn_img)
             task[0] = core.numpy_to_pytorch(cn_img)
             if advanced_parameters.debugging_cn_preprocessor:
-                outputs.append(['results', [cn_img], task['task_seed']])
-                results = make_results_from_outputs()
-                task_queue.finish_task(task_seq, results, False)
-                return results
+                outputs.append(['results', [cn_img]])
+                return
         for task in cn_tasks[flags.cn_ip]:
             cn_img, cn_stop, cn_weight = task
             cn_img = HWC3(cn_img)
@@ -507,10 +520,8 @@ def build_advanced_parameters():
             task[0] = ip_adapter.preprocess(cn_img)
             if advanced_parameters.debugging_cn_preprocessor:
-                outputs.append(['results', [cn_img], task['task_seed']])
-                results = make_results_from_outputs()
-                task_queue.finish_task(task_seq, results, False)
-                return results
+                outputs.append(['results', [cn_img]])
+                return
 
         if len(cn_tasks[flags.cn_ip]) > 0:
             pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, cn_tasks[flags.cn_ip])
@@ -571,7 +582,7 @@ def callback(step, x0, x, total_steps, y):
                 denoise=denoising_strength,
                 tiled=tiled,
                 cfg_scale=cfg_scale,
-                refiner_swap_method=advanced_parameters.refiner_swap_method
+                refiner_swap_method=refiner_swap_method
             )
 
             del task['c'], task['uc'], positive_cond, negative_cond  # Save memory
@@ -581,8 +592,8 @@ def callback(step, x0, x, total_steps, y):
             for x in imgs:
                 d = [
-                    ('Prompt', raw_prompt),
-                    ('Negative Prompt', raw_negative_prompt),
+                    ('Prompt', task['log_positive_prompt']),
+                    ('Negative Prompt', task['log_negative_prompt']),
                     ('Fooocus V2 Expansion', task['expansion']),
                     ('Styles', str(raw_style_selections)),
                     ('Performance', performance_selection),
@@ -599,8 +610,7 @@ def callback(step, x0, x, total_steps, y):
                 for n, w in loras_raw:
                     if n != 'None':
                         d.append((f'LoRA [{n}] weight', w))
-                if save_log:
-                    log(x, d, single_line_number=3)
+                log(x, d, single_line_number=3)

# Fooocus async_worker.py code end
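For reference, the metadata log that the last two hunks touch is just a list of (label, value) pairs handed to `modules.private_logger.log`, now called unconditionally since the `save_log` gate is removed. A rough sketch of how such a list might be rendered; the grouping behavior of `single_line_number` is an assumption, and the field values are made up:

    # Hypothetical metadata, shaped like the d list built above.
    d = [
        ('Prompt', 'a red fox, cinematic'),
        ('Negative Prompt', 'blurry'),
        ('Fooocus V2 Expansion', 'highly detailed, sharp focus'),
        ('Styles', "['Fooocus V2']"),
        ('Performance', 'Speed'),
        ('LoRA [example-lora.safetensors] weight', 0.5),
    ]

    def log_sketch(d, single_line_number=3):
        # Assumed semantics: print this many (key, value) fields per line.
        for i in range(0, len(d), single_line_number):
            print(' | '.join(f'{k}: {v}' for k, v in d[i:i + single_line_number]))

    log_sketch(d)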