Merge pull request #15 from jhj0517/feature/image-restoration

Add image restoration with RealESRGAN
jhj0517 · Nov 11, 2024 · 52a96bb · 52a96bb
2 parents 2bf87b3 + c387a24
commit 52a96bb
Show file tree

Hide file tree

Showing 15 changed files with 777 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -52,7 +52,10 @@ docker compose -f docker/docker-compose.yaml up
 
 Update the [`docker-compose.yaml`](https://github.com/jhj0517/AdvancedLivePortrait-WebUI/blob/master/docker/docker-compose.yaml) to match your environment if you're not using an Nvidia GPU.
 
-## ❤️ Citation and Thanks
+### 🌐 Translation 
+Any PRs for language translation for [`translation.yaml`](https://github.com/jhj0517/AdvancedLivePortrait-WebUI/blob/master/i18n/translation.yaml) would be greatly appreciated!
+
+## ❤️ Acknowledgement
 1. LivePortrait paper comes from
 ```bibtex
 @article{guo2024liveportrait,
@@ -65,8 +68,6 @@ Update the [`docker-compose.yaml`](https://github.com/jhj0517/AdvancedLivePortra
 2. The models are safetensors that have been converted by kijai: https://github.com/kijai/ComfyUI-LivePortraitKJ
 3. [ultralytics](https://github.com/ultralytics/ultralytics) is used to detect the face.
 4. This WebUI started from [ComfyUI-AdvancedLivePortrait](https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait); the various facial expressions such as AAA, EEE, Eyebrow, and Wink were found by PowerHouseMan.
-
-### 🌐 Translation 
-Any PRs for language translation for [`translation.yaml`](https://github.com/jhj0517/AdvancedLivePortrait-WebUI/blob/master/i18n/translation.yaml) would be greatly appreciated!
+5. [RealESRGAN](https://github.com/xinntao/Real-ESRGAN) is used for image restoration.
 
 
diff --git a/app.py b/app.py
@@ -41,7 +41,9 @@ def create_expression_parameters():
             gr.Slider(label=_("Sample Ratio"), minimum=-0.2, maximum=1.2, step=0.01, value=1, visible=False),
             gr.Dropdown(label=_("Sample Parts"), visible=False,
                         choices=[part.value for part in SamplePart], value=SamplePart.ALL.value),
-            gr.Slider(label=_("Face Crop Factor"), minimum=1.5, maximum=2.5, step=0.1, value=2)
+            gr.Slider(label=_("Face Crop Factor"), minimum=1.5, maximum=2.5, step=0.1, value=2),
+            gr.Checkbox(label=_("Enable Image Restoration"),
+                        info=_("This enables image restoration with RealESRGAN but slows down the speed"), value=False)
         ]
 
     @staticmethod
@@ -53,6 +55,8 @@ def create_video_parameters():
             gr.Slider(label=_("First frame eyes alignment factor"), minimum=0, maximum=1, step=0.01, value=1),
             gr.Slider(label=_("First frame mouth alignment factor"), minimum=0, maximum=1, step=0.01, value=1),
             gr.Slider(label=_("Face Crop Factor"), minimum=1.5, maximum=2.5, step=0.1, value=2),
+            gr.Checkbox(label=_("Enable Image Restoration"),
+                        info=_("This enables image restoration with RealESRGAN but slows down the speed"), value=False)
         ]
 
     def launch(self):

diff --git a/i18n/translation.yaml b/i18n/translation.yaml
@@ -32,6 +32,8 @@ en: # English
   First frame mouth alignment factor: First frame mouth alignment factor
   First frame eyes alignment factor: First frame eyes alignment factor
   Face Crop Factor: Face Crop Factor
+  Enable Image Restoration: Enable Image Restoration
+  This enables image restoration with RealESRGAN but slows down the speed: This enables image restoration with RealESRGAN but slows down the speed
 
 ko: # Korean
   Language: 언어
@@ -67,6 +69,8 @@ ko: # Korean
   First frame mouth alignment factor: 첫 프레임 입 반영 비율
   First frame eyes alignment factor: 첫 프레임 눈 반영 비율
   Face Crop Factor: 얼굴 크롭 비율
+  Enable Image Restoration: 화질 향상
+  This enables image restoration with RealESRGAN but slows down the speed: RealESRGAN 으로 화질을 향상 시킵니다. 속도는 느려집니다.
 
 ja: # Japanese
   Language: 言語
@@ -102,6 +106,8 @@ ja: # Japanese
   First frame mouth alignment factor: First frame mouth alignment factor
   First frame eyes alignment factor: First frame eyes alignment factor
   Face Crop Factor: Face Crop Factor
+  Enable Image Restoration: Enable Image Restoration
+  This enables image restoration with RealESRGAN but slows down the speed: This enables image restoration with RealESRGAN but slows down the speed
 
 es: # Spanish
   Language: Idioma
@@ -137,6 +143,8 @@ es: # Spanish
   First frame mouth alignment factor: First frame mouth alignment factor
   First frame eyes alignment factor: First frame eyes alignment factor
   Face Crop Factor: Face Crop Factor
+  Enable Image Restoration: Enable Image Restoration
+  This enables image restoration with RealESRGAN but slows down the speed: This enables image restoration with RealESRGAN but slows down the speed
 
 fr: # French
   Language: Langue
@@ -172,6 +180,8 @@ fr: # French
   First frame mouth alignment factor: First frame mouth alignment factor
   First frame eyes alignment factor: First frame eyes alignment factor
   Face Crop Factor: Face Crop Factor
+  Enable Image Restoration: Enable Image Restoration
+  This enables image restoration with RealESRGAN but slows down the speed: This enables image restoration with RealESRGAN but slows down the speed
 
 de: # German
   Language: Sprache
@@ -207,6 +217,8 @@ de: # German
   First frame mouth alignment factor: First frame mouth alignment factor
   First frame eyes alignment factor: First frame eyes alignment factor
   Face Crop Factor: Face Crop Factor
+  Enable Image Restoration: Enable Image Restoration
+  This enables image restoration with RealESRGAN but slows down the speed: This enables image restoration with RealESRGAN but slows down the speed
 
 zh: # Chinese
   Language: 语言
@@ -242,6 +254,8 @@ zh: # Chinese
   First frame mouth alignment factor: First frame mouth alignment factor
   First frame eyes alignment factor: First frame eyes alignment factor
   Face Crop Factor: Face Crop Factor
+  Enable Image Restoration: Enable Image Restoration
+  This enables image restoration with RealESRGAN but slows down the speed: This enables image restoration with RealESRGAN but slows down the speed
 
 uk: # Ukrainian
   Language: Мова
@@ -277,6 +291,8 @@ uk: # Ukrainian
   First frame mouth alignment factor: First frame mouth alignment factor
   First frame eyes alignment factor: First frame eyes alignment factor
   Face Crop Factor: Face Crop Factor
+  Enable Image Restoration: Enable Image Restoration
+  This enables image restoration with RealESRGAN but slows down the speed: This enables image restoration with RealESRGAN but slows down the speed
 
 ru: # Russian
   Language: Язык
@@ -312,6 +328,8 @@ ru: # Russian
   First frame mouth alignment factor: First frame mouth alignment factor
   First frame eyes alignment factor: First frame eyes alignment factor
   Face Crop Factor: Face Crop Factor
+  Enable Image Restoration: Enable Image Restoration
+  This enables image restoration with RealESRGAN but slows down the speed: This enables image restoration with RealESRGAN but slows down the speed
 
 tr: # Turkish
   Language: Dil
@@ -347,3 +365,5 @@ tr: # Turkish
   First frame mouth alignment factor: First frame mouth alignment factor
   First frame eyes alignment factor: First frame eyes alignment factor
   Face Crop Factor: Face Crop Factor
+  Enable Image Restoration: Enable Image Restoration
+  This enables image restoration with RealESRGAN but slows down the speed: This enables image restoration with RealESRGAN but slows down the speed
diff --git a/modules/image_restoration/__init__.py b/modules/image_restoration/__init__.py
diff --git a/modules/image_restoration/real_esrgan/__init__.py b/modules/image_restoration/real_esrgan/__init__.py
diff --git a/modules/image_restoration/real_esrgan/model_downloader.py b/modules/image_restoration/real_esrgan/model_downloader.py
@@ -0,0 +1,15 @@
+from modules.live_portrait.model_downloader import download_model
+
+# Download URL of the ``.pth`` checkpoint for each supported RealESRGAN model name.
+MODELS_REALESRGAN_URL = {
+    "realesr-general-x4v3": "https://huggingface.co/jhj0517/realesr-general-x4v3/resolve/main/realesr-general-x4v3.pth",
+    "RealESRGAN_x2": "https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x2.pth",
+}
+
+# Scale factors listed per model; keys mirror MODELS_REALESRGAN_URL.
+MODELS_REALESRGAN_SCALABILITY = {
+    "realesr-general-x4v3": [1, 2, 4],
+    "RealESRGAN_x2": [2]
+}
+
+
+def download_resrgan_model(file_path, url):
+    """Download a RealESRGAN checkpoint by delegating to ``download_model``.
+
+    Args:
+        file_path: Forwarded unchanged as the first argument of ``download_model``.
+        url: Forwarded unchanged as the second argument of ``download_model``.
+
+    Returns:
+        Whatever ``download_model`` returns; its contract lives in
+        ``modules.live_portrait.model_downloader`` and is not visible here.
+    """
+    return download_model(file_path, url)
diff --git a/modules/image_restoration/real_esrgan/real_esrgan_inferencer.py b/modules/image_restoration/real_esrgan/real_esrgan_inferencer.py
@@ -0,0 +1,120 @@
+import os.path
+import gradio as gr
+import torch
+import cv2
+from typing import Optional, Literal
+
+from modules.utils.paths import *
+from modules.utils.image_helper import save_image
+from .model_downloader import download_resrgan_model, MODELS_REALESRGAN_URL, MODELS_REALESRGAN_SCALABILITY
+from .wrapper.rrdb_net import RRDBNet
+from .wrapper.real_esrganer import RealESRGANer
+from .wrapper.srvgg_net_compact import SRVGGNetCompact
+
+
+class RealESRGANInferencer:
+    """Load a RealESRGAN model on demand and use it to restore images.
+
+    The built model is cached on the instance together with the configuration
+    (model name, scale, precision) it was built for, and is only rebuilt when
+    a different configuration is requested.
+    """
+
+    def __init__(self,
+                 model_dir: str = MODELS_REAL_ESRGAN_DIR,
+                 output_dir: str = OUTPUTS_DIR):
+        """Store directories, detect the device and set the default config.
+
+        Args:
+            model_dir: Directory where ``.pth`` checkpoints are looked up and
+                downloaded to.
+            output_dir: Directory used for results when an image is not
+                overwritten in place.
+        """
+        self.model_dir = model_dir
+        self.output_dir = output_dir
+        self.device = self.get_device()
+        self.arc = None
+        self.model = None
+        self.face_enhancer = None
+
+        self.available_models = list(MODELS_REALESRGAN_URL.keys())
+        self.default_model = self.available_models[0]
+        self.model_config = {
+            "model_name": self.default_model,
+            "scale": 1,
+            "half_precision": True
+        }
+
+    def load_model(self,
+                   model_name: Optional[str] = None,
+                   scale: Literal[1, 2, 4] = 1,
+                   half_precision: bool = True,
+                   progress: gr.Progress = gr.Progress()):
+        """Build the requested model unless it is already the cached one.
+
+        Args:
+            model_name: Key of ``MODELS_REALESRGAN_URL``, with or without a
+                trailing ``.pth``. ``None`` selects ``self.default_model``.
+            scale: Requested output scale; must be listed for the model in
+                ``MODELS_REALESRGAN_SCALABILITY``.
+            half_precision: Forwarded to ``RealESRGANer`` as ``half``.
+            progress: Gradio progress callback used to report a download.
+
+        Raises:
+            KeyError: If the model name is not a known model.
+            ValueError: If ``scale`` is not listed for the model.
+        """
+        # Resolve the default first so ``None`` and the explicit default name
+        # describe the same cached configuration.
+        if model_name is None:
+            model_name = self.default_model
+
+        model_config = {
+            "model_name": model_name,
+            "scale": scale,
+            "half_precision": half_precision
+        }
+        if self.model is not None and model_config == self.model_config:
+            return
+
+        # Work with the bare model name so "<name>" and "<name>.pth" resolve
+        # to the same dictionary keys and the same checkpoint file.
+        name = model_name[:-len(".pth")] if model_name.endswith(".pth") else model_name
+
+        # Validate before downloading; an explicit raise also survives ``-O``.
+        supported_scales = MODELS_REALESRGAN_SCALABILITY[name]
+        if scale not in supported_scales:
+            raise ValueError(
+                f"Scale {scale} is not supported by '{name}'. Supported scales: {supported_scales}"
+            )
+
+        model_path = os.path.join(self.model_dir, name + ".pth")
+        if not os.path.exists(model_path):
+            progress(0, f"Downloading RealESRGAN model to : {model_path}")
+            download_resrgan_model(model_path, MODELS_REALESRGAN_URL[name])
+
+        if name == 'RealESRGAN_x2':  # x2 RRDBNet model
+            # The x2 checkpoint needs an x2 network; the previous scale=4
+            # settings did not match the x2 weights.
+            arc = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
+            netscale = 2
+        else:  # x4 VGG-style model (S size) : "realesr-general-x4v3"
+            arc = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
+            netscale = 4
+
+        self.model = RealESRGANer(
+            scale=netscale,
+            model_path=model_path,
+            model=arc,
+            half=half_precision,
+            device=torch.device(self.device)
+        )
+        # Record the configuration only after the model was built, so a failed
+        # load never leaves a stale model cached under the new configuration.
+        self.model_config = model_config
+
+    def restore_image(self,
+                      img_path: str,
+                      model_name: Optional[str] = None,
+                      scale: int = 1,
+                      half_precision: Optional[bool] = None,
+                      overwrite: bool = True):
+        """Run the model on ``img_path`` and save the result.
+
+        Args:
+            img_path: Image handed to the model's ``enhance``; also the output
+                path when ``overwrite`` is true.
+            model_name: Model to use; ``None`` selects the default model.
+            scale: Output scale passed to ``enhance`` as ``outscale``.
+            half_precision: ``None`` enables half precision on CUDA only.
+                An explicit ``False`` disables it; ``True`` is honoured on
+                CUDA and ignored on other devices, as before.
+            overwrite: Save over ``img_path`` when true, otherwise save to a
+                new auto-incremented ``.png`` path in ``self.output_dir``.
+
+        Returns:
+            The value returned by ``save_image`` (presumably the saved path).
+        """
+        on_cuda = self.device == "cuda"
+        use_half = on_cuda and (half_precision is None or bool(half_precision))
+
+        # ``load_model`` returns immediately when this exact configuration is
+        # already loaded, so no separate staleness check is needed here.
+        self.load_model(
+            model_name=model_name,
+            scale=scale,
+            half_precision=use_half
+        )
+
+        if on_cuda:
+            with torch.autocast(device_type="cuda"):
+                output, img_mode = self.model.enhance(img_path, outscale=scale)
+        else:
+            # Autocast was disabled off CUDA anyway; skipping the context
+            # avoids PyTorch builds that reject other device types (e.g. "mps").
+            output, img_mode = self.model.enhance(img_path, outscale=scale)
+
+        if img_mode == "RGB":
+            # NOTE(review): presumably ``save_image`` expects BGR data - confirm.
+            output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
+
+        if overwrite:
+            output_path = img_path
+        else:
+            output_path = get_auto_incremental_file_path(self.output_dir, extension="png")
+
+        return save_image(output, output_path=output_path)
+
+    @staticmethod
+    def get_device():
+        """Return ``"cuda"``, ``"mps"`` or ``"cpu"``, in that order of preference."""
+        if torch.cuda.is_available():
+            return "cuda"
+        # ``torch.backends.mps`` may be missing on older PyTorch builds.
+        mps_backend = getattr(torch.backends, "mps", None)
+        if mps_backend is not None and mps_backend.is_available():
+            return "mps"
+        return "cpu"
diff --git a/modules/image_restoration/real_esrgan/wrapper/__init__.py b/modules/image_restoration/real_esrgan/wrapper/__init__.py