format all py files
Nuullll committed Nov 15, 2024
1 parent 40f9c93 commit 39fe985
Showing 20 changed files with 599 additions and 206 deletions.
260 changes: 206 additions & 54 deletions service/attention.py

Large diffs are not rendered by default.
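
Note: the commit message does not name the formatter, but the changes visible in the diffs below (strings normalized to double quotes, two spaces before inline "# noqa" comments, long call arguments wrapped one per line with trailing commas) match Black's default style. A minimal sketch, assuming Black is the tool used, that reproduces the quote normalization seen in service/device_detect.py:

# Hypothetical reproduction of one of the changes below; assumes Black (pip install black),
# which the commit itself does not state.
import black

before = "print(','.join(supported_ids))\n"
after = black.format_str(before, mode=black.Mode())
print(after)  # prints: print(",".join(supported_ids)) -- the double-quoted form in the new lines

The equivalent command-line step would be something along the lines of running black over the service/ directory.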

6 changes: 3 additions & 3 deletions service/device_detect.py
@@ -1,11 +1,11 @@
import torch
import intel_extension_for_pytorch as ipex # noqa: F401
import intel_extension_for_pytorch as ipex # noqa: F401

# filter out non-Arc devices
supported_ids = []
for i in range(torch.xpu.device_count()):
props = torch.xpu.get_device_properties(i)
if 'arc' in props.name.lower():
if "arc" in props.name.lower():
supported_ids.append(str(i))

print(','.join(supported_ids))
print(",".join(supported_ids))
10 changes: 7 additions & 3 deletions service/downloader.py
@@ -9,15 +9,15 @@ class ModelDownloaderApi:
file_queue: list
total_size: int
fs: HfFileSystem
repo_folder:str
repo_folder: str

def __init__(self):
self.file_queue = list()
self.fs = HfFileSystem()

def get_info(self, repo_id: str, is_sd=False):
self.repo_id = repo_id
self.repo_folder = repo_id.replace('/','---')
self.repo_folder = repo_id.replace("/", "---")
self.file_queue.clear()
self.total_size = 0
self.enum_file_list(repo_id, is_sd, True)
@@ -60,7 +60,11 @@ def enum_file_list(self, enum_path: str, is_sd=False, is_root=True):
repo_id=self.repo_id, filename=filename, subfolder=subfolder
)
self.file_queue.append(
{"name": name.replace(self.repo_id, self.repo_folder), "size": size, "url": url}
{
"name": name.replace(self.repo_id, self.repo_folder),
"size": size,
"url": url,
}
)


2 changes: 1 addition & 1 deletion service/exceptions.py
@@ -2,4 +2,4 @@ class DownloadException(Exception):
url: str

def __init__(self, url: str):
super().__init__(f"download {url} failed")
super().__init__(f"download {url} failed")
6 changes: 1 addition & 5 deletions service/file_downloader.py
@@ -106,17 +106,13 @@ def __start_download(self, response: requests.Response, fw: BufferedWriter):
time.sleep(1)
response, fw = self.__init_download(self.url, self.filename)


def __start_report_download_progress(self):
report_thread = Thread(target=self.__report_download_progress)
report_thread.start()
return report_thread

def __report_download_progress(self):
while (
not self.download_stop
and not self.completed
):
while not self.download_stop and not self.completed:
self.on_download_progress(
self.basename,
self.download_size,
7 changes: 4 additions & 3 deletions service/inpaint_utils.py
@@ -72,7 +72,7 @@ def calc_out_size(width: int, height: int, is_sdxl=False) -> tuple[int, int, int
radio = width / max
return max, make_multiple_of_8(int(height / radio)), radio
elif height > max:
radio = height / max
radio = height / max
return make_multiple_of_8(int(width / radio)), max, radio
return make_multiple_of_8(width), make_multiple_of_8(height), 1

@@ -86,21 +86,22 @@ def resize_by_max(image: Image.Image, max_size: int, multiple_of_8=True):
if image.width > image.height:
downscale_ratio = image.width / max_size
downscale_width = int(image.width / downscale_ratio)
downscale_height = int(image.height/ downscale_ratio)
downscale_height = int(image.height / downscale_ratio)
if multiple_of_8:
new_width = make_multiple_of_8(downscale_width)
new_height = make_multiple_of_8(downscale_height)
return image.resize((new_width, new_height)), downscale_ratio
else:
downscale_ratio = image.height / max_size
downscale_width = int(image.width / downscale_ratio)
downscale_height = int(image.height/ downscale_ratio)
downscale_height = int(image.height / downscale_ratio)
if multiple_of_8:
new_width = make_multiple_of_8(downscale_width)
new_height = make_multiple_of_8(downscale_height)
return image.resize((new_width, new_height)), downscale_ratio
return image, 1


# def resize_by_max(image: Image.Image, max_size):
# if image.width > max_size or image.height > max_size:
# aspect_ratio = image.width / image.height
15 changes: 9 additions & 6 deletions service/lama.py
@@ -1,4 +1,3 @@

import cv2
import torch
import numpy as np
@@ -17,6 +16,7 @@ def get_image(img):
img = img.astype(np.float32) / 255
return img


def prepare_img_and_mask(image, mask, device, pad_out_to_modulo=8, scale_factor=None):
def ceil_modulo(x, mod):
if x % mod == 0:
@@ -48,7 +48,9 @@ def scale_image(img, factor, interpolation=cv2.INTER_AREA):
img = img[0]
else:
img = np.transpose(img, (1, 2, 0))
img = cv2.resize(img, dsize=None, fx=factor, fy=factor, interpolation=interpolation)
img = cv2.resize(
img, dsize=None, fx=factor, fy=factor, interpolation=interpolation
)
if img.ndim == 2:
img = img[None, ...]
else:
@@ -96,7 +98,7 @@ def __call__(self, image: Image.Image | np.ndarray, mask: Image.Image | np.ndarr
if image is None:
return None
if mask is None:
mask = Image.new('L', image.size, 0)
mask = Image.new("L", image.size, 0)
return None
image, mask = prepare_img_and_mask(image, mask, self.device)
with torch.inference_mode():
@@ -105,10 +107,11 @@ def __call__(self, image: Image.Image | np.ndarray, mask: Image.Image | np.ndarr
cur_res = np.clip(cur_res * 255, 0, 255).astype(np.uint8)
cur_res = Image.fromarray(cur_res)
return cur_res



if __name__ == "__main__":
lama = SimpleLama()
image = Image.open("C:\\Users\\X\\Desktop\\inpaint_test.png")
mask_image = Image.open("C:\\Users\\X\\Desktop\\1mask.png")
result_image = lama(image,mask_image)
result_image.show()
result_image = lama(image, mask_image)
result_image.show()
31 changes: 17 additions & 14 deletions service/llm_biz.py
@@ -30,12 +30,17 @@
class LLMParams:
prompt: List[Dict[str, str]]
device: int
enable_rag: bool
enable_rag: bool
model_repo_id: str
print_metrics: bool

def __init__(
self, prompt: list, device: int, enable_rag: bool, model_repo_id: str, print_metrics: bool = True
self,
prompt: list,
device: int,
enable_rag: bool,
model_repo_id: str,
print_metrics: bool = True,
) -> None:
self.prompt = prompt
self.device = device
@@ -52,10 +57,9 @@ def __init__(
_stop_event = threading.Event()
_last_repo_id: str = None
_default_prompt = {
"role": "system",
"content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user. Please keep the output text language the same as the user input.",
}

"role": "system",
"content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user. Please keep the output text language the same as the user input.",
}


def user_stop(input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
@@ -87,7 +91,7 @@ def generate(
logging.debug(f"got prompt: {prompt}")
global _stop_generate, _default_prompt
_stop_generate = False

chat_history = [_default_prompt]
prompt_len = prompt.__len__()
i = 0
@@ -99,16 +103,14 @@
)
i = i + 1



new_prompt = tokenizer.apply_chat_template(
chat_history, tokenize=False, add_generation_prompt=True
chat_history, tokenize=False, add_generation_prompt=True
)

while len(tokenizer.tokenize(new_prompt)) > 2000:
chat_history.remove(chat_history[1])
new_prompt = tokenizer.apply_chat_template(
chat_history, tokenize=False, add_generation_prompt=True
chat_history, tokenize=False, add_generation_prompt=True
)

model_inputs = tokenizer(new_prompt, return_tensors="pt").to(model_config.device)
@@ -148,6 +150,7 @@ def process_rag(
text_out_callback: Callable[[str, int], None] = None,
):
import rag

rag.to(model_config.device)
query_success, context, rag_source = rag.query(prompt)
if query_success:
@@ -197,13 +200,13 @@ def chat(
load_model_callback("start")
start = time.time()

load_in_low_bit="sym_int4"
load_in_low_bit = "sym_int4"

_model = AutoModelForCausalLM.from_pretrained(
model_path,
torch_dtype=torch.float16,
trust_remote_code=True,
load_in_low_bit= load_in_low_bit,
load_in_low_bit=load_in_low_bit,
# load_in_4bit=True,
)

34 changes: 22 additions & 12 deletions service/main.py
@@ -3,11 +3,11 @@
import time
import traceback
import torch
from transformers import pipeline,PreTrainedModel,TextIteratorStreamer
from transformers import pipeline, PreTrainedModel, TextIteratorStreamer
import intel_extension_for_pytorch as ipex

def stream_chat_generate(model:PreTrainedModel, args:dict):

def stream_chat_generate(model: PreTrainedModel, args: dict):
try:
print("generate start")
start = time.time()
@@ -17,23 +17,33 @@ def stream_chat_generate(model:PreTrainedModel, args:dict):
except Exception:
traceback.print_exc()


if __name__ == "__main__":
pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16)
pipe = pipeline(
"text-generation",
model="microsoft/Phi-3-mini-4k-instruct",
torch_dtype=torch.bfloat16,
)

# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
{
"role": "system",
"content": "You are a friendly chatbot who always responds in the style of a pirate",
},
{"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
{
"role": "user",
"content": "How many helicopters can a human eat in one sitting?",
},
]
pipe.model.eval()
pipe.model.to("xpu")
model = ipex.optimize(pipe.model, dtype=torch.bfloat16)
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, return_tensors="pt")
prompt = pipe.tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True, return_tensors="pt"
)
encoding = pipe.tokenizer.encode_plus(prompt, return_tensors="pt").to("xpu")
tensor : torch.Tensor = encoding.get("input_ids")
tensor: torch.Tensor = encoding.get("input_ids")
streamer = TextIteratorStreamer(
pipe.tokenizer,
skip_prompt=False, # skip prompt in the generated tokens
@@ -45,13 +55,13 @@ def stream_chat_generate(model:PreTrainedModel, args:dict):
num_beams=1,
do_sample=True,
max_new_tokens=256,
temperature=0.7,
top_k=50,
top_p=0.95
temperature=0.7,
top_k=50,
top_p=0.95,
)
torch.xpu.synchronize()
Thread(target=stream_chat_generate, args=(pipe.model,generate_kwargs)).start()
Thread(target=stream_chat_generate, args=(pipe.model, generate_kwargs)).start()

for stream_output in streamer:
print(stream_output, end="")
print()
11 changes: 5 additions & 6 deletions service/model_config.py
@@ -1,16 +1,15 @@

# CONFIG_PATH = "./model_config.json"

config = {
"llm": "./models/llm/checkpoints",
"embedding":"./models/llm/embedding",
"embedding": "./models/llm/embedding",
"stableDiffusion": "./models/stable_diffusion/checkpoints",
"lora": "./models/stable_diffusion/lora",
"vae": "./models/stable_diffusion/vae",
"inpaint": "./models/stable_diffusion/inpaint",
"ESRGAN":"./models/stable_diffusion/ESRGAN",
"preview":"./models/stable_diffusion/preview"
}
"ESRGAN": "./models/stable_diffusion/ESRGAN",
"preview": "./models/stable_diffusion/preview",
}

device = "xpu"
env_type ="arc"
env_type = "arc"
10 changes: 6 additions & 4 deletions service/model_download_adpater.py
@@ -17,7 +17,7 @@ class Model_Downloader_Adapter:
file_downloader: FileDownloader
hf_downloader: HFPlaygroundDownloader
has_error: bool
user_stop:bool
user_stop: bool

def __init__(self, hf_token=None):
self.msg_queue = Queue(-1)
@@ -46,7 +46,10 @@ def download_model_progress_callback(
):
print(
"download {} {}/{} speed {}".format(
repo_id, bytes2human(download_size), bytes2human(total_size), bytes2human(speed)
repo_id,
bytes2human(download_size),
bytes2human(total_size),
bytes2human(speed),
)
)
data = {
@@ -117,7 +120,7 @@ def __start_download(self, list: list):
realesrgan.ESRGAN_MODEL_URL,
os.path.join(
utils.get_model_path(item["type"]),
os.path.basename(realesrgan.ESRGAN_MODEL_URL)
os.path.basename(realesrgan.ESRGAN_MODEL_URL),
),
)
else:
@@ -134,7 +137,6 @@ def stop_download(self):
if not self.hf_downloader.completed:
self.hf_downloader.stop_download()


def generator(self):
while True:
while not self.msg_queue.empty():
(Diffs for the remaining changed files were not loaded.)
