format all py files
Nuullll committed Nov 15, 2024
1 parent 40f9c93 commit 39fe985
Showing 20 changed files with 599 additions and 206 deletions.
260 changes: 206 additions & 54 deletions service/attention.py

Large diffs are not rendered by default.
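
Note: the commit message does not name the formatter, but the changes visible in the diffs below (strings normalized to double quotes, two spaces before inline "# noqa" comments, long call arguments wrapped one per line with trailing commas) match Black's default style. A minimal sketch, assuming Black is the tool used, that reproduces the quote normalization seen in service/device_detect.py:

# Hypothetical reproduction of one of the changes below; assumes Black (pip install black),
# which the commit itself does not state.
import black

before = "print(','.join(supported_ids))\n"
after = black.format_str(before, mode=black.Mode())
print(after)  # prints: print(",".join(supported_ids)) -- the double-quoted form in the new lines

The equivalent command-line step would be something along the lines of running black over the service/ directory.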

6 changes: 3 additions & 3 deletions service/device_detect.py
@@ -1,11 +1,11 @@
import torch
import intel_extension_for_pytorch as ipex # noqa: F401
import intel_extension_for_pytorch as ipex # noqa: F401

# filter out non-Arc devices
supported_ids = []
for i in range(torch.xpu.device_count()):
props = torch.xpu.get_device_properties(i)
if 'arc' in props.name.lower():
if "arc" in props.name.lower():
supported_ids.append(str(i))

print(','.join(supported_ids))
print(",".join(supported_ids))
10 changes: 7 additions & 3 deletions service/downloader.py
@@ -9,15 +9,15 @@ class ModelDownloaderApi:
file_queue: list
total_size: int
fs: HfFileSystem
repo_folder:str
repo_folder: str

def __init__(self):
self.file_queue = list()
self.fs = HfFileSystem()

def get_info(self, repo_id: str, is_sd=False):
self.repo_id = repo_id
self.repo_folder = repo_id.replace('/','---')
self.repo_folder = repo_id.replace("/", "---")
self.file_queue.clear()
self.total_size = 0
self.enum_file_list(repo_id, is_sd, True)
@@ -60,7 +60,11 @@ def enum_file_list(self, enum_path: str, is_sd=False, is_root=True):
repo_id=self.repo_id, filename=filename, subfolder=subfolder
)
self.file_queue.append(
{"name": name.replace(self.repo_id, self.repo_folder), "size": size, "url": url}
{
"name": name.replace(self.repo_id, self.repo_folder),
"size": size,
"url": url,
}
)


2 changes: 1 addition & 1 deletion service/exceptions.py
@@ -2,4 +2,4 @@ class DownloadException(Exception):
url: str

def __init__(self, url: str):
super().__init__(f"download {url} failed")
super().__init__(f"download {url} failed")
6 changes: 1 addition & 5 deletions service/file_downloader.py
@@ -106,17 +106,13 @@ def __start_download(self, response: requests.Response, fw: BufferedWriter):
time.sleep(1)
response, fw = self.__init_download(self.url, self.filename)


def __start_report_download_progress(self):
report_thread = Thread(target=self.__report_download_progress)
report_thread.start()
return report_thread

def __report_download_progress(self):
while (
not self.download_stop
and not self.completed
):
while not self.download_stop and not self.completed:
self.on_download_progress(
self.basename,
self.download_size,
7 changes: 4 additions & 3 deletions service/inpaint_utils.py
@@ -72,7 +72,7 @@ def calc_out_size(width: int, height: int, is_sdxl=False) -> tuple[int, int, int
radio = width / max
return max, make_multiple_of_8(int(height / radio)), radio
elif height > max:
radio = height / max
radio = height / max
return make_multiple_of_8(int(width / radio)), max, radio
return make_multiple_of_8(width), make_multiple_of_8(height), 1

@@ -86,21 +86,22 @@ def resize_by_max(image: Image.Image, max_size: int, multiple_of_8=True):
if image.width > image.height:
downscale_ratio = image.width / max_size
downscale_width = int(image.width / downscale_ratio)
downscale_height = int(image.height/ downscale_ratio)
downscale_height = int(image.height / downscale_ratio)
if multiple_of_8:
new_width = make_multiple_of_8(downscale_width)
new_height = make_multiple_of_8(downscale_height)
return image.resize((new_width, new_height)), downscale_ratio
else:
downscale_ratio = image.height / max_size
downscale_width = int(image.width / downscale_ratio)
downscale_height = int(image.height/ downscale_ratio)
downscale_height = int(image.height / downscale_ratio)
if multiple_of_8:
new_width = make_multiple_of_8(downscale_width)
new_height = make_multiple_of_8(downscale_height)
return image.resize((new_width, new_height)), downscale_ratio
return image, 1


# def resize_by_max(image: Image.Image, max_size):
# if image.width > max_size or image.height > max_size:
# aspect_ratio = image.width / image.height
15 changes: 9 additions & 6 deletions service/lama.py
@@ -1,4 +1,3 @@

import cv2
import torch
import numpy as np
@@ -17,6 +16,7 @@ def get_image(img):
img = img.astype(np.float32) / 255
return img


def prepare_img_and_mask(image, mask, device, pad_out_to_modulo=8, scale_factor=None):
def ceil_modulo(x, mod):
if x % mod == 0:
@@ -48,7 +48,9 @@ def scale_image(img, factor, interpolation=cv2.INTER_AREA):
img = img[0]
else:
img = np.transpose(img, (1, 2, 0))
img = cv2.resize(img, dsize=None, fx=factor, fy=factor, interpolation=interpolation)
img = cv2.resize(
img, dsize=None, fx=factor, fy=factor, interpolation=interpolation
)
if img.ndim == 2:
img = img[None, ...]
else:
@@ -96,7 +98,7 @@ def __call__(self, image: Image.Image | np.ndarray, mask: Image.Image | np.ndarr
if image is None:
return None
if mask is None:
mask = Image.new('L', image.size, 0)
mask = Image.new("L", image.size, 0)
return None
image, mask = prepare_img_and_mask(image, mask, self.device)
with torch.inference_mode():
@@ -105,10 +107,11 @@ def __call__(self, image: Image.Image | np.ndarray, mask: Image.Image | np.ndarr
cur_res = np.clip(cur_res * 255, 0, 255).astype(np.uint8)
cur_res = Image.fromarray(cur_res)
return cur_res



if __name__ == "__main__":
lama = SimpleLama()
image = Image.open("C:\\Users\\X\\Desktop\\inpaint_test.png")
mask_image = Image.open("C:\\Users\\X\\Desktop\\1mask.png")
result_image = lama(image,mask_image)
result_image.show()
result_image = lama(image, mask_image)
result_image.show()
31 changes: 17 additions & 14 deletions service/llm_biz.py
@@ -30,12 +30,17 @@
class LLMParams:
prompt: List[Dict[str, str]]
device: int
enable_rag: bool
enable_rag: bool
model_repo_id: str
print_metrics: bool

def __init__(
self, prompt: list, device: int, enable_rag: bool, model_repo_id: str, print_metrics: bool = True
self,
prompt: list,
device: int,
enable_rag: bool,
model_repo_id: str,
print_metrics: bool = True,
) -> None:
self.prompt = prompt
self.device = device
@@ -52,10 +57,9 @@ def __init__(
_stop_event = threading.Event()
_last_repo_id: str = None
_default_prompt = {
"role": "system",
"content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user. Please keep the output text language the same as the user input.",
}

"role": "system",
"content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user. Please keep the output text language the same as the user input.",
}


def user_stop(input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
@@ -87,7 +91,7 @@ def generate(
logging.debug(f"got prompt: {prompt}")
global _stop_generate, _default_prompt
_stop_generate = False

chat_history = [_default_prompt]
prompt_len = prompt.__len__()
i = 0
@@ -99,16 +103,14 @@
)
i = i + 1



new_prompt = tokenizer.apply_chat_template(
chat_history, tokenize=False, add_generation_prompt=True
chat_history, tokenize=False, add_generation_prompt=True
)

while len(tokenizer.tokenize(new_prompt)) > 2000:
chat_history.remove(chat_history[1])
new_prompt = tokenizer.apply_chat_template(
chat_history, tokenize=False, add_generation_prompt=True
chat_history, tokenize=False, add_generation_prompt=True
)

model_inputs = tokenizer(new_prompt, return_tensors="pt").to(model_config.device)
@@ -148,6 +150,7 @@ def process_rag(
text_out_callback: Callable[[str, int], None] = None,
):
import rag

rag.to(model_config.device)
query_success, context, rag_source = rag.query(prompt)
if query_success:
@@ -197,13 +200,13 @@ def chat(
load_model_callback("start")
start = time.time()

load_in_low_bit="sym_int4"
load_in_low_bit = "sym_int4"

_model = AutoModelForCausalLM.from_pretrained(
model_path,
torch_dtype=torch.float16,
trust_remote_code=True,
load_in_low_bit= load_in_low_bit,
load_in_low_bit=load_in_low_bit,
# load_in_4bit=True,
)

34 changes: 22 additions & 12 deletions service/main.py
@@ -3,11 +3,11 @@
import time
import traceback
import torch
from transformers import pipeline,PreTrainedModel,TextIteratorStreamer
from transformers import pipeline, PreTrainedModel, TextIteratorStreamer
import intel_extension_for_pytorch as ipex

def stream_chat_generate(model:PreTrainedModel, args:dict):

def stream_chat_generate(model: PreTrainedModel, args: dict):
try:
print("generate start")
start = time.time()
@@ -17,23 +17,33 @@ def stream_chat_generate(model:PreTrainedModel, args:dict):
except Exception:
traceback.print_exc()


if __name__ == "__main__":
pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16)
pipe = pipeline(
"text-generation",
model="microsoft/Phi-3-mini-4k-instruct",
torch_dtype=torch.bfloat16,
)

# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
{
"role": "system",
"content": "You are a friendly chatbot who always responds in the style of a pirate",
},
{"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
{
"role": "user",
"content": "How many helicopters can a human eat in one sitting?",
},
]
pipe.model.eval()
pipe.model.to("xpu")
model = ipex.optimize(pipe.model, dtype=torch.bfloat16)
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, return_tensors="pt")
prompt = pipe.tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True, return_tensors="pt"
)
encoding = pipe.tokenizer.encode_plus(prompt, return_tensors="pt").to("xpu")
tensor : torch.Tensor = encoding.get("input_ids")
tensor: torch.Tensor = encoding.get("input_ids")
streamer = TextIteratorStreamer(
pipe.tokenizer,
skip_prompt=False, # skip prompt in the generated tokens
@@ -45,13 +55,13 @@ def stream_chat_generate(model:PreTrainedModel, args:dict):
num_beams=1,
do_sample=True,
max_new_tokens=256,
temperature=0.7,
top_k=50,
top_p=0.95
temperature=0.7,
top_k=50,
top_p=0.95,
)
torch.xpu.synchronize()
Thread(target=stream_chat_generate, args=(pipe.model,generate_kwargs)).start()
Thread(target=stream_chat_generate, args=(pipe.model, generate_kwargs)).start()

for stream_output in streamer:
print(stream_output, end="")
print()
11 changes: 5 additions & 6 deletions service/model_config.py
@@ -1,16 +1,15 @@

# CONFIG_PATH = "./model_config.json"

config = {
"llm": "./models/llm/checkpoints",
"embedding":"./models/llm/embedding",
"embedding": "./models/llm/embedding",
"stableDiffusion": "./models/stable_diffusion/checkpoints",
"lora": "./models/stable_diffusion/lora",
"vae": "./models/stable_diffusion/vae",
"inpaint": "./models/stable_diffusion/inpaint",
"ESRGAN":"./models/stable_diffusion/ESRGAN",
"preview":"./models/stable_diffusion/preview"
}
"ESRGAN": "./models/stable_diffusion/ESRGAN",
"preview": "./models/stable_diffusion/preview",
}

device = "xpu"
env_type ="arc"
env_type = "arc"
10 changes: 6 additions & 4 deletions service/model_download_adpater.py
@@ -17,7 +17,7 @@ class Model_Downloader_Adapter:
file_downloader: FileDownloader
hf_downloader: HFPlaygroundDownloader
has_error: bool
user_stop:bool
user_stop: bool

def __init__(self, hf_token=None):
self.msg_queue = Queue(-1)
@@ -46,7 +46,10 @@ def download_model_progress_callback(
):
print(
"download {} {}/{} speed {}".format(
repo_id, bytes2human(download_size), bytes2human(total_size), bytes2human(speed)
repo_id,
bytes2human(download_size),
bytes2human(total_size),
bytes2human(speed),
)
)
data = {
@@ -117,7 +120,7 @@ def __start_download(self, list: list):
realesrgan.ESRGAN_MODEL_URL,
os.path.join(
utils.get_model_path(item["type"]),
os.path.basename(realesrgan.ESRGAN_MODEL_URL)
os.path.basename(realesrgan.ESRGAN_MODEL_URL),
),
)
else:
@@ -134,7 +137,6 @@ def stop_download(self):
if not self.hf_downloader.completed:
self.hf_downloader.stop_download()


def generator(self):
while True:
while not self.msg_queue.empty():
(Diffs for the remaining changed files were not loaded.)
