From e5ee3fdc19cc693fc4cccffc43f9633efc4f892f Mon Sep 17 00:00:00 2001
From: xuyuan23 <643854343@qq.com>
Date: Sat, 12 Aug 2023 21:35:56 +0800
Subject: [PATCH] add video generation in doc

---
 README.md |  7 +++--
 main.py   | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a71239e..e4222f1 100644
--- a/README.md
+++ b/README.md
@@ -53,12 +53,12 @@ git lfs install
 git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
 
 # [Options]
-# Size: 94 GB, stablediffusion-proxy service is recommended, https://github.com/xuyuan23/stablediffusion-proxy
+# Size: 94 GB, supports running in CPU mode (RAM > 14 GB). The stablediffusion-proxy service is recommended: https://github.com/xuyuan23/stablediffusion-proxy
 git lfs install
 git clone https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
 
 # [Options]
-# Size: 10 GB, Text2Video service is recommended. https://github.com/xuyuan23/Text2Video
+# Size: 16 GB, supports running in CPU mode (RAM > 16 GB). The Text2Video service is recommended: https://github.com/xuyuan23/Text2Video
 git lfs install
 git clone https://huggingface.co/cerspense/zeroscope_v2_576w
 ```
@@ -89,6 +89,9 @@ OPEN_AI_KEY=sk-xxx
 # If you don't deploy stable diffusion service, no image will be generated.
 SD_PROXY_URL=127.0.0.1:7860
+
+# If you don't deploy the Text2Video service, no videos will be generated.
+T2V_PROXY_URL=127.0.0.1:7861
 ```
 
 - More Details see file `.env.template`
diff --git a/main.py b/main.py
index 3f8e7ea..0d43f09 100644
--- a/main.py
+++ b/main.py
@@ -13,6 +13,9 @@
 from operategpt.providers import sd_proxy
 from dotenv import load_dotenv
 
+from operategpt.providers.base import T2VPrompt
+from operategpt.providers.text2video_proxy import t2v_request
+
 load_dotenv(verbose=True, override=True)
 OPEN_AI_PROXY_SERVER_URL = os.getenv("OPEN_AI_PROXY_SERVER_URL", "https://api.openai.com/v1/chat/completions")
 OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
@@ -29,6 +32,11 @@
 ```
 {1}
 ```
+
+Please insert the following videos at different appropriate locations in the document (not all at the same location); you can use a format such as `<video src="video_url" controls="controls"></video>`. If the video list is empty, please ignore it.
+```
+{2}
+```
 """
 
 IMAGE_DESC_PROMPT = """Based on the content below, select 3 to 5 relevant events or content information and describe them along with their respective characteristics:
@@ -43,6 +51,19 @@
 """
 
 
+VIDEO_DESC_PROMPT = """Based on the content below, summarize the core topic, along with its related functions and processes:
+```
+{0}
+```
+
+Please provide an answer similar to the one below, without any additional information. Details start with <VideoPrompt> and end with </VideoPrompt>; place no content outside the <VideoPrompt> and </VideoPrompt> tags.
+You should respond using exactly the following format: a single JSON object containing key-value pairs.
+
+    <VideoPrompt>{{"video-name-1": "<prompt of video-1>"}}</VideoPrompt>
+
+"""
+
+
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 
 PROJECT_DATA_PATH = os.path.join(ROOT_DIR, "data")
@@ -157,6 +178,56 @@ def generate_images(converted_dict: dict) -> str:
     return str(image_dict)
 
 
+def parse_video_info(summary_data: str) -> dict:
+    videos_prompt_info = VIDEO_DESC_PROMPT.format(summary_data)
+    logger.info(
+        f"\n====================================videos_prompt_info=\n{videos_prompt_info}"
+    )
+
+    video_info = query_from_openai_proxy(videos_prompt_info)
+    logger.info(f"\n====================================video_info=\n{video_info}")
+
+    # Extract the content within the VideoPrompt tag
+    start_index = video_info.index("<VideoPrompt>") + len("<VideoPrompt>")
+    end_index = video_info.index("</VideoPrompt>")
+    content = video_info[start_index:end_index]
+    logger.info(
+        f"\n=====================================extract json prompt from video_info=\n{content}"
+    )
+
+    data_dict = json.loads(content)
+    converted_dict = {key.replace(" ", "_"): value for key, value in data_dict.items()}
+    return converted_dict
+
+
+def generate_videos(converted_dict: dict) -> str:
+    video_list = []
+    try:
+        if len(converted_dict) == 0:
+            return "No Videos"
+        index = 0
+        logger.info(
+            f"generate_videos: start generating videos, total: {len(converted_dict)}, current: {index}"
+        )
+        # start requesting text2video:
+        for video_name, video_prompt in converted_dict.items():
+            index += 1
+            t2v_prompt = T2VPrompt()
+            t2v_prompt.prompt = video_prompt
+            download_url = t2v_request(t2v_prompt)
+            if download_url is None:
+                continue
+
+            video_list.append({"video_name": video_name, "url": download_url})
+            logger.info(
+                f"generate_videos: generating videos, total: {len(converted_dict)}, completed: {index}, video_list={str(video_list)}"
+            )
+        return str(video_list)
+    except Exception as e:
+        logger.error(f"generate_videos exception: {str(e)}")
+        return str(video_list)
+
+
 def write_markdown_content(content, filename, filepath):
     if not os.path.exists(filepath):
         os.makedirs(filepath)
@@ -201,7 +272,14 @@ async def startup(idea: str):
     image_data = generate_images(image_prompt_dict)
     logger.info(f"\ncompleted generate_images=\n{image_data}")
 
-    prompt_req = OPERATE_PROMPT.format(summary_data, image_data)
+    # if the Text2Video service is available, add video info
+    video_prompt_dict = parse_video_info(summary_data)
+    logger.info(f"\ncompleted parse_video_info=\n{video_prompt_dict}")
+
+    video_data = generate_videos(video_prompt_dict)
+    logger.info(f"\ncompleted generate_videos=\n{video_data}")
+
+    prompt_req = OPERATE_PROMPT.format(summary_data, image_data, video_data)
     logger.info(f"\ngenerated markdown content prompt request=\n{prompt_req}")
 
     result = query_from_openai_proxy(prompt_req)
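
Note for reviewers: the patch imports `T2VPrompt` and `t2v_request` but does not include `operategpt/providers/text2video_proxy.py` itself. Below is a minimal sketch of what such a provider could look like, assuming the Text2Video proxy accepts a JSON prompt over HTTP and replies with a download URL; the `/generate` endpoint path, request schema, and `download_url` response field are illustrative assumptions, not the actual API.

```python
# Hypothetical sketch of operategpt/providers/text2video_proxy.py.
# The real module is not part of this patch; endpoint and schema are assumed.
import os

import requests


class T2VPrompt:
    """Minimal prompt container matching how main.py uses it (t2v_prompt.prompt = ...)."""

    def __init__(self, prompt: str = None):
        self.prompt = prompt


def t2v_request(t2v_prompt: T2VPrompt) -> str:
    """Send the prompt to the Text2Video proxy; return a video download URL, or None on failure."""
    t2v_proxy_url = os.getenv("T2V_PROXY_URL", "127.0.0.1:7861")
    try:
        # Assumed request/response shape: {"prompt": "..."} -> {"download_url": "..."}
        resp = requests.post(
            f"http://{t2v_proxy_url}/generate",
            json={"prompt": t2v_prompt.prompt},
            timeout=600,
        )
        resp.raise_for_status()
        return resp.json().get("download_url")
    except requests.RequestException:
        # generate_videos() in main.py skips the video when None is returned
        return None
```

With a provider shaped like this, `generate_videos` degrades gracefully: any request failure yields `None`, and that video is simply skipped rather than aborting the whole document generation.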