From e5ee3fdc19cc693fc4cccffc43f9633efc4f892f Mon Sep 17 00:00:00 2001
From: xuyuan23 <643854343@qq.com>
Date: Sat, 12 Aug 2023 21:35:56 +0800
Subject: [PATCH] add video generation to generated docs
---
README.md | 7 +++--
main.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 84 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index a71239e..e4222f1 100644
--- a/README.md
+++ b/README.md
@@ -53,12 +53,12 @@ git lfs install
git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
# [Options]
-# Size: 94 GB, stablediffusion-proxy service is recommended, https://github.com/xuyuan23/stablediffusion-proxy
+# Size: 94 GB, supports running on CPU (RAM > 14 GB). The stablediffusion-proxy service is recommended: https://github.com/xuyuan23/stablediffusion-proxy
git lfs install
git clone https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
# [Options]
-# Size: 10 GB, Text2Video service is recommended. https://github.com/xuyuan23/Text2Video
+# Size: 16 GB, supports running on CPU (RAM > 16 GB). The Text2Video service is recommended: https://github.com/xuyuan23/Text2Video
git lfs install
git clone https://huggingface.co/cerspense/zeroscope_v2_576w
```
@@ -89,6 +89,9 @@ OPEN_AI_KEY=sk-xxx
# If you don't deploy stable diffusion service, no image will be generated.
SD_PROXY_URL=127.0.0.1:7860
+
+# If you don't deploy the Text2Video service, no videos will be generated.
+T2V_PROXY_URL=127.0.0.1:7861
```
- More Details see file `.env.template`
diff --git a/main.py b/main.py
index 3f8e7ea..0d43f09 100644
--- a/main.py
+++ b/main.py
@@ -13,6 +13,9 @@
from operategpt.providers import sd_proxy
from dotenv import load_dotenv
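+# Text2Video support: T2VPrompt wraps the prompt payload and t2v_request
+# sends it to the Text2Video proxy configured via T2V_PROXY_URL.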
+from operategpt.providers.base import T2VPrompt
+from operategpt.providers.text2video_proxy import t2v_request
+
load_dotenv(verbose=True, override=True)
OPEN_AI_PROXY_SERVER_URL = os.getenv("OPEN_AI_PROXY_SERVER_URL", "https://api.openai.com/v1/chat/completions")
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
@@ -29,6 +32,11 @@
```
{1}
```
+
+Please insert the following videos at different, appropriate locations in the document (not all in the same place). You can use a format such as `<video src="video_url"></video>`; if the video list is empty, please ignore this instruction.
+```
+{2}
+```
"""
IMAGE_DESC_PROMPT = """Based on the content below, select 3 to 5 relevant events or content information and describe them along with their respective characteristics:
@@ -43,6 +51,19 @@
"""
+VIDEO_DESC_PROMPT = """Based on the content below, summarize the core subject along with its related functions and processes:
+```
+{0}
+```
+
+Please provide an answer similar to the one below, without any additional information. The details must start with <VideoPrompt> and end with </VideoPrompt>; do not output any content outside the <VideoPrompt> and </VideoPrompt> tags.
+Respond in the following format: a single JSON object with key-value pairs:
+
+    <VideoPrompt>{{"video-name-1": "<prompt describing video 1>"}}</VideoPrompt>
+
+"""
+
+
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
PROJECT_DATA_PATH = os.path.join(ROOT_DIR, "data")
@@ -157,6 +178,56 @@ def generate_images(converted_dict: dict) -> str:
    return str(image_dict)
+def parse_video_info(summary_data: str) -> dict:
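+    """Ask the LLM to distill summary_data into short text-to-video prompts.
+
+    Returns a dict mapping video names (spaces replaced by underscores)
+    to their generation prompts.
+    """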
+    videos_prompt_info = VIDEO_DESC_PROMPT.format(summary_data)
+    logger.info(
+        f"\n====================================videos_prompt_info=\n{videos_prompt_info}"
+    )
+
+    video_info = query_from_openai_proxy(videos_prompt_info)
+    logger.info(f"\n====================================video_info=\n{video_info}")
+
+    # Extract the content within the VideoPrompt tags
+    start_index = video_info.index("<VideoPrompt>") + len("<VideoPrompt>")
+    end_index = video_info.index("</VideoPrompt>")
+    content = video_info[start_index:end_index]
+    logger.info(
+        f"\n=====================================extract json prompt from video_info=\n{content}"
+    )
+
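+    # Parse the JSON object and replace spaces in its keys so the
+    # generated video names contain no whitespace.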
+    data_dict = json.loads(content)
+    converted_dict = {key.replace(" ", "_"): value for key, value in data_dict.items()}
+    return converted_dict
+
+
+def generate_videos(converted_dict: dict) -> str:
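+    """Render one video per prompt through the Text2Video proxy.
+
+    Returns a stringified list of {"video_name": ..., "url": ...} entries,
+    or "No Videos" when there are no prompts.
+    """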
+    video_list = []
+    try:
+        if len(converted_dict) == 0:
+            return "No Videos"
+        index = 0
+        logger.info(
+            f"generate_videos: start generating videos, total: {len(converted_dict)}, current: {index}"
+        )
+        # Request one video from the Text2Video proxy per prompt:
+        for video_name, video_prompt in converted_dict.items():
+            index += 1
+            t2v_prompt = T2VPrompt()
+            t2v_prompt.prompt = video_prompt
+            download_url = t2v_request(t2v_prompt)
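+            # t2v_request yields a download URL for the rendered clip, or
+            # None when generation fails; failed entries are skipped below.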
+            if download_url is None:
+                continue
+
+            video_list.append({"video_name": video_name, "url": download_url})
+            logger.info(
+                f"generate_videos: generating videos, total: {len(converted_dict)}, completed: {index}, video_list={str(video_list)}"
+            )
+        return str(video_list)
+    except Exception as e:
+        logger.error(f"generate_videos exception: {str(e)}")
+        return str(video_list)
+
+
def write_markdown_content(content, filename, filepath):
    if not os.path.exists(filepath):
        os.makedirs(filepath)
@@ -201,7 +272,14 @@ async def startup(idea: str):
    image_data = generate_images(image_prompt_dict)
    logger.info(f"\ncompleted generate_images=\n{image_data}")
-    prompt_req = OPERATE_PROMPT.format(summary_data, image_data)
+    # If a Text2Video service is deployed, also enrich the document with videos
+    video_prompt_dict = parse_video_info(summary_data)
+    logger.info(f"\ncompleted parse_video_info=\n{video_prompt_dict}")
+
+    video_data = generate_videos(video_prompt_dict)
+    logger.info(f"\ncompleted generate_videos=\n{video_data}")
+
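+    # OPERATE_PROMPT placeholders: {0}=summary, {1}=image list, {2}=video list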
+    prompt_req = OPERATE_PROMPT.format(summary_data, image_data, video_data)
logger.info(f"\ngenerated markdown content prompt request=\n{prompt_req}")
result = query_from_openai_proxy(prompt_req)