From e5ee3fdc19cc693fc4cccffc43f9633efc4f892f Mon Sep 17 00:00:00 2001
From: xuyuan23 <643854343@qq.com>
Date: Sat, 12 Aug 2023 21:35:56 +0800
Subject: [PATCH] add video generation to generated docs
---
README.md | 7 +++--
main.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 84 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index a71239e..e4222f1 100644
--- a/README.md
+++ b/README.md
@@ -53,12 +53,12 @@ git lfs install
git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
# [Options]
-# Size: 94 GB, stablediffusion-proxy service is recommended, https://github.com/xuyuan23/stablediffusion-proxy
+# Size: 94 GB, supports running on CPU (RAM > 14 GB). The stablediffusion-proxy service is recommended: https://github.com/xuyuan23/stablediffusion-proxy
git lfs install
git clone https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
# [Options]
-# Size: 10 GB, Text2Video service is recommended. https://github.com/xuyuan23/Text2Video
+# Size: 16 GB, supports running on CPU (RAM > 16 GB). The Text2Video service is recommended: https://github.com/xuyuan23/Text2Video
git lfs install
git clone https://huggingface.co/cerspense/zeroscope_v2_576w
```
@@ -89,6 +89,9 @@ OPEN_AI_KEY=sk-xxx
# If you don't deploy stable diffusion service, no image will be generated.
SD_PROXY_URL=127.0.0.1:7860
+
+# If you don't deploy the Text2Video service, no videos will be generated.
+T2V_PROXY_URL=127.0.0.1:7861
```
- More Details see file `.env.template`
diff --git a/main.py b/main.py
index 3f8e7ea..0d43f09 100644
--- a/main.py
+++ b/main.py
@@ -13,6 +13,9 @@
from operategpt.providers import sd_proxy
from dotenv import load_dotenv
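+# Text2Video support: T2VPrompt wraps the prompt payload and t2v_request
+# sends it to the Text2Video proxy configured via T2V_PROXY_URL.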
+from operategpt.providers.base import T2VPrompt
+from operategpt.providers.text2video_proxy import t2v_request
+
load_dotenv(verbose=True, override=True)
OPEN_AI_PROXY_SERVER_URL = os.getenv("OPEN_AI_PROXY_SERVER_URL", "https://api.openai.com/v1/chat/completions")
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
@@ -29,6 +32,11 @@
```
{1}
```
+
+Please insert the following videos at different, appropriate locations in the document (not all in the same place). You can use a format such as `<video src="video_url"></video>`; if the video list is empty, please ignore this instruction.
+```
+{2}
+```
"""
IMAGE_DESC_PROMPT = """Based on the content below, select 3 to 5 relevant events or content information and describe them along with their respective characteristics:
@@ -43,6 +51,19 @@
"""
+VIDEO_DESC_PROMPT = """Based on the content below, summarize the core subject along with its related functions and processes:
+```
+{0}
+```
+
+Please provide an answer similar to the one below, without any additional information. The details must start with <VideoPrompt> and end with </VideoPrompt>; do not output any content outside the <VideoPrompt> and </VideoPrompt> tags.
+Respond in the following format: a single JSON object with key-value pairs:
+
+    <VideoPrompt>{{"video-name-1": "<prompt describing video 1>"}}</VideoPrompt>
+
+"""
+
+
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
PROJECT_DATA_PATH = os.path.join(ROOT_DIR, "data")
@@ -157,6 +178,56 @@ def generate_images(converted_dict: dict) -> str:
    return str(image_dict)
+def parse_video_info(summary_data: str) -> dict:
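+    """Ask the LLM to distill summary_data into short text-to-video prompts.
+
+    Returns a dict mapping video names (spaces replaced by underscores)
+    to their generation prompts.
+    """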
+    videos_prompt_info = VIDEO_DESC_PROMPT.format(summary_data)
+    logger.info(
+        f"\n====================================videos_prompt_info=\n{videos_prompt_info}"
+    )
+
+    video_info = query_from_openai_proxy(videos_prompt_info)
+    logger.info(f"\n====================================video_info=\n{video_info}")
+
+    # Extract the content within the VideoPrompt tags
+    start_index = video_info.index("<VideoPrompt>") + len("<VideoPrompt>")
+    end_index = video_info.index("</VideoPrompt>")
+    content = video_info[start_index:end_index]
+    logger.info(
+        f"\n=====================================extract json prompt from video_info=\n{content}"
+    )
+
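+    # Parse the JSON object and replace spaces in its keys so the
+    # generated video names contain no whitespace.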
+    data_dict = json.loads(content)
+    converted_dict = {key.replace(" ", "_"): value for key, value in data_dict.items()}
+    return converted_dict
+
+
+def generate_videos(converted_dict: dict) -> str:
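+    """Render one video per prompt through the Text2Video proxy.
+
+    Returns a stringified list of {"video_name": ..., "url": ...} entries,
+    or "No Videos" when there are no prompts.
+    """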
+    video_list = []
+    try:
+        if len(converted_dict) == 0:
+            return "No Videos"
+        index = 0
+        logger.info(
+            f"generate_videos: start generating videos, total: {len(converted_dict)}, current: {index}"
+        )
+        # Request one video from the Text2Video proxy per prompt:
+        for video_name, video_prompt in converted_dict.items():
+            index += 1
+            t2v_prompt = T2VPrompt()
+            t2v_prompt.prompt = video_prompt
+            download_url = t2v_request(t2v_prompt)
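+            # t2v_request yields a download URL for the rendered clip, or
+            # None when generation fails; failed entries are skipped below.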
+            if download_url is None:
+                continue
+
+            video_list.append({"video_name": video_name, "url": download_url})
+            logger.info(
+                f"generate_videos: generating videos, total: {len(converted_dict)}, completed: {index}, video_list={str(video_list)}"
+            )
+        return str(video_list)
+    except Exception as e:
+        logger.error(f"generate_videos exception: {str(e)}")
+        return str(video_list)
+
+
def write_markdown_content(content, filename, filepath):
    if not os.path.exists(filepath):
        os.makedirs(filepath)
@@ -201,7 +272,14 @@ async def startup(idea: str):
    image_data = generate_images(image_prompt_dict)
    logger.info(f"\ncompleted generate_images=\n{image_data}")
-    prompt_req = OPERATE_PROMPT.format(summary_data, image_data)
+    # If a Text2Video service is deployed, also enrich the document with videos
+    video_prompt_dict = parse_video_info(summary_data)
+    logger.info(f"\ncompleted parse_video_info=\n{video_prompt_dict}")
+
+    video_data = generate_videos(video_prompt_dict)
+    logger.info(f"\ncompleted generate_videos=\n{video_data}")
+
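+    # OPERATE_PROMPT placeholders: {0}=summary, {1}=image list, {2}=video list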
+    prompt_req = OPERATE_PROMPT.format(summary_data, image_data, video_data)
logger.info(f"\ngenerated markdown content prompt request=\n{prompt_req}")
result = query_from_openai_proxy(prompt_req)