Skip to content

Commit

Permalink
add video generation in doc
Browse files Browse the repository at this point in the history
  • Loading branch information
xuyuan23 committed Aug 12, 2023
1 parent 0a677e0 commit e5ee3fd
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 3 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ git lfs install
git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
# [Options]
# Size: 94 GB, stablediffusion-proxy service is recommended, https://github.com/xuyuan23/stablediffusion-proxy
# Size: 94 GB, supports running in CPU mode (RAM > 14 GB). stablediffusion-proxy service is recommended, https://github.com/xuyuan23/stablediffusion-proxy
git lfs install
git clone https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
# [Options]
# Size: 10 GB, Text2Video service is recommended. https://github.com/xuyuan23/Text2Video
# Size: 16 GB, supports running in CPU mode (RAM > 16 GB). Text2Video service is recommended. https://github.com/xuyuan23/Text2Video
git lfs install
git clone https://huggingface.co/cerspense/zeroscope_v2_576w
```
Expand Down Expand Up @@ -89,6 +89,9 @@ OPEN_AI_KEY=sk-xxx

# If you don't deploy stable diffusion service, no image will be generated.
SD_PROXY_URL=127.0.0.1:7860

# If you don't deploy Text2Video service, no videos will be generated.
T2V_PROXY_URL=127.0.0.1:7861
```
- More Details see file `.env.template`

Expand Down
80 changes: 79 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
from operategpt.providers import sd_proxy
from dotenv import load_dotenv

from operategpt.providers.base import T2VPrompt
from operategpt.providers.text2video_proxy import t2v_request

load_dotenv(verbose=True, override=True)
OPEN_AI_PROXY_SERVER_URL = os.getenv("OPEN_AI_PROXY_SERVER_URL", "https://api.openai.com/v1/chat/completions")
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
Expand All @@ -29,6 +32,11 @@
```
{1}
```
Please insert the following videos at the different appropriate locations in the document, not the same locations, you can use the format such as `<video width="640" height="360" controls> <source src="http://xxxxxx.mp4" type="video/mp4">video-name</video>`, if the video list is empty, please ignore.
```
{2}
```
"""

IMAGE_DESC_PROMPT = """Based on the content below, select 3 to 5 relevant events or content information and describe them along with their respective characteristics:
Expand All @@ -43,6 +51,19 @@
"""

VIDEO_DESC_PROMPT = """Based on the content below, summarize a core thing, as well as related functions and processes
```
{0}
```
Please provide an answer similar to the one below, but without any additional information, details start with <VideoPrompt>, end with </VideoPrompt>, no content beyound tag <VideoPrompt> and </VideoPrompt>.
You should response me follow next format, only one json data with some key-value data:
<VideoPrompt> {{"video-name-1": "<summary content1>"}} </VideoPrompt>
"""


# Absolute path of the directory containing this source file.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# Directory where generated artifacts are written (under the project root).
PROJECT_DATA_PATH = os.path.join(ROOT_DIR, "data")
Expand Down Expand Up @@ -157,6 +178,56 @@ def generate_images(converted_dict: dict) -> str:
return str(image_dict)


def parse_video_info(summary_data: str) -> dict:
    """Ask the LLM to turn `summary_data` into video-generation prompts.

    The LLM is instructed (via VIDEO_DESC_PROMPT) to answer with a single
    JSON object wrapped in <VideoPrompt>...</VideoPrompt> tags.

    Args:
        summary_data: the summarized document content to derive videos from.

    Returns:
        dict mapping video names (spaces replaced by underscores, so the
        names are safe to use as keys/file names) to prompt text.

    Raises:
        ValueError: if the response lacks the <VideoPrompt> tags.
        json.JSONDecodeError: if the tagged content is not valid JSON.
    """
    videos_prompt_info = VIDEO_DESC_PROMPT.format(summary_data)
    logger.info(
        f"\n====================================videos_prompt_info=\n{videos_prompt_info}"
    )

    video_info = query_from_openai_proxy(videos_prompt_info)
    # Fixed copy-paste defect: this was previously logged as "image_info".
    logger.info(f"\n====================================video_info=\n {video_info}")

    # Extract the JSON payload between the VideoPrompt tags; tag lengths are
    # computed instead of hard-coding the magic number 13.
    start_tag, end_tag = "<VideoPrompt>", "</VideoPrompt>"
    start_index = video_info.index(start_tag) + len(start_tag)
    end_index = video_info.index(end_tag)
    content = video_info[start_index:end_index]
    logger.info(
        f"\n=====================================extract json prompt from video_info=\n{content}"
    )

    data_dict = json.loads(content)
    return {key.replace(" ", "_"): value for key, value in data_dict.items()}


def generate_videos(converted_dict: dict) -> str:
    """Generate a video per (name, prompt) pair via the Text2Video proxy.

    Args:
        converted_dict: mapping of video name -> text prompt, as produced
            by parse_video_info.

    Returns:
        str() of a list of {"video_name": ..., "url": ...} entries for the
        videos that were successfully generated, or "No Videos" when the
        input mapping is empty. On any exception the partial results
        collected so far are returned instead of raising (best-effort).
    """
    # Named `videos` (a list), not `video_dict` — the original name was
    # misleading about the container type.
    videos = []
    try:
        if not converted_dict:
            return "No Videos"
        total = len(converted_dict)
        # Log prefixes fixed: they previously said "parse_video_info".
        logger.info(
            f"generate_videos: start generate videos, total: {total}, current: 0"
        )
        for index, (video_name, video_prompt) in enumerate(
            converted_dict.items(), start=1
        ):
            t2v_prompt = T2VPrompt()
            t2v_prompt.prompt = video_prompt
            download_url = t2v_request(t2v_prompt)
            if download_url is None:
                # Proxy produced nothing for this prompt; skip and continue.
                continue

            videos.append({"video_name": video_name, "url": download_url})
            logger.info(
                f"generate_videos: generating videos, total: {total}, completed: {index}, video_dict={str(videos)}"
            )
        return str(videos)
    except Exception as e:
        # Best-effort by design: log and return whatever was generated.
        logger.info(f"generate_videos exception: {str(e)}")
        return str(videos)


def write_markdown_content(content, filename, filepath):
if not os.path.exists(filepath):
os.makedirs(filepath)
Expand Down Expand Up @@ -201,7 +272,14 @@ async def startup(idea: str):
image_data = generate_images(image_prompt_dict)
logger.info(f"\ncompleted generate_images=\n{image_data}")

prompt_req = OPERATE_PROMPT.format(summary_data, image_data)
# if exist Text2Video model, add video info
video_prompt_dict = parse_video_info(summary_data)
logger.info(f"\ncompleted parse_video_info=\n{video_prompt_dict}")

video_data = generate_videos(video_prompt_dict)
logger.info(f"\ncompleted generate_videos=\n{video_data}")

prompt_req = OPERATE_PROMPT.format(summary_data, image_data, video_data)
logger.info(f"\ngenerated markdown content prompt request=\n{prompt_req}")

result = query_from_openai_proxy(prompt_req)
Expand Down

0 comments on commit e5ee3fd

Please sign in to comment.