add video generation service, or proxy server from Text2Video
xuyuan23 committed Aug 11, 2023
1 parent 7464edd commit 0a677e0
Showing 9 changed files with 161 additions and 12 deletions.
9 changes: 8 additions & 1 deletion .env.template
@@ -7,9 +7,16 @@ OPEN_AI_KEY=sk-xxx

EMBEDDING_MODEL=all-MiniLM-L6-v2

# your text2video model, default is zeroscope_v2_576w (more details: https://github.com/xuyuan23/Text2Video)
T2V_MODEL=zeroscope_v2_576w
#T2V_MODEL=text-to-video-ms-1.7b

VECTOR_STORE_TYPE=Chroma

# your stable diffusion proxy service address (recommend: https://github.com/xuyuan23/stablediffusion-proxy)
SD_PROXY_URL=127.0.0.1:7860
SD_PROXY_URL=http://127.0.0.1:7860

# your text2video proxy service address (recommend: https://github.com/xuyuan23/Text2Video)
T2V_PROXY_URL=http://127.0.0.1:7861

SD_MODEL=stable-diffusion-xl-base-1.0
12 changes: 11 additions & 1 deletion README.md
@@ -52,10 +52,15 @@ mkdir models & cd models
git lfs install
git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
# Size: 94 GB, so stablediffusion-proxy service is recommended, https://github.com/xuyuan23/stablediffusion-proxy
# [Options]
# Size: 94 GB, stablediffusion-proxy service is recommended, https://github.com/xuyuan23/stablediffusion-proxy
git lfs install
git clone https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
# [Options]
# Size: 10 GB, Text2Video service is recommended. https://github.com/xuyuan23/Text2Video
git lfs install
git clone https://huggingface.co/cerspense/zeroscope_v2_576w
```

Then, download dependencies and launch your project.
@@ -65,9 +70,14 @@ pip install -r requirements.txt
# copy file `.env.template` to new file `.env`, and modify the params in `.env`.
cp .env.template .env
# [Options]
# Deploy the StableDiffusion service; skip this if the StableDiffusion proxy is used.
python operategpt/providers/stablediffusion.py
# [Options]
# Deploy the Text2Video service; skip this if the Text2Video proxy server is used.
python operategpt/providers/text2video.py
python main.py "what is MetaGPT?"
```

1 change: 1 addition & 0 deletions docs/README_ZH.md
@@ -3,6 +3,7 @@

[**English**](../README.md) | [**Docs**](http://operategpt.cn/web/#/602177878/152973408)|
</div>

- Using large language models and multi-agent technology, a single-line requirement automatically generates operation copy, images, and videos, which can be pushed to multiple platforms with one click to transform rapid operations

![OperateGPT Process](../assets/operateGPT_process.png)
41 changes: 41 additions & 0 deletions docs/how_operategpt_work_zh.md
@@ -0,0 +1,41 @@

# OperateGPT: Complete Your Operations from a One-Sentence Requirement

- Using large language models and multi-agent technology, OperateGPT automatically generates operation copy, images, and videos from a one-sentence requirement, adapting and publishing them to multiple platforms with one click to transform rapid operations.

![](../assets/operateGPT_process.png)

## OperateGPT Technical Architecture

![](../assets/operateGPT_arch.png)

### 1. Data Sources
Data sources are identified automatically from the user's requirement and may come from the following:
- Internet web data: any accessible web page on the Internet can be retrieved through relevance search and serves as the default data source
- Databases: the user's database content is private data; combined with the operation requirement it supports intelligent BI and generates analytical charts
- Audio/video: when relevant to the operation scenario, it can be embedded automatically into the operation report
- Images: when relevant to the operation scenario, they can be embedded automatically into the operation report
- Local documents: local documents are embedded and stored in the vector database, supporting knowledge-base Q&A and serving as an important aggregated data source for operations

### 2. Data Aggregation
Based on the operation requirement, relevant data is queried from each data source for further analysis and aggregation.
- URL data: the most relevant top pages can be queried from Google, Baidu, Bing, and similar sites; these pages are split into chunks and embedded into vectors, and a similarity search then retrieves the most relevant content as the basis for copy generation (see the sketch after this list).
- Databases: based on the operation requirement and business scenario, SQL is generated to query relevant data from RDB/NoSQL databases and build reports, which are embedded as part of the operation data
- Local documents: searched from the vector database
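
A minimal sketch of this chunk/embed/search flow, assuming the `langchain` and `chromadb` packages already listed in requirements.txt and the all-MiniLM-L6-v2 embedding model configured in `.env.template`; the page text and query below are placeholders, not values taken from the project code.

```python
# Sketch only: split scraped page text into chunks, embed them, and retrieve the
# chunks most similar to the operation requirement.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

page_text = "...full text scraped from a relevant web page..."

# Split the page into overlapping chunks so each one fits the embedding model.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_text(page_text)

# Embed the chunks and keep them in a local Chroma collection.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = Chroma.from_texts(chunks, embeddings, collection_name="operate_data")

# Retrieve the most relevant chunks as the basis for copy generation.
for doc in store.similarity_search("what is MetaGPT?", k=4):
    print(doc.page_content)
```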

### 3. Content Generation
- Prompt engineering extracts objects, scenes, and their attributes from the aggregated data; StableDiffusion or Midjourney generates images, and tools such as Gen-2 generate video content
- Prompt engineering intelligently combines the aggregated operation content, analytical reports, images, and videos to quickly produce articles, videos, and web pages that match the operation requirement
- The generated content is adjusted to each platform's characteristics: video-centric platforms such as Bilibili and Douyin focus on video generation with the operation content embedded in the video, while platforms such as Zhihu and CSDN rely more on text-and-image copy (a sketch of the image/video generation calls follows this list)
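
As a rough illustration of how images and videos could be requested during this step, the sketch below posts to the local stablediffusion.py and text2video.py services added in this commit, using the addresses from `.env.template`; the prompts, ports, and the assumption that both services are already running are illustrative only.

```python
# Sketch only: request one image and one short video for an operation report,
# assuming the local services from this commit listen on ports 7860 and 7861.
import requests

from operategpt.providers.base import T2ImgPrompt, T2VPrompt

headers = {"Content-Type": "application/json"}

img_prompt = T2ImgPrompt(prompt="a cup of milk tea on a wooden table", image_name="milk_tea")
img_resp = requests.post(
    "http://127.0.0.1:7860/generate_img", headers=headers, data=img_prompt.json()
)

video_prompt = T2VPrompt(prompt="a cup of milk tea rotating slowly", num_frames=24)
video_resp = requests.post(
    "http://127.0.0.1:7861/generate_video", headers=headers, data=video_prompt.json()
)

print(img_resp.json(), video_resp.json())
```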

### 4. Content Review
Operation content must be reviewed before it is published; the simplest approach is to have the LLM learn the content review rules and perform the review automatically (a minimal sketch follows this list):
- Content quality review
- Legal and compliance review
- Sensitive data review
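
A minimal sketch of such LLM-based review, assuming langchain's ChatOpenAI wrapper and the OPEN_AI_KEY variable from `.env.template`; the review rules and the draft text are placeholder assumptions, not part of the project code.

```python
# Sketch only: ask the LLM to check a draft against the three review dimensions above.
import os

from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage

load_dotenv()

REVIEW_RULES = (
    "Review the draft for: 1) content quality, 2) legal compliance, "
    "3) sensitive data. Reply PASS or REJECT with a one-sentence reason."
)


def review(draft: str) -> str:
    llm = ChatOpenAI(temperature=0, openai_api_key=os.getenv("OPEN_AI_KEY"))
    messages = [SystemMessage(content=REVIEW_RULES), HumanMessage(content=draft)]
    return llm(messages).content


print(review("OperateGPT turns a one-sentence requirement into a full operation report."))
```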

### 5. Platform Publishing
- One-click push to multiple platforms
- Account management


15 changes: 15 additions & 0 deletions operategpt/providers/base.py
@@ -0,0 +1,15 @@
from pydantic import BaseModel


class T2VPrompt(BaseModel):
    prompt: str = None
    num_inference_steps: int = 40
    height: int = 320
    width: int = 576
    num_frames: int = 24


class T2ImgPrompt(BaseModel):
    prompt: str = None
    image_name: str = None
    image_type: str = "png"
11 changes: 3 additions & 8 deletions operategpt/providers/stablediffusion.py
@@ -5,9 +5,10 @@
import uvicorn
from diffusers import DiffusionPipeline
from fastapi import FastAPI
from pydantic import BaseModel
from dotenv import load_dotenv

from operategpt.providers.base import T2ImgPrompt

load_dotenv(verbose=True, override=True)

app = FastAPI()
@@ -27,14 +28,8 @@
pipe.to(device)


class SDPrompt(BaseModel):
    prompt: str = None
    image_name: str = None
    image_type: str = "png"


@app.post("/generate_img")
def sd_request(sd_prompt: SDPrompt):
def sd_request(sd_prompt: T2ImgPrompt):
    prompt = sd_prompt.prompt
    image_name = sd_prompt.image_name
    image_type = sd_prompt.image_type
48 changes: 48 additions & 0 deletions operategpt/providers/text2video.py
@@ -0,0 +1,48 @@
import os
import time

import torch
import uvicorn
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
from dotenv import load_dotenv
from fastapi import FastAPI
from moviepy.video.io.VideoFileClip import VideoFileClip
from pydantic import BaseModel

# Load .env so T2V_MODEL is available; fall back to the default from .env.template.
load_dotenv(verbose=True, override=True)

app = FastAPI()

T2V_MODEL = os.getenv("T2V_MODEL", "zeroscope_v2_576w")

ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
DATA_PATH = os.path.join(ROOT_PATH, "data")
MODEL_PATH = os.path.join(ROOT_PATH, "models")

pipe = DiffusionPipeline.from_pretrained(os.path.join(MODEL_PATH, T2V_MODEL), torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()


class LLMPrompt(BaseModel):
    prompt: str = None
    num_inference_steps: int = 40
    height: int = 320
    width: int = 576
    num_frames: int = 24


@app.post("/generate_video")
def generate_video(lp: LLMPrompt):
video_frames = pipe(lp.prompt, lp.num_inference_steps, lp.height, lp.width, lp.num_frames).frames
timestamp = int(time.time())
video_name_tmp = f"{T2V_MODEL}_{str(timestamp)}_tmp.mp4"
video_path = export_to_video(video_frames, os.path.join(DATA_PATH, video_name_tmp))
video = VideoFileClip(video_path)

video_name = f"{T2V_MODEL}_{str(timestamp)}.mp4"
output_video_path = os.path.join(DATA_PATH, video_name)
video.write_videofile(output_video_path, codec="libx264", audio_codec="aac")
    # Return the shape expected by text2video_proxy.t2v_request.
    return {"success": True, "msg": output_video_path}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7861, log_level="info")
30 changes: 30 additions & 0 deletions operategpt/providers/text2video_proxy.py
@@ -0,0 +1,30 @@
import os

import requests
from dotenv import load_dotenv

from operategpt.providers.base import T2VPrompt

load_dotenv(verbose=True, override=True)

T2V_PROXY_URL = os.getenv("T2V_PROXY_URL", "http://127.0.0.1:7861")
T2V_GENERATE_VIDEO_API = "/generate_video"


def t2v_request(t2v_prompt: T2VPrompt):
    url = T2V_PROXY_URL + T2V_GENERATE_VIDEO_API
    headers = {"Content-Type": "application/json"}

    # Serialize the pydantic model directly; json.dumps() cannot handle a BaseModel.
    response = requests.post(url, headers=headers, data=t2v_prompt.json())
    result = response.json()

    if result.get("success"):
        return result["msg"]
    return None


if __name__ == "__main__":
    prompt = T2VPrompt()
    prompt.prompt = "A beautiful girl walks through the mall with a cup of milk tea, her hair blowing in the wind"
    print(t2v_request(prompt))
6 changes: 4 additions & 2 deletions requirements.txt
@@ -11,11 +11,13 @@ loguru~=0.7.0
langchain~=0.0.142
chromadb~=0.3.23
pydantic~=1.10.7
fire~=0.4.0
fire~=0.5.0
flask~=2.3.2
diffusers~=0.19.3
uvicorn~=0.22.0
tqdm~=4.64.1
transformers~=4.28.0
python-dotenv~=1.0.0
torch~=2.0.0
torch~=2.0.0
moviepy~=1.0.3
bs4~=0.0.1
