-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'develop' into agent/feat/restorable_agent_run
- Loading branch information
Showing
41 changed files
with
1,206 additions
and
631 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# ERNIE Bot Agent QA Bot | ||
|
||
ERNIE Bot Agent QA Bot是一个ERNIE Bot Agent使用教学机器人,基于 `FunctionAgentWithRetrieval`,该工具旨在辅助用户解决与EB-Agent相关的问题,帮助用户更快地使用 `erniebot_agent`库,搭建属于自己的Agent。 | ||
|
||
## 架构 | ||
|
||
此应用基于 `FunctionAgentWithRetrieval`(后续 `RetrievalAgent`上线后将同步更换),将此仓库中相关模块的markdown文件以及ipynb的示例代码文件向量化并通过自定义检索工具检索,实现EB-Agent教学机器人。 | ||
|
||
### 自定义检索工具 | ||
|
||
此应用中的检索工具基于 `langchain`的 `faiss`本地向量库,同时基于此应用特性,用户可能需要了解具体的代码实现。因此在实现时同时检索召回说明文档的内容(存储于db)以及相关的代码内容(存储于module_code_db)。 | ||
|
||
```python | ||
class FaissSearch: | ||
def __init__(self, db, embeddings, module_code_db): | ||
self.db = db | ||
self.module_code_db = module_code_db | ||
self.embeddings = embeddings | ||
``` | ||
|
||
## 如何开始 | ||
|
||
**注意:** 建库的过程比较缓慢,请耐心等待。 | ||
|
||
> 第一步:下载项目源代码,请确保您已经安装了erniebot_agent以及erniebot | ||
```bash | ||
git clone https://github.com/PaddlePaddle/ERNIE-SDK.git | ||
cd ERNIE-SDK | ||
pip install erniebot-agent | ||
``` | ||
|
||
> 第二步:如果是第一次运行,请先初始化向量库(也可以直接使用应用中已上传的向量库) | ||
```bash | ||
python question_bot.py --init=True --access-token <aistudio-access-token> | ||
``` | ||
|
||
> 如果已经初始化过向量库,直接运行即可 | ||
```bash | ||
python question_bot.py --access-token <aistudio-access-token> | ||
``` |
119 changes: 119 additions & 0 deletions
119
erniebot-agent/applications/eb-agent-qa-bot/init_vector_db.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
from typing import List, Union | ||
|
||
import erniebot | ||
import nbformat | ||
from langchain.text_splitter import ( | ||
MarkdownHeaderTextSplitter, | ||
RecursiveCharacterTextSplitter, | ||
) | ||
from langchain.vectorstores import FAISS | ||
from langchain_core.documents import Document | ||
from tqdm import tqdm | ||
|
||
# (heading marker, name) pairs handed to MarkdownHeaderTextSplitter below.
# Only the first two heading levels are used as split points.
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    # Deeper heading levels are currently disabled:
    # ("###", "Header 3"),
    # ("####", "Header 4"),
]
|
||
|
||
def get_summary(content: str) -> Union[str, None]:
    """Ask the ERNIE chat model for a short summary of a markdown chunk.

    The summary is what gets embedded for retrieval; a different LLM can be
    substituted by changing the ``model`` argument below.

    Args:
        content: Markdown text to summarise.

    Returns:
        Whatever ``get_result()`` of the chat completion response yields.
    """
    prompt = f"请帮我给以下markdown文件生成摘要用于用户问文档内容时的检索匹配,不要超过400个字:\n{content}"
    response = erniebot.ChatCompletion.create(
        model="ernie-longtext",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.get_result()
|
||
|
||
def open_and_concatenate_ipynb(ipynb_path: str, encoding: str) -> str:
    """Return the code cells of a notebook as fenced Markdown code blocks.

    Args:
        ipynb_path: Path of the ``.ipynb`` file to read.
        encoding: Text encoding used to open the file.

    Returns:
        One "```python" fenced block per code cell, in notebook order, each
        followed by a blank line. An empty string if there are no code cells.
    """
    with open(ipynb_path, "r", encoding=encoding) as f:
        notebook_content = nbformat.read(f, as_version=4)

    # Collect the code cells in order and join once at the end.
    fenced_cells = []
    for cell in notebook_content["cells"]:
        if cell["cell_type"] != "code":
            continue
        source = cell["source"]
        # A cell source may lack a trailing newline; without one the closing
        # fence would be glued onto the last line of code.
        if source and not source.endswith("\n"):
            source += "\n"
        fenced_cells.append("```python\n" + source + "```\n\n")

    return "".join(fenced_cells)
|
||
|
||
def read_md_file(file_path: str) -> Union[str, None]:
    """Read a UTF-8 text file and return its content.

    Failures are reported on stdout instead of being raised.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content, or ``None`` if the file is missing or cannot be read.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as md_file:
            text = md_file.read()
    except FileNotFoundError:
        print(f"文件 '{file_path}' 不存在。")
    except Exception as err:
        print(f"读取文件时出现错误: {err}")
    else:
        return text
    return None
|
||
|
||
def load_md_files_to_doc(
    file_paths: List[str],
    chunk_size: int = 1000,
    chunk_overlap: int = 30,
) -> List[Document]:
    """Split markdown files into chunks and replace each chunk with its summary.

    Each file is split on the configured markdown headers and then into
    character chunks. For every chunk the original text is kept in
    ``metadata["raw_text"]`` and ``page_content`` is replaced by the result
    of ``get_summary``. Files that cannot be read are skipped.

    Args:
        file_paths: Markdown files to load.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to ``RecursiveCharacterTextSplitter``.

    Returns:
        The summarised chunks of all readable files, in input order.
    """
    header_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
    size_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    documents = []
    for path in tqdm(file_paths):
        markdown = read_md_file(path)
        if markdown is None:
            continue
        chunks = size_splitter.split_documents(header_splitter.split_text(markdown))
        for chunk in chunks:
            # Keep the original text; the summary is stored as the page content.
            chunk.metadata["raw_text"] = chunk.page_content
            chunk.page_content = get_summary(chunk.page_content)
        documents.extend(chunks)
    return documents
|
||
|
||
def init_db(faiss_name, faiss_name_module, embeddings):
    """Build the documentation and notebook-code FAISS indexes and save them.

    The markdown module docs are chunked and summarised, then indexed and
    saved under ``faiss_name``. After that, each cookbook notebook becomes one
    document whose page content is the notebook name and whose
    ``metadata["ipynb"]`` holds its concatenated code cells; these are
    indexed and saved under ``faiss_name_module``. All input paths are
    relative to the current working directory.

    Args:
        faiss_name: Destination passed to ``save_local`` for the docs index.
        faiss_name_module: Destination passed to ``save_local`` for the
            notebook-code index.
        embeddings: Embedding object handed to ``FAISS.from_documents``.
    """
    md_file_path = [
        "./docs/modules/file.md",
        "./docs/modules/agents.md",
        "./docs/modules/memory.md",
        "./docs/modules/message.md",
        "./docs/modules/chat_models.md",
        "./docs/modules/tools.md",
        "./docs/quickstart/agent.md",
        "./docs/quickstart/use-tool.md",
    ]
    content_doc = load_md_files_to_doc(md_file_path, 1000, 30)
    FAISS.from_documents(content_doc, embeddings).save_local(faiss_name)

    ipynb_path = [
        "./docs/cookbooks/agent/function_agent.ipynb",
        "./docs/cookbooks/agent/chat_models.ipynb",
        "./docs/cookbooks/agent/memory.ipynb",
        "./docs/cookbooks/agent/message.ipynb",
        "./docs/cookbooks/agent/local_tool.ipynb",
        "./docs/cookbooks/agent/tools_intro.ipynb",
        "./docs/cookbooks/agent/remote-tool/remote_tool.ipynb",
    ]
    module_doc = [
        Document(
            # Notebook file name without directory and ".ipynb" suffix.
            page_content=notebook[notebook.rfind("/") + 1 : notebook.rfind(".ipynb")],
            metadata={"ipynb": open_and_concatenate_ipynb(notebook, "utf-8")},
        )
        for notebook in ipynb_path
    ]
    FAISS.from_documents(module_doc, embeddings).save_local(faiss_name_module)
Oops, something went wrong.