Skip to content

Commit

Permalink
add set vector store
Browse files Browse the repository at this point in the history
  • Loading branch information
cmgzn committed Oct 23, 2024
1 parent f7edc15 commit 5c754a4
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
}
}
},
"store_and_index": {
"data_parse": {
"splitter": {
"create_object": true,
"module": "langchain_text_splitters.python",
Expand All @@ -27,7 +27,23 @@
}
}
}
]
],
"store_and_index":{
"stores":{
"vector_store": {
"create_object": true,
"module": "langchain_elasticsearch",
"class": "ElasticsearchStore",
"init_args": {
"es_url": "http://localhost:9200",
"index_name": "lc_code_rag",
"es_user": "elastic",
"es_password": "147258",
"embedding_key": "embedding"
}
}
}
}
},
{
"knowledge_id": "agentscope_api_rag",
Expand Down Expand Up @@ -82,7 +98,7 @@
}
}
},
"store_and_index": {
"data_parse": {
"splitter": {
"create_object": true,
"module": "langchain_text_splitters.python",
Expand Down
66 changes: 56 additions & 10 deletions src/agentscope/rag/langchain_knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,14 @@ def _load_store(self) -> None:
Load the persisted index from persist_dir.
"""
# set the storage
self.vectorstore = InMemoryVectorStore.load(
self.persist_store_file,
self.emb_model,
self.vectorstore = self._set_store(
self.knowledge_config.get("store_and_index", {}),
)
if not self.vectorstore:
self.vectorstore = InMemoryVectorStore.load(
self.persist_store_file,
self.emb_model,
)
# set the record manager
self.record_manager = InMemoryRecordManager(self.knowledge_id)
self.record_manager.create_schema()
Expand All @@ -185,30 +189,43 @@ def _data_to_store(self) -> None:
chunks = []
for config in self.knowledge_config.get("data_processing"):
documents = self._data_to_docs(config=config)
splitter = self._set_splitter(config=config).get(
"splitter",
)
splitter = self._set_splitter(config=config).get("splitter")
chunks_docs = self._docs_to_chunks(
documents=documents,
splitter=splitter,
)
chunks = chunks + chunks_docs

# convert chunks to vector store and index
self.vectorstore = InMemoryVectorStore(self.emb_model)
self.vectorstore = self._set_store(
config=self.knowledge_config.get("store_and_index", {}),
)
if not self.vectorstore:
self.vectorstore = InMemoryVectorStore(
self.emb_model,
)
index(
chunks,
self.record_manager,
self.vectorstore,
cleanup=None,
source_id_key="source",
# upsert_kwargs={"embedding": self.emb_model}
# This feature is only supported in langchain 0.3.10
)
logger.info("vector store and index created successfully.")
self.vectorstore.dump(self.persist_store_file)

# persist
if isinstance(self.vectorstore, InMemoryVectorStore):
self.vectorstore.dump(self.persist_store_file)
logger.info("In-memory vector store are persisted.")
self._save_memory_record(self.persist_index_file)
logger.info("vector store and index are persisted.")
logger.info("index are persisted.")

def _save_memory_record(self, filename: str) -> None:
    """
    Dump the record manager's records to a JSON file.

    Args:
        filename (str): path of the JSON file to write. Missing parent
            directories are created first.
    """
    filedir = os.path.dirname(filename)
    # dirname is "" for a bare file name (file in the current working
    # directory); os.makedirs("") would raise FileNotFoundError.
    if filedir:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(filedir, exist_ok=True)
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(self.record_manager.records, f, indent=4)

Expand Down Expand Up @@ -247,6 +264,26 @@ def _docs_to_chunks(
) -> Any:
return splitter.split_documents(documents)

def _set_store(self, config: dict) -> Any:
    """
    Build the vector store described by the "stores" section of a
    store-and-index configuration.

    The optional "embedding_key" entry in the vector store's "init_args"
    names the keyword argument under which the embedding model is passed
    to the store's constructor (default: "embedding"). It is removed from
    the arguments before the store is created.

    Args:
        config (dict): a dictionary containing configurations. It is
            not modified by this call.

    Returns:
        The "vector_store" entry produced by
        `_prepare_args_from_config`, or None when `config` has no
        "stores" section.
    """
    if "stores" not in config:
        return None

    import copy

    # Work on a copy: this method is called on the same knowledge config
    # from several places. Mutating it in place would drop a custom
    # "embedding_key" after the first call and leave the (non JSON
    # serializable) embedding model inside the stored configuration.
    stores_config = copy.deepcopy(config.get("stores", {}))
    init_args = stores_config.get("vector_store", {}).get("init_args", {})
    embedding_key = init_args.pop("embedding_key", "embedding")
    init_args[embedding_key] = self.emb_model

    prepared = self._prepare_args_from_config(config=stores_config)
    return prepared.get("vector_store")

def _set_loader(self, config: dict) -> Any:
"""
Set the loader as needed, or just use the default setting.
Expand Down Expand Up @@ -285,7 +322,16 @@ def _set_splitter(self, config: dict) -> Any:
Args:
config (dict): a dictionary containing configurations.
"""
if "store_and_index" in config:
if "data_parse" in config:
temp = self._prepare_args_from_config(
config=config.get("data_parse", {}),
)
splitter = temp.get("splitter")
elif "store_and_index" in config:
logger.warning(
"The old configuration structure is deprecated, "
"please use data_parse instead of store_and_index.",
)
temp = self._prepare_args_from_config(
config=config.get("store_and_index", {}),
)
Expand Down

0 comments on commit 5c754a4

Please sign in to comment.