From 76ffbea39990befe2c7531124696f5fd1f46f018 Mon Sep 17 00:00:00 2001 From: James Mitchell-White Date: Wed, 20 Nov 2024 13:00:34 +0000 Subject: [PATCH] Deliverable/85/clean up repo (#87) * Removed deprecated folders * Update .gitignore Removed references to deprecated folders. Updated references to Carrot-Assistant * Renamed to Lettuce to be clear * renamed doc files folder * Black formatting * Delete EU_test_set.csv * Update .gitignore --- .gitignore | 17 +- AI Assistant/app.py | 79 --- AI Assistant/chain/chains.py | 108 ---- AI Assistant/chain/memory.py | 39 -- AI Assistant/chain/models.py | 111 ---- AI Assistant/chain/prompts.py | 145 ------ AI Assistant/constant.py | 7 - AI Assistant/options/base_options.py | 159 ------ AI Assistant/requirements.txt | 21 - AI Assistant/templates/html_templates.py | 46 -- AI Assistant/utils/utils.py | 265 ---------- Carrot-Assistant/ui.py | 7 - {Carrot-Assistant => Lettuce}/.dockerignore | 0 {Carrot-Assistant => Lettuce}/.env.example | 0 {Carrot-Assistant => Lettuce}/Dockerfile | 0 {Carrot-Assistant => Lettuce}/OBS/init_db.sql | 0 {Carrot-Assistant => Lettuce}/OBS/omop.sh | 0 {Carrot-Assistant => Lettuce}/README.md | 0 {Carrot-Assistant => Lettuce}/app.py | 6 +- {Carrot-Assistant => Lettuce}/assistant.py | 0 .../components/__init__.py | 0 .../components/embeddings.py | 0 .../components/models.py | 0 .../components/pipeline.py | 0 .../components/prompt.py | 12 +- {Carrot-Assistant => Lettuce}/compose.yaml | 0 {Carrot-Assistant => Lettuce}/csv_input.py | 25 +- {Carrot-Assistant => Lettuce}/environment.yml | 0 .../evaluate_file.py | 0 .../evaluation/__init__.py | 0 .../evaluation/datasets/example.csv | 0 .../evaluation/eval_data_loaders.py | 0 .../evaluation/eval_tests.py | 0 .../evaluation/evaltypes.py | 0 .../evaluation/metrics.py | 0 .../evaluation/pipelines.py | 0 .../example_output.json | 0 Lettuce/log/20240802.log | 352 +++++++++++++ .../__init__.py => Lettuce/log/20240806.log | 0 Lettuce/log/20240807.log | 58 +++ Lettuce/log/20240812.log | 440 ++++++++++++++++ Lettuce/log/20240813.log | 328 ++++++++++++ Lettuce/log/20240814.log | 66 +++ Lettuce/log/20240816.log | 280 ++++++++++ Lettuce/log/20240827.log | 72 +++ Lettuce/log/20240904.log | 248 +++++++++ Lettuce/log/20240913.log | 32 ++ Lettuce/log/20241001.log | 28 + Lettuce/log/20241016.log | 3 + Lettuce/log/20241025.log | 336 ++++++++++++ .../__init__.py => Lettuce/log/20241026.log | 0 Lettuce/log/20241028.log | 29 ++ .../omop/OMOP_match.py | 51 +- .../routers => Lettuce/omop}/__init__.py | 0 .../omop/omop_models.py | 63 ++- .../omop/preprocess.py | 7 +- .../tests => Lettuce/options}/__init__.py | 0 .../options/base_options.py | 0 .../options/pipeline_options.py | 0 {Carrot-Assistant => Lettuce}/poetry.lock | 0 {Carrot-Assistant => Lettuce}/pyproject.toml | 0 .../requirements.txt | 0 Lettuce/routers/__init__.py | 0 .../routers/pipeline_routes.py | 73 +-- Lettuce/tests/__init__.py | 0 .../tests/log/20240913.log | 0 .../tests/log/20241004.log | 0 Lettuce/tests/log/20241018.log | 180 +++++++ Lettuce/tests/log/20241021.log | 480 ++++++++++++++++++ Lettuce/tests/log/20241030.log | 240 +++++++++ .../tests/test_db.py | 31 +- .../tests/test_evals.py | 0 .../tests/test_preprocess.py | 7 +- .../tests/test_prompt_build.py | 49 +- {Carrot-Assistant => Lettuce}/text_input.py | 11 +- Lettuce/ui.py | 11 + {Carrot-Assistant => Lettuce}/ui_utilities.py | 8 +- .../utils/logging_utils.py | 2 +- {Carrot-Assistant => Lettuce}/utils/utils.py | 0 RAG/README.md | 20 - RAG/constant.py | 7 - RAG/llm/agents.py | 38 -- RAG/llm/chains.py | 38 -- RAG/llm/models.py | 10 - RAG/llm/prompts.py | 50 -- RAG/main.py | 53 -- RAG/options/base_option.py | 139 ----- RAG/options/rag_option.py | 90 ---- RAG/preprocessing/embeddings_const.py | 24 - RAG/preprocessing/vectors.py | 142 ------ RAG/utils/utils.py | 30 -- {llettuce-docs => lettuce-docs}/Makefile | 0 .../doctrees/autoapi/OMOP_match/index.doctree | Bin .../_build/doctrees/autoapi/app/index.doctree | Bin .../doctrees/autoapi/assistant/index.doctree | Bin .../autoapi/base_options/index.doctree | Bin .../doctrees/autoapi/embeddings/index.doctree | Bin .../_build/doctrees/autoapi/index.doctree | Bin .../autoapi/logging_utils/index.doctree | Bin .../doctrees/autoapi/models/index.doctree | Bin .../autoapi/omop_models/index.doctree | Bin .../doctrees/autoapi/pipeline/index.doctree | Bin .../doctrees/autoapi/preprocess/index.doctree | Bin .../doctrees/autoapi/prompt/index.doctree | Bin .../_build/doctrees/autoapi/ui/index.doctree | Bin .../doctrees/autoapi/utils/index.doctree | Bin .../_build/doctrees/environment.pickle | Bin .../_build/doctrees/index.doctree | Bin .../_build/doctrees/troubleshooting.doctree | Bin .../_build/doctrees/usage.doctree | Bin .../autoapi/OMOP_match/index.rst | 0 .../autoapi/app/index.rst | 0 .../autoapi/assistant/index.rst | 0 .../autoapi/base_options/index.rst | 0 .../autoapi/csv_input/index.rst | 0 .../autoapi/embeddings/index.rst | 0 .../autoapi/eval/index.rst | 0 .../autoapi/index.rst | 0 .../autoapi/logging_utils/index.rst | 0 .../autoapi/models/index.rst | 0 .../autoapi/omop_models/index.rst | 0 .../autoapi/pipeline/index.rst | 0 .../autoapi/preprocess/index.rst | 0 .../autoapi/prompt/index.rst | 0 .../autoapi/text_input/index.rst | 0 .../autoapi/ui/index.rst | 0 .../autoapi/ui_utilities/index.rst | 0 .../autoapi/utils/index.rst | 0 {llettuce-docs => lettuce-docs}/conf.py | 0 .../embedding_models.rst | 0 {llettuce-docs => lettuce-docs}/index.rst | 0 .../large_language_models.rst | 0 {llettuce-docs => lettuce-docs}/make.bat | 0 .../troubleshooting.rst | 0 {llettuce-docs => lettuce-docs}/usage.rst | 0 135 files changed, 3391 insertions(+), 1782 deletions(-) delete mode 100644 AI Assistant/app.py delete mode 100644 AI Assistant/chain/chains.py delete mode 100644 AI Assistant/chain/memory.py delete mode 100644 AI Assistant/chain/models.py delete mode 100644 AI Assistant/chain/prompts.py delete mode 100644 AI Assistant/constant.py delete mode 100644 AI Assistant/options/base_options.py delete mode 100644 AI Assistant/requirements.txt delete mode 100644 AI Assistant/templates/html_templates.py delete mode 100644 AI Assistant/utils/utils.py delete mode 100644 Carrot-Assistant/ui.py rename {Carrot-Assistant => Lettuce}/.dockerignore (100%) rename {Carrot-Assistant => Lettuce}/.env.example (100%) rename {Carrot-Assistant => Lettuce}/Dockerfile (100%) rename {Carrot-Assistant => Lettuce}/OBS/init_db.sql (100%) rename {Carrot-Assistant => Lettuce}/OBS/omop.sh (100%) rename {Carrot-Assistant => Lettuce}/README.md (100%) rename {Carrot-Assistant => Lettuce}/app.py (89%) rename {Carrot-Assistant => Lettuce}/assistant.py (100%) rename {Carrot-Assistant => Lettuce}/components/__init__.py (100%) rename {Carrot-Assistant => Lettuce}/components/embeddings.py (100%) rename {Carrot-Assistant => Lettuce}/components/models.py (100%) rename {Carrot-Assistant => Lettuce}/components/pipeline.py (100%) rename {Carrot-Assistant => Lettuce}/components/prompt.py (92%) rename {Carrot-Assistant => Lettuce}/compose.yaml (100%) rename {Carrot-Assistant => Lettuce}/csv_input.py (82%) rename {Carrot-Assistant => Lettuce}/environment.yml (100%) rename {Carrot-Assistant => Lettuce}/evaluate_file.py (100%) rename {Carrot-Assistant => Lettuce}/evaluation/__init__.py (100%) rename {Carrot-Assistant => Lettuce}/evaluation/datasets/example.csv (100%) rename {Carrot-Assistant => Lettuce}/evaluation/eval_data_loaders.py (100%) rename {Carrot-Assistant => Lettuce}/evaluation/eval_tests.py (100%) rename {Carrot-Assistant => Lettuce}/evaluation/evaltypes.py (100%) rename {Carrot-Assistant => Lettuce}/evaluation/metrics.py (100%) rename {Carrot-Assistant => Lettuce}/evaluation/pipelines.py (100%) rename {Carrot-Assistant => Lettuce}/example_output.json (100%) create mode 100644 Lettuce/log/20240802.log rename Carrot-Assistant/omop/__init__.py => Lettuce/log/20240806.log (100%) create mode 100644 Lettuce/log/20240807.log create mode 100644 Lettuce/log/20240812.log create mode 100644 Lettuce/log/20240813.log create mode 100644 Lettuce/log/20240814.log create mode 100644 Lettuce/log/20240816.log create mode 100644 Lettuce/log/20240827.log create mode 100644 Lettuce/log/20240904.log create mode 100644 Lettuce/log/20240913.log create mode 100644 Lettuce/log/20241001.log create mode 100644 Lettuce/log/20241016.log create mode 100644 Lettuce/log/20241025.log rename Carrot-Assistant/options/__init__.py => Lettuce/log/20241026.log (100%) create mode 100644 Lettuce/log/20241028.log rename {Carrot-Assistant => Lettuce}/omop/OMOP_match.py (94%) rename {Carrot-Assistant/routers => Lettuce/omop}/__init__.py (100%) rename {Carrot-Assistant => Lettuce}/omop/omop_models.py (62%) rename {Carrot-Assistant => Lettuce}/omop/preprocess.py (86%) rename {Carrot-Assistant/tests => Lettuce/options}/__init__.py (100%) rename {Carrot-Assistant => Lettuce}/options/base_options.py (100%) rename {Carrot-Assistant => Lettuce}/options/pipeline_options.py (100%) rename {Carrot-Assistant => Lettuce}/poetry.lock (100%) rename {Carrot-Assistant => Lettuce}/pyproject.toml (100%) rename {Carrot-Assistant => Lettuce}/requirements.txt (100%) create mode 100644 Lettuce/routers/__init__.py rename {Carrot-Assistant => Lettuce}/routers/pipeline_routes.py (82%) create mode 100644 Lettuce/tests/__init__.py rename {Carrot-Assistant => Lettuce}/tests/log/20240913.log (100%) rename {Carrot-Assistant => Lettuce}/tests/log/20241004.log (100%) create mode 100644 Lettuce/tests/log/20241018.log create mode 100644 Lettuce/tests/log/20241021.log create mode 100644 Lettuce/tests/log/20241030.log rename {Carrot-Assistant => Lettuce}/tests/test_db.py (55%) rename {Carrot-Assistant => Lettuce}/tests/test_evals.py (100%) rename {Carrot-Assistant => Lettuce}/tests/test_preprocess.py (74%) rename {Carrot-Assistant => Lettuce}/tests/test_prompt_build.py (53%) rename {Carrot-Assistant => Lettuce}/text_input.py (91%) create mode 100644 Lettuce/ui.py rename {Carrot-Assistant => Lettuce}/ui_utilities.py (96%) rename {Carrot-Assistant => Lettuce}/utils/logging_utils.py (99%) rename {Carrot-Assistant => Lettuce}/utils/utils.py (100%) delete mode 100644 RAG/README.md delete mode 100644 RAG/constant.py delete mode 100644 RAG/llm/agents.py delete mode 100644 RAG/llm/chains.py delete mode 100644 RAG/llm/models.py delete mode 100644 RAG/llm/prompts.py delete mode 100644 RAG/main.py delete mode 100644 RAG/options/base_option.py delete mode 100644 RAG/options/rag_option.py delete mode 100644 RAG/preprocessing/embeddings_const.py delete mode 100644 RAG/preprocessing/vectors.py delete mode 100644 RAG/utils/utils.py rename {llettuce-docs => lettuce-docs}/Makefile (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/OMOP_match/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/app/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/assistant/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/base_options/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/embeddings/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/logging_utils/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/models/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/omop_models/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/pipeline/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/preprocess/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/prompt/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/ui/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/autoapi/utils/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/environment.pickle (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/index.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/troubleshooting.doctree (100%) rename {llettuce-docs => lettuce-docs}/_build/doctrees/usage.doctree (100%) rename {llettuce-docs => lettuce-docs}/autoapi/OMOP_match/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/app/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/assistant/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/base_options/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/csv_input/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/embeddings/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/eval/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/logging_utils/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/models/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/omop_models/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/pipeline/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/preprocess/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/prompt/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/text_input/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/ui/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/ui_utilities/index.rst (100%) rename {llettuce-docs => lettuce-docs}/autoapi/utils/index.rst (100%) rename {llettuce-docs => lettuce-docs}/conf.py (100%) rename {llettuce-docs => lettuce-docs}/embedding_models.rst (100%) rename {llettuce-docs => lettuce-docs}/index.rst (100%) rename {llettuce-docs => lettuce-docs}/large_language_models.rst (100%) rename {llettuce-docs => lettuce-docs}/make.bat (100%) rename {llettuce-docs => lettuce-docs}/troubleshooting.rst (100%) rename {llettuce-docs => lettuce-docs}/usage.rst (100%) diff --git a/.gitignore b/.gitignore index cd9696e..720d3a1 100644 --- a/.gitignore +++ b/.gitignore @@ -4,17 +4,8 @@ .env .DS_Store .vscode/settings.json -AI Assistant/tmp.ipynb -AI Assistant/.vscode/settings.json -AI Assistant/data/ -AI Assistant/results/ -Carrot-Assistant/data/ -Carrot-Assistant/log/ -Carrot-Assistant/tmp.ipynb -RAG/tmp.py -Carrot-Assistant/omop_tmp.py -RAG/.cache/ *.qdrant -/Carrot-Assistant/tests/log -/Carrot-Assistant/evaluation/datasets/* -!/Carrot-Assistant/evaluation/datasets/example.csv +/Lettuce/tests/log +/Lettuce/evaluation/datasets/* +!/Lettuce/evaluation/datasets/example.csv +/Lettuce/log diff --git a/AI Assistant/app.py b/AI Assistant/app.py deleted file mode 100644 index 38f2efd..0000000 --- a/AI Assistant/app.py +++ /dev/null @@ -1,79 +0,0 @@ -import streamlit as st -from dotenv import load_dotenv - -from chain.chains import Chains -from options.base_options import BaseOptions -from templates.html_templates import bot_template, css -from utils.utils import * - - -def run() -> None: - """ - Run the streamlit app - """ - load_dotenv() - opt = BaseOptions().parse() - informal_names_chunks = None - chain = None - - st.set_page_config(page_title="BRC AI Assistant", page_icon="💊", layout="wide") - st.write(css, unsafe_allow_html=True) - - if "upload_flag" not in st.session_state: - st.session_state.upload_flag = False - - st.header("BRC AI Assistant") - welcome_message(bot_template, opt.llm_model["model_name"]) - with st.sidebar: - st.subheader("User Medications List") - st.button( - "i", - key="info", - help="The medications list should be in an excel file with the a column of 'informal_names'", - type="secondary", - disabled=True, - use_container_width=False, - ) - user_documents = st.file_uploader( - "Upload your excel file", type=["xlsx", "xls"], accept_multiple_files=False - ) - if st.button("Upload"): - with st.spinner("Uploading"): - informal_names_chunks = load_user_document( - user_documents, opt.df_chunk_size - ) - if informal_names_chunks: - st.success("Uploaded successfully") - chain = Chains( - chain_type="conversion", - llm_model=opt.llm_model, - temperature=opt.temperature, - use_memory=opt.use_memory, - memory_input_key="informal_names", - use_simple_prompt=opt.use_simple_prompt, - ).get_chain() - st.session_state.upload_flag = True - else: - st.error("Failed to upload") - - if st.session_state.upload_flag: - with st.spinner("Processing"): - conversion_histories, outputs = handle_conversion( - informal_names_chunks, - chain, - use_memory=opt.use_memory, - visualize_chunk=opt.visualize_chunk, - ) - handle_output_df( - outputs, - visualize_chunk=opt.visualize_chunk, - model_name=opt.llm_model["model_name"], - ) - - -if __name__ == "__main__": - run() - - -# TODO -# 1. Handle the file with number of rows greater than LLM token limit diff --git a/AI Assistant/chain/chains.py b/AI Assistant/chain/chains.py deleted file mode 100644 index ddfb69f..0000000 --- a/AI Assistant/chain/chains.py +++ /dev/null @@ -1,108 +0,0 @@ -from typing import Dict, Union - -from langchain.chains import LLMChain -from langchain.memory import ConversationBufferMemory - -from chain.memory import get_memory -from chain.models import get_model -from chain.prompts import Prompts - - -class Chains: - """ - This class is used to generate the LLM chain. - """ - - def __init__( - self, - chain_type: str | None = None, - llm_model: Dict | None = None, - temperature: float = 0.7, - prev_memory: ConversationBufferMemory | None = None, - use_memory: bool = False, - memory_input_key: str = "user_question", - use_simple_prompt: bool = False, - ): - """ - Initialise the class - - Parameters: - ---------- - chain_type: str - The type of chain to generate - llm_model: ChatOpenAI|LlamaCpp|GPT4All - The model to use - temperature: float - The temperature to use - prev_memory: ConversationBufferMemory - The previous memory - use_memory: bool - Whether to use memory - memory_input_key: str - The memory input key - use_simple_prompt: bool - Whether to use a simple prompt - """ - self.chain_type = chain_type.lower() - self.hub = llm_model["hub"] - self.model_name = llm_model["model_name"] - self.temperature = temperature - self.prev_memory = prev_memory - self.use_memory = use_memory - self.memory_input_key = memory_input_key - if use_simple_prompt: - self.prompt_type = "simple" - else: - self.prompt_type = self.chain_type - - def get_chain(self) -> LLMChain: - """ - Get the chain - - Returns: - ------- - LLMChain - The LLM chain - """ - prompt = Prompts( - prompt_type=self.prompt_type, - use_memory=self.use_memory, - hub=self.hub, - model_name=self.model_name, - ).get_prompt() - memory = None - if self.use_memory: - memory = get_memory( - prev_memory=self.prev_memory, input_key=self.memory_input_key - ) - return self._conversation_chain(memory=memory, prompt=prompt) - - def _conversation_chain( - self, memory: ConversationBufferMemory, prompt: Prompts - ) -> LLMChain: - """ - Generate the conversation chain - - Parameters: - ---------- - memory: ConversationBufferMemory - The memory - prompt: Prompts - The prompt - - Returns: - ------- - LLMChain - The LLM chain - """ - llm = get_model( - hub=self.hub, model_name=self.model_name, temperature=self.temperature - ) - memory = memory - chain = LLMChain( - llm=llm, - prompt=prompt, - memory=memory, - verbose=True, - ) - return chain diff --git a/AI Assistant/chain/memory.py b/AI Assistant/chain/memory.py deleted file mode 100644 index c19a4aa..0000000 --- a/AI Assistant/chain/memory.py +++ /dev/null @@ -1,39 +0,0 @@ -from langchain.memory import ConversationBufferMemory - - -def get_memory( - prev_memory: ConversationBufferMemory | None = None, - input_key: str = "user_question", -) -> ConversationBufferMemory: - """ - Get the memory - - Parameters: - ---------- - prev_memory: ConversationBufferMemory - The previous memory - input_key: str - The input key to use - - Returns: - ------- - ConversationBufferMemory - The memory - """ - memory_key = "chat_history" - ai_prefix = "AI Assistant" - human_prefix = "User" - memory = ConversationBufferMemory( - memory_key=memory_key, - return_messages=True, - human_prefix=human_prefix, - ai_prefix=ai_prefix, - input_key=input_key, - ) - if prev_memory: - memory.input_key = "user_question" - inputs = prev_memory[0].content - outputs = prev_memory[1].content - memory.save_context({input_key: inputs, "outputs": outputs}) - - return memory diff --git a/AI Assistant/chain/models.py b/AI Assistant/chain/models.py deleted file mode 100644 index 9d89358..0000000 --- a/AI Assistant/chain/models.py +++ /dev/null @@ -1,111 +0,0 @@ -import streamlit as st -import torch -from huggingface_hub import hf_hub_download -from langchain_community.llms import GPT4All, LlamaCpp -from langchain_openai import ChatOpenAI - - -def get_model( - hub: str, model_name: str, temperature: float = 0.7 -) -> ChatOpenAI | LlamaCpp | GPT4All: - """ - Get the model - - Parameters: - ---------- - hub: str - The hub to use - model_name: str - The model name to use - temperature: float - The temperature to use - - Returns: - ------- - Model - The model - """ - - if hub.lower() == "openai": - return ChatOpenAI(model=model_name, temperature=temperature) - - elif hub.lower() == "llamacpp": - if model_name.lower() == "llama-2-7b": - """ - [Llama-2](https://huggingface.co/meta-llama/Llama-2-7b-hf) - [GGUF format](https://huggingface.co/TheBloke/Llama-2-7B-GGUF) - """ - repo = "TheBloke/Llama-2-7B-GGUF" - filename = "llama-2-7b.Q4_0.gguf" # Options: llama-2-7b.Q4_0.gguf, llama-2-7b.Q5_0.gguf, llama-2-7b.Q8_0.gguf - - elif model_name.lower() == "llama-2-7b-chat": - """ - [Llama-2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) - [GGUF format](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF) - """ - repo = "TheBloke/Llama-2-7B-Chat-GGUF" - filename = "llama-2-7b-chat.Q4_0.gguf" # Options: llama-2-7b-chat.Q4_0.gguf, llama-2-7b-chat.Q5_0.gguf, llama-2-7b-chat.Q8_0.gguf - - elif model_name.lower() == "llama-2-13b": - """ - [Llama-2](https://huggingface.co/meta-llama/Llama-2-13b-hf) - [GGUF format](https://huggingface.co/TheBloke/Llama-2-13B-GGUF) - """ - repo = "TheBloke/Llama-2-13B-GGUF" - filename = "llama-2-13b.Q4_0.gguf" # Options: llama-2-13b.Q4_0.gguf, llama-2-13b.Q5_0.gguf, llama-2-13b.Q8_0.gguf - - elif model_name.lower() == "llama-2-13b-chat": - """ - [Llama-2](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) - [GGUF format](https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF) - """ - repo = "TheBloke/Llama-2-13B-Chat-GGUF" - filename = "llama-2-13b-chat.Q4_0.gguf" # Options: llama-2-13b-chat.Q4_0.gguf, llama-2-13b-chat.Q5_0.gguf, llama-2-13b-chat.Q8_0.gguf - - elif model_name.lower() == "llama-2-70b-chat": - """ - [Llama-2](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) - [GGUF format](https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF) - """ - repo = "TheBloke/Llama-2-70B-Chat-GGUF" - filename = "llama-2-70b-chat.Q4_0.gguf" # Options: llama-2-70b-chat.Q4_0.gguf, llama-2-70b-chat.Q5_0.gguf - - else: - raise ValueError(f"Invalid model: {hub}/{model_name}") - - n_gpu_layers = -1 if torch.cuda.is_available() else 0 - gguf_model = hf_hub_download( - repo_id=repo, - filename=filename, - ) - return LlamaCpp( - model_path=gguf_model, - n_gpu_layers=n_gpu_layers, - temperature=temperature, - n_ctx=0, # Text context, 0 = from model - n_batch=512, - max_tokens=2048, - f16_kv=True, - verbose=True, - ) - - elif hub.lower() == "gpt4all": - if model_name.lower() == "mistral-7b-openorca": - model = "mistral-7b-openorca.gguf2.Q4_0.gguf" - elif model_name.lower() == "mistral-7b-instruct": - model = "mistral-7b-instruct-v0.1.Q4_0.gguf" - elif model_name.lower() == "gpt4all-falcon-newbpe": - model = "gpt4all-falcon-newbpe-q4_0.gguf" - device = "gpu" if torch.cuda.is_available() else "cpu" - return GPT4All( - model=model, - temp=temperature, - n_batch=512, - n_predict=2048, - verbose=True, - allow_download=True, - device=device, - ) - - else: - raise ValueError(f"Invalid hub: {hub}") diff --git a/AI Assistant/chain/prompts.py b/AI Assistant/chain/prompts.py deleted file mode 100644 index 0e23d51..0000000 --- a/AI Assistant/chain/prompts.py +++ /dev/null @@ -1,145 +0,0 @@ -from langchain.prompts import PromptTemplate - - -class Prompts: - """ - This class is used to generate prompts for the models. - """ - - def __init__( - self, - prompt_type: str | None = None, - use_memory: bool = False, - hub: str | None = None, - model_name: str | None = None, - ) -> PromptTemplate: - """ - Initialise the class - - Parameters: - ---------- - prompt_type: str - The type of prompt to generate - use_memory: bool - Whether to use memory in the prompt - hub: str - The hub to use - model_name: str - The model name to use - - Returns: - ------- - PromptTemplate - The prompt template - """ - self.prompt_type = prompt_type - self.use_memory = use_memory - self.hub = hub - self.model_name = model_name - - def get_prompt(self) -> PromptTemplate: - """ - Get the prompt - - Returns: - ------- - PromptTemplate - The prompt template - """ - if self.prompt_type == "simple": - return self._simple_prompt() - else: - if self.prompt_type == "conversion": - if "llamacpp" in self.hub.lower(): - return self._medicine_conversion_Llama(use_memory=self.use_memory) - else: - return self._medicine_conversion(use_memory=self.use_memory) - - def _simple_prompt(self) -> PromptTemplate: - """ - Generate a simple prompt - - Returns: - ------- - PromptTemplate - The prompt template - """ - template = """[INST]What are the formal names of medications:{informal_names}-{informal_names_length}?[/INST]""" - return PromptTemplate.from_template(template) - - def _medicine_conversion(self, use_memory: bool = False) -> PromptTemplate: - """ - Generate a medicine conversion prompt - - Parameters: - ---------- - use_memory: bool - Whether to use memory in the prompt - - Returns: - ------- - PromptTemplate - The prompt template - """ - template = """\ - You are an AI assistant for the pharmaceutical department at the University of Nottingham. \ - Your task is to process a dataframe containing informal names of medications and convert them into \ - the respective formal drug names, utilizing your extensive knowledge base. \ - You will receive the dataframe as input called "informal_names" which contains a list of informal names of medications. \ - When producing the output, you must follow these guidelines: \ - - The produced output should be a dictionary. \ - - The dictionary should have two keys: "informal_names" and "formal_names" and the values should be lists of the same length. \ - - The produced "informal_names" should be same as the user input. Do not change it. \ - - The produced "formal_names" should be complete and not partial. \ - - The length of the input informal names is {informal_names_length}. The produced output length should be equal to the length of the input informal names for both keys. It is a mandatory requirement. \ - - The produced output should be in a format to be used to import into a pandas dataframe. \ - - Don't produce any other output or sentence rather than the dataframe. \ - - If you don't know the formal name of a medicine, don't try to make up a name or repeat the informal name. \ - Here is the examples of the format of the user input and the expected output you should produce: \ - Example: \ - user_input: \ - [Document(page_content='Ppaliperidone (3-month)'), Document(page_content='Latanoprost 0.005% (Both Eye)'), Document(page_content='Euthyrox (Sun)'), Document(page_content='Dapagliflozin'), Document(page_content='Humalog 32/22'), Document(page_content='Telmisartan/Amlodipine'), Document(page_content='Ashwagandha')] \ - expected_output: \ - informal_names=["Ppaliperidone (3-month)", "Latanoprost 0.005% (Both Eye)", "Euthyrox (Sun)", "Dapagliflozin", "Humalog 32/22", "Telmisartan/Amlodipine", "Ashwagandha"], formal_names=["Paliperidone", "Latanoprost", "Levothyroxine", "Dapagliflozin", "Insulin lispro", "Telmisartan/Amlodipine", "Withania somnifera"] - - informal_names: - {informal_names} - - AI Assistant Output: - """ - if use_memory: - template = template.replace( - "AI Assistant Output:", - "Chat History:\n{chat_history}\n\nAI Assistant Output:", - ) - return PromptTemplate.from_template(template) - - def _medicine_conversion_Llama(self, use_memory: bool = False) -> PromptTemplate: - """ - Edit the medicine conversion prompt for Llama models - - Parameters: - ---------- - use_memory: bool - Whether to use memory in the prompt - - Returns: - ------- - PromptTemplate - The prompt template - """ - prompt = self._medicine_conversion(use_memory=use_memory) - prompt.template = prompt.template.replace( - "You are an AI assistant", - "[INST] <>\nYou are an AI assistant", - ) - prompt.template = prompt.template.replace( - "informal_names:", - "<>\ninformal_names:", - ) - prompt.template = prompt.template.replace( - "AI Assistant Output:", - "AI Assistant Output: [/INST]", - ) - - return prompt diff --git a/AI Assistant/constant.py b/AI Assistant/constant.py deleted file mode 100644 index 05cd660..0000000 --- a/AI Assistant/constant.py +++ /dev/null @@ -1,7 +0,0 @@ -""" Constant variables for the package.""" - -from pathlib import Path -import os - -# The absolute path to the root of the package. -PACKAGE_ROOT_PATH = Path(os.path.dirname(os.path.realpath(__file__))) diff --git a/AI Assistant/options/base_options.py b/AI Assistant/options/base_options.py deleted file mode 100644 index 9fad6d8..0000000 --- a/AI Assistant/options/base_options.py +++ /dev/null @@ -1,159 +0,0 @@ -import argparse -import ast -from typing import Dict - - -class BaseOptions: - """ - This class defines options used during all types of experiments. - It also implements several helper functions such as parsing, printing, and saving the options. - """ - - def __init__(self) -> None: - """ - Initializes the BaseOptions class - - Parameters - ---------- - None - - Returns - ------- - None - """ - self._parser = argparse.ArgumentParser() - self._initialized = False - - def initialize(self) -> None: - """ - Initializes the BaseOptions class - - Parameters - ---------- - None - - Returns - ------- - None - """ - self._parser.add_argument( - "--llm_model", - type=lambda x: ast.literal_eval(x), - required=False, - default={"hub": "LlamaCpp", "model_name": "llama-2-7B-chat"}, - choices=[ - {"OpenAI", "gpt-3.5-turbo-0125"}, - {"OpenAI", "gpt-4"}, - {"LlamaCpp", "llama-2-7B"}, - {"LlamaCpp", "llama-2-7B-chat"}, - {"LlamaCpp", "llama-2-13B"}, - {"LlamaCpp", "llama-2-13B-chat"}, - {"LlamaCpp", "llama-2-70B-chat"}, - {"GPT4All", "mistral-7b-openorca"}, # Best overall fast chat model - { - "GPT4All", - "mistral-7b-instruct", - }, # Best overall fast instruction following model - { - "GPT4All", - "gpt4all-falcon-newbpe", - }, # Very fast model with good quality - ], - ) - - self._parser.add_argument( - "--temperature", - type=float, - required=False, - default=0.7, - help="temperature to control LLM output randomness", - ) - - self._parser.add_argument( - "--chunk_size", - type=int, - required=False, - default=2000, - help="chunk size for text splitting", - ) - - self._parser.add_argument( - "--chunk_overlap", - type=int, - required=False, - default=200, - help="chunk overlap for text splitting", - ) - self._parser.add_argument( - "--df_chunk_size", - type=int, - required=False, - default=20, - help="chunk size for dataframe splitting", - ) - - self._parser.add_argument( - "--visualize_chunk", - type=bool, - required=False, - default=True, - help="whether to visualize the output chunk by chunk", - ) - - self._parser.add_argument( - "--use_memory", - type=bool, - required=False, - default=False, - help="whether to use memory in the conversation", - ) - - self._parser.add_argument( - "--use_simple_prompt", - type=bool, - required=False, - default=False, - help="whether to use simple prompt in the conversation", - ) - - self._initialized = True - - def parse(self) -> argparse.Namespace: - """ - Parses the arguments passed to the script - - Parameters - ---------- - None - - Returns - ------- - opt: argparse.Namespace - The parsed arguments - """ - if not self._initialized: - self.initialize() - self._opt = self._parser.parse_args() - - args = vars(self._opt) - # self._print(args) - - return self._opt - - def _print(self, args: Dict) -> None: - """ - Prints the arguments passed to the script - - Parameters - ---------- - args: dict - The arguments to print - - Returns - ------- - None - """ - print("------------ Options -------------") - for k, v in args.items(): - print(f"{str(k)}: {str(v)}") - print("-------------- End ---------------") diff --git a/AI Assistant/requirements.txt b/AI Assistant/requirements.txt deleted file mode 100644 index e339973..0000000 --- a/AI Assistant/requirements.txt +++ /dev/null @@ -1,21 +0,0 @@ -huggingface_hub==0.20.3 -langchain==0.1.11 -langchain_community==0.0.26 -langchain_openai==0.0.8 -langchain-experimental==0.0.51 -pandas==2.2.1 -python-dotenv==1.0.1 -streamlit==1.31.1 -torch==2.2.0 -openpyxl==3.1.2 -openai==1.12.0 -rapidocr-onnxruntime==1.3.11 -gpt4all==2.2.1 -llama-cpp-python==0.2.55 -langchainhub==0.1.15 -sentence_transformers==2.5.1 -huggingface_hub - -# In case the model does not find the GPU of the machine, do the following: - # pip uninstall llama-cpp-python - # python -m pip install llama-cpp-python --prefer-binary --no-cache-dir --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122 \ No newline at end of file diff --git a/AI Assistant/templates/html_templates.py b/AI Assistant/templates/html_templates.py deleted file mode 100644 index 047d8fd..0000000 --- a/AI Assistant/templates/html_templates.py +++ /dev/null @@ -1,46 +0,0 @@ -css = """ -