diff --git a/.gitignore b/.gitignore
index cd9696e..720d3a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,17 +4,8 @@
.env
.DS_Store
.vscode/settings.json
-AI Assistant/tmp.ipynb
-AI Assistant/.vscode/settings.json
-AI Assistant/data/
-AI Assistant/results/
-Carrot-Assistant/data/
-Carrot-Assistant/log/
-Carrot-Assistant/tmp.ipynb
-RAG/tmp.py
-Carrot-Assistant/omop_tmp.py
-RAG/.cache/
*.qdrant
-/Carrot-Assistant/tests/log
-/Carrot-Assistant/evaluation/datasets/*
-!/Carrot-Assistant/evaluation/datasets/example.csv
+/Lettuce/tests/log
+/Lettuce/evaluation/datasets/*
+!/Lettuce/evaluation/datasets/example.csv
+/Lettuce/log
diff --git a/AI Assistant/app.py b/AI Assistant/app.py
deleted file mode 100644
index 38f2efd..0000000
--- a/AI Assistant/app.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import streamlit as st
-from dotenv import load_dotenv
-
-from chain.chains import Chains
-from options.base_options import BaseOptions
-from templates.html_templates import bot_template, css
-from utils.utils import *
-
-
-def run() -> None:
- """
- Run the streamlit app
- """
- load_dotenv()
- opt = BaseOptions().parse()
- informal_names_chunks = None
- chain = None
-
- st.set_page_config(page_title="BRC AI Assistant", page_icon="💊", layout="wide")
- st.write(css, unsafe_allow_html=True)
-
- if "upload_flag" not in st.session_state:
- st.session_state.upload_flag = False
-
- st.header("BRC AI Assistant")
- welcome_message(bot_template, opt.llm_model["model_name"])
- with st.sidebar:
- st.subheader("User Medications List")
- st.button(
- "i",
- key="info",
- help="The medications list should be in an excel file with the a column of 'informal_names'",
- type="secondary",
- disabled=True,
- use_container_width=False,
- )
- user_documents = st.file_uploader(
- "Upload your excel file", type=["xlsx", "xls"], accept_multiple_files=False
- )
- if st.button("Upload"):
- with st.spinner("Uploading"):
- informal_names_chunks = load_user_document(
- user_documents, opt.df_chunk_size
- )
- if informal_names_chunks:
- st.success("Uploaded successfully")
- chain = Chains(
- chain_type="conversion",
- llm_model=opt.llm_model,
- temperature=opt.temperature,
- use_memory=opt.use_memory,
- memory_input_key="informal_names",
- use_simple_prompt=opt.use_simple_prompt,
- ).get_chain()
- st.session_state.upload_flag = True
- else:
- st.error("Failed to upload")
-
- if st.session_state.upload_flag:
- with st.spinner("Processing"):
- conversion_histories, outputs = handle_conversion(
- informal_names_chunks,
- chain,
- use_memory=opt.use_memory,
- visualize_chunk=opt.visualize_chunk,
- )
- handle_output_df(
- outputs,
- visualize_chunk=opt.visualize_chunk,
- model_name=opt.llm_model["model_name"],
- )
-
-
-if __name__ == "__main__":
- run()
-
-
-# TODO
-# 1. Handle the file with number of rows greater than LLM token limit
diff --git a/AI Assistant/chain/chains.py b/AI Assistant/chain/chains.py
deleted file mode 100644
index ddfb69f..0000000
--- a/AI Assistant/chain/chains.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from typing import Dict, Union
-
-from langchain.chains import LLMChain
-from langchain.memory import ConversationBufferMemory
-
-from chain.memory import get_memory
-from chain.models import get_model
-from chain.prompts import Prompts
-
-
-class Chains:
- """
- This class is used to generate the LLM chain.
- """
-
- def __init__(
- self,
- chain_type: str | None = None,
- llm_model: Dict | None = None,
- temperature: float = 0.7,
- prev_memory: ConversationBufferMemory | None = None,
- use_memory: bool = False,
- memory_input_key: str = "user_question",
- use_simple_prompt: bool = False,
- ):
- """
- Initialise the class
-
- Parameters:
- ----------
- chain_type: str
- The type of chain to generate
- llm_model: ChatOpenAI|LlamaCpp|GPT4All
- The model to use
- temperature: float
- The temperature to use
- prev_memory: ConversationBufferMemory
- The previous memory
- use_memory: bool
- Whether to use memory
- memory_input_key: str
- The memory input key
- use_simple_prompt: bool
- Whether to use a simple prompt
- """
- self.chain_type = chain_type.lower()
- self.hub = llm_model["hub"]
- self.model_name = llm_model["model_name"]
- self.temperature = temperature
- self.prev_memory = prev_memory
- self.use_memory = use_memory
- self.memory_input_key = memory_input_key
- if use_simple_prompt:
- self.prompt_type = "simple"
- else:
- self.prompt_type = self.chain_type
-
- def get_chain(self) -> LLMChain:
- """
- Get the chain
-
- Returns:
- -------
- LLMChain
- The LLM chain
- """
- prompt = Prompts(
- prompt_type=self.prompt_type,
- use_memory=self.use_memory,
- hub=self.hub,
- model_name=self.model_name,
- ).get_prompt()
- memory = None
- if self.use_memory:
- memory = get_memory(
- prev_memory=self.prev_memory, input_key=self.memory_input_key
- )
- return self._conversation_chain(memory=memory, prompt=prompt)
-
- def _conversation_chain(
- self, memory: ConversationBufferMemory, prompt: Prompts
- ) -> LLMChain:
- """
- Generate the conversation chain
-
- Parameters:
- ----------
- memory: ConversationBufferMemory
- The memory
- prompt: Prompts
- The prompt
-
- Returns:
- -------
- LLMChain
- The LLM chain
- """
- llm = get_model(
- hub=self.hub, model_name=self.model_name, temperature=self.temperature
- )
- memory = memory
- chain = LLMChain(
- llm=llm,
- prompt=prompt,
- memory=memory,
- verbose=True,
- )
- return chain
diff --git a/AI Assistant/chain/memory.py b/AI Assistant/chain/memory.py
deleted file mode 100644
index c19a4aa..0000000
--- a/AI Assistant/chain/memory.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from langchain.memory import ConversationBufferMemory
-
-
-def get_memory(
- prev_memory: ConversationBufferMemory | None = None,
- input_key: str = "user_question",
-) -> ConversationBufferMemory:
- """
- Get the memory
-
- Parameters:
- ----------
- prev_memory: ConversationBufferMemory
- The previous memory
- input_key: str
- The input key to use
-
- Returns:
- -------
- ConversationBufferMemory
- The memory
- """
- memory_key = "chat_history"
- ai_prefix = "AI Assistant"
- human_prefix = "User"
- memory = ConversationBufferMemory(
- memory_key=memory_key,
- return_messages=True,
- human_prefix=human_prefix,
- ai_prefix=ai_prefix,
- input_key=input_key,
- )
- if prev_memory:
- memory.input_key = "user_question"
- inputs = prev_memory[0].content
- outputs = prev_memory[1].content
- memory.save_context({input_key: inputs, "outputs": outputs})
-
- return memory
diff --git a/AI Assistant/chain/models.py b/AI Assistant/chain/models.py
deleted file mode 100644
index 9d89358..0000000
--- a/AI Assistant/chain/models.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import streamlit as st
-import torch
-from huggingface_hub import hf_hub_download
-from langchain_community.llms import GPT4All, LlamaCpp
-from langchain_openai import ChatOpenAI
-
-
-def get_model(
- hub: str, model_name: str, temperature: float = 0.7
-) -> ChatOpenAI | LlamaCpp | GPT4All:
- """
- Get the model
-
- Parameters:
- ----------
- hub: str
- The hub to use
- model_name: str
- The model name to use
- temperature: float
- The temperature to use
-
- Returns:
- -------
- Model
- The model
- """
-
- if hub.lower() == "openai":
- return ChatOpenAI(model=model_name, temperature=temperature)
-
- elif hub.lower() == "llamacpp":
- if model_name.lower() == "llama-2-7b":
- """
- [Llama-2](https://huggingface.co/meta-llama/Llama-2-7b-hf)
- [GGUF format](https://huggingface.co/TheBloke/Llama-2-7B-GGUF)
- """
- repo = "TheBloke/Llama-2-7B-GGUF"
- filename = "llama-2-7b.Q4_0.gguf" # Options: llama-2-7b.Q4_0.gguf, llama-2-7b.Q5_0.gguf, llama-2-7b.Q8_0.gguf
-
- elif model_name.lower() == "llama-2-7b-chat":
- """
- [Llama-2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
- [GGUF format](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF)
- """
- repo = "TheBloke/Llama-2-7B-Chat-GGUF"
- filename = "llama-2-7b-chat.Q4_0.gguf" # Options: llama-2-7b-chat.Q4_0.gguf, llama-2-7b-chat.Q5_0.gguf, llama-2-7b-chat.Q8_0.gguf
-
- elif model_name.lower() == "llama-2-13b":
- """
- [Llama-2](https://huggingface.co/meta-llama/Llama-2-13b-hf)
- [GGUF format](https://huggingface.co/TheBloke/Llama-2-13B-GGUF)
- """
- repo = "TheBloke/Llama-2-13B-GGUF"
- filename = "llama-2-13b.Q4_0.gguf" # Options: llama-2-13b.Q4_0.gguf, llama-2-13b.Q5_0.gguf, llama-2-13b.Q8_0.gguf
-
- elif model_name.lower() == "llama-2-13b-chat":
- """
- [Llama-2](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf)
- [GGUF format](https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF)
- """
- repo = "TheBloke/Llama-2-13B-Chat-GGUF"
- filename = "llama-2-13b-chat.Q4_0.gguf" # Options: llama-2-13b-chat.Q4_0.gguf, llama-2-13b-chat.Q5_0.gguf, llama-2-13b-chat.Q8_0.gguf
-
- elif model_name.lower() == "llama-2-70b-chat":
- """
- [Llama-2](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf)
- [GGUF format](https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF)
- """
- repo = "TheBloke/Llama-2-70B-Chat-GGUF"
- filename = "llama-2-70b-chat.Q4_0.gguf" # Options: llama-2-70b-chat.Q4_0.gguf, llama-2-70b-chat.Q5_0.gguf
-
- else:
- raise ValueError(f"Invalid model: {hub}/{model_name}")
-
- n_gpu_layers = -1 if torch.cuda.is_available() else 0
- gguf_model = hf_hub_download(
- repo_id=repo,
- filename=filename,
- )
- return LlamaCpp(
- model_path=gguf_model,
- n_gpu_layers=n_gpu_layers,
- temperature=temperature,
- n_ctx=0, # Text context, 0 = from model
- n_batch=512,
- max_tokens=2048,
- f16_kv=True,
- verbose=True,
- )
-
- elif hub.lower() == "gpt4all":
- if model_name.lower() == "mistral-7b-openorca":
- model = "mistral-7b-openorca.gguf2.Q4_0.gguf"
- elif model_name.lower() == "mistral-7b-instruct":
- model = "mistral-7b-instruct-v0.1.Q4_0.gguf"
- elif model_name.lower() == "gpt4all-falcon-newbpe":
- model = "gpt4all-falcon-newbpe-q4_0.gguf"
- device = "gpu" if torch.cuda.is_available() else "cpu"
- return GPT4All(
- model=model,
- temp=temperature,
- n_batch=512,
- n_predict=2048,
- verbose=True,
- allow_download=True,
- device=device,
- )
-
- else:
- raise ValueError(f"Invalid hub: {hub}")
diff --git a/AI Assistant/chain/prompts.py b/AI Assistant/chain/prompts.py
deleted file mode 100644
index 0e23d51..0000000
--- a/AI Assistant/chain/prompts.py
+++ /dev/null
@@ -1,145 +0,0 @@
-from langchain.prompts import PromptTemplate
-
-
-class Prompts:
- """
- This class is used to generate prompts for the models.
- """
-
- def __init__(
- self,
- prompt_type: str | None = None,
- use_memory: bool = False,
- hub: str | None = None,
- model_name: str | None = None,
- ) -> PromptTemplate:
- """
- Initialise the class
-
- Parameters:
- ----------
- prompt_type: str
- The type of prompt to generate
- use_memory: bool
- Whether to use memory in the prompt
- hub: str
- The hub to use
- model_name: str
- The model name to use
-
- Returns:
- -------
- PromptTemplate
- The prompt template
- """
- self.prompt_type = prompt_type
- self.use_memory = use_memory
- self.hub = hub
- self.model_name = model_name
-
- def get_prompt(self) -> PromptTemplate:
- """
- Get the prompt
-
- Returns:
- -------
- PromptTemplate
- The prompt template
- """
- if self.prompt_type == "simple":
- return self._simple_prompt()
- else:
- if self.prompt_type == "conversion":
- if "llamacpp" in self.hub.lower():
- return self._medicine_conversion_Llama(use_memory=self.use_memory)
- else:
- return self._medicine_conversion(use_memory=self.use_memory)
-
- def _simple_prompt(self) -> PromptTemplate:
- """
- Generate a simple prompt
-
- Returns:
- -------
- PromptTemplate
- The prompt template
- """
- template = """[INST]What are the formal names of medications:{informal_names}-{informal_names_length}?[/INST]"""
- return PromptTemplate.from_template(template)
-
- def _medicine_conversion(self, use_memory: bool = False) -> PromptTemplate:
- """
- Generate a medicine conversion prompt
-
- Parameters:
- ----------
- use_memory: bool
- Whether to use memory in the prompt
-
- Returns:
- -------
- PromptTemplate
- The prompt template
- """
- template = """\
- You are an AI assistant for the pharmaceutical department at the University of Nottingham. \
- Your task is to process a dataframe containing informal names of medications and convert them into \
- the respective formal drug names, utilizing your extensive knowledge base. \
- You will receive the dataframe as input called "informal_names" which contains a list of informal names of medications. \
- When producing the output, you must follow these guidelines: \
- - The produced output should be a dictionary. \
- - The dictionary should have two keys: "informal_names" and "formal_names" and the values should be lists of the same length. \
- - The produced "informal_names" should be same as the user input. Do not change it. \
- - The produced "formal_names" should be complete and not partial. \
- - The length of the input informal names is {informal_names_length}. The produced output length should be equal to the length of the input informal names for both keys. It is a mandatory requirement. \
- - The produced output should be in a format to be used to import into a pandas dataframe. \
- - Don't produce any other output or sentence rather than the dataframe. \
- - If you don't know the formal name of a medicine, don't try to make up a name or repeat the informal name. \
- Here is the examples of the format of the user input and the expected output you should produce: \
- Example: \
- user_input: \
- [Document(page_content='Ppaliperidone (3-month)'), Document(page_content='Latanoprost 0.005% (Both Eye)'), Document(page_content='Euthyrox (Sun)'), Document(page_content='Dapagliflozin'), Document(page_content='Humalog 32/22'), Document(page_content='Telmisartan/Amlodipine'), Document(page_content='Ashwagandha')] \
- expected_output: \
- informal_names=["Ppaliperidone (3-month)", "Latanoprost 0.005% (Both Eye)", "Euthyrox (Sun)", "Dapagliflozin", "Humalog 32/22", "Telmisartan/Amlodipine", "Ashwagandha"], formal_names=["Paliperidone", "Latanoprost", "Levothyroxine", "Dapagliflozin", "Insulin lispro", "Telmisartan/Amlodipine", "Withania somnifera"]
-
- informal_names:
- {informal_names}
-
- AI Assistant Output:
- """
- if use_memory:
- template = template.replace(
- "AI Assistant Output:",
- "Chat History:\n{chat_history}\n\nAI Assistant Output:",
- )
- return PromptTemplate.from_template(template)
-
- def _medicine_conversion_Llama(self, use_memory: bool = False) -> PromptTemplate:
- """
- Edit the medicine conversion prompt for Llama models
-
- Parameters:
- ----------
- use_memory: bool
- Whether to use memory in the prompt
-
- Returns:
- -------
- PromptTemplate
- The prompt template
- """
- prompt = self._medicine_conversion(use_memory=use_memory)
- prompt.template = prompt.template.replace(
- "You are an AI assistant",
- "[INST] <>\nYou are an AI assistant",
- )
- prompt.template = prompt.template.replace(
- "informal_names:",
- "<>\ninformal_names:",
- )
- prompt.template = prompt.template.replace(
- "AI Assistant Output:",
- "AI Assistant Output: [/INST]",
- )
-
- return prompt
diff --git a/AI Assistant/constant.py b/AI Assistant/constant.py
deleted file mode 100644
index 05cd660..0000000
--- a/AI Assistant/constant.py
+++ /dev/null
@@ -1,7 +0,0 @@
-""" Constant variables for the package."""
-
-from pathlib import Path
-import os
-
-# The absolute path to the root of the package.
-PACKAGE_ROOT_PATH = Path(os.path.dirname(os.path.realpath(__file__)))
diff --git a/AI Assistant/options/base_options.py b/AI Assistant/options/base_options.py
deleted file mode 100644
index 9fad6d8..0000000
--- a/AI Assistant/options/base_options.py
+++ /dev/null
@@ -1,159 +0,0 @@
-import argparse
-import ast
-from typing import Dict
-
-
-class BaseOptions:
- """
- This class defines options used during all types of experiments.
- It also implements several helper functions such as parsing, printing, and saving the options.
- """
-
- def __init__(self) -> None:
- """
- Initializes the BaseOptions class
-
- Parameters
- ----------
- None
-
- Returns
- -------
- None
- """
- self._parser = argparse.ArgumentParser()
- self._initialized = False
-
- def initialize(self) -> None:
- """
- Initializes the BaseOptions class
-
- Parameters
- ----------
- None
-
- Returns
- -------
- None
- """
- self._parser.add_argument(
- "--llm_model",
- type=lambda x: ast.literal_eval(x),
- required=False,
- default={"hub": "LlamaCpp", "model_name": "llama-2-7B-chat"},
- choices=[
- {"OpenAI", "gpt-3.5-turbo-0125"},
- {"OpenAI", "gpt-4"},
- {"LlamaCpp", "llama-2-7B"},
- {"LlamaCpp", "llama-2-7B-chat"},
- {"LlamaCpp", "llama-2-13B"},
- {"LlamaCpp", "llama-2-13B-chat"},
- {"LlamaCpp", "llama-2-70B-chat"},
- {"GPT4All", "mistral-7b-openorca"}, # Best overall fast chat model
- {
- "GPT4All",
- "mistral-7b-instruct",
- }, # Best overall fast instruction following model
- {
- "GPT4All",
- "gpt4all-falcon-newbpe",
- }, # Very fast model with good quality
- ],
- )
-
- self._parser.add_argument(
- "--temperature",
- type=float,
- required=False,
- default=0.7,
- help="temperature to control LLM output randomness",
- )
-
- self._parser.add_argument(
- "--chunk_size",
- type=int,
- required=False,
- default=2000,
- help="chunk size for text splitting",
- )
-
- self._parser.add_argument(
- "--chunk_overlap",
- type=int,
- required=False,
- default=200,
- help="chunk overlap for text splitting",
- )
- self._parser.add_argument(
- "--df_chunk_size",
- type=int,
- required=False,
- default=20,
- help="chunk size for dataframe splitting",
- )
-
- self._parser.add_argument(
- "--visualize_chunk",
- type=bool,
- required=False,
- default=True,
- help="whether to visualize the output chunk by chunk",
- )
-
- self._parser.add_argument(
- "--use_memory",
- type=bool,
- required=False,
- default=False,
- help="whether to use memory in the conversation",
- )
-
- self._parser.add_argument(
- "--use_simple_prompt",
- type=bool,
- required=False,
- default=False,
- help="whether to use simple prompt in the conversation",
- )
-
- self._initialized = True
-
- def parse(self) -> argparse.Namespace:
- """
- Parses the arguments passed to the script
-
- Parameters
- ----------
- None
-
- Returns
- -------
- opt: argparse.Namespace
- The parsed arguments
- """
- if not self._initialized:
- self.initialize()
- self._opt = self._parser.parse_args()
-
- args = vars(self._opt)
- # self._print(args)
-
- return self._opt
-
- def _print(self, args: Dict) -> None:
- """
- Prints the arguments passed to the script
-
- Parameters
- ----------
- args: dict
- The arguments to print
-
- Returns
- -------
- None
- """
- print("------------ Options -------------")
- for k, v in args.items():
- print(f"{str(k)}: {str(v)}")
- print("-------------- End ---------------")
diff --git a/AI Assistant/requirements.txt b/AI Assistant/requirements.txt
deleted file mode 100644
index e339973..0000000
--- a/AI Assistant/requirements.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-huggingface_hub==0.20.3
-langchain==0.1.11
-langchain_community==0.0.26
-langchain_openai==0.0.8
-langchain-experimental==0.0.51
-pandas==2.2.1
-python-dotenv==1.0.1
-streamlit==1.31.1
-torch==2.2.0
-openpyxl==3.1.2
-openai==1.12.0
-rapidocr-onnxruntime==1.3.11
-gpt4all==2.2.1
-llama-cpp-python==0.2.55
-langchainhub==0.1.15
-sentence_transformers==2.5.1
-huggingface_hub
-
-# In case the model does not find the GPU of the machine, do the following:
- # pip uninstall llama-cpp-python
- # python -m pip install llama-cpp-python --prefer-binary --no-cache-dir --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122
\ No newline at end of file
diff --git a/AI Assistant/templates/html_templates.py b/AI Assistant/templates/html_templates.py
deleted file mode 100644
index 047d8fd..0000000
--- a/AI Assistant/templates/html_templates.py
+++ /dev/null
@@ -1,46 +0,0 @@
-css = """
-