Skip to content

Commit

Permalink
Index file; Fixed bug when index gets reset when changing model
Browse files Browse the repository at this point in the history
  • Loading branch information
chigkim committed Feb 18, 2024
1 parent 5eb297c commit 793278d
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 21 deletions.
13 changes: 7 additions & 6 deletions Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ def __init__(self, name="neural-chat", host="http://localhost:11434"):
self.rag = None
self.load_parameters()

def initRag(self):
self.rag = RAG(self.host, self.name)
def updateRag(self):
self.rag.update_settings(self.host, self.name)

def startRag(self, path, setStatus):
self.initRag()
if path.startswith("http"): self.rag.loadUrl(path, setStatus)
self.rag = RAG(self.host, self.name)
if isinstance(path, list): self.rag.loadFolder(path, setStatus)
elif path.startswith("http"): self.rag.loadUrl(path, setStatus)
else: self.rag.loadFolder(path, setStatus)

def load_parameters(self):
Expand All @@ -34,11 +35,11 @@ def load_parameters(self):
def setHost(self, host):
self.host = host
self.client = Client(host=host)
if self.rag: self.initRag()
if self.rag: self.updateRag()

def setModel(self, name):
self.name = name
if self.rag: self.initRag()
if self.rag: self.updateRag()

def setSystem(self, system):
if system == "": return
Expand Down
20 changes: 13 additions & 7 deletions RAG.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,22 @@
from tiktoken_ext import openai_public
import tiktoken_ext
from Parameters import get_parameters
import os

class RAG:
def __init__(self, host, model):
self.llm_name=model
self.host=host
#Settings.embed_model = OllamaEmbedding(base_url=host, model_name=model)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
options = get_parameters()
Settings.llm = Ollama(model=model, request_timeout=300.0, base_url=host, additional_kwargs=options)
self.index = None
self.token_counter = TokenCountingHandler(tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
Settings.callback_manager = CallbackManager([self.token_counter])
self.update_settings()
self.update_settings(host, model)

def update_settings(self):
def update_settings(self, host, model):
options = get_parameters()
Settings.llm = Ollama(model=model, request_timeout=600, base_url=host, additional_kwargs=options)
Settings.chunk_size = settings.chunk_size
Settings.chunk_overlap = settings.chunk_overlap
Settings.similarity_top_k = settings.similarity_top_k
Expand Down Expand Up @@ -54,10 +56,14 @@ def loadUrl(self, url, setStatus):
displayError(e)
setStatus("Failed to index.")

def loadFolder(self, folder, setStatus):
def loadFolder(self, path, setStatus):
try:
start = time()
documents = SimpleDirectoryReader(folder, recursive=True).load_data()
if isinstance(path, str):
documents = SimpleDirectoryReader(path, recursive=True).load_data()
else:
documents = SimpleDirectoryReader(input_files=path).load_data()

self.index = VectorStoreIndex.from_documents(documents)
message = f"Indexed folder into {len(documents)} chunks in {time()-start:0.2f} seconds."
displayInfo("Index", message)
Expand All @@ -68,7 +74,7 @@ def loadFolder(self, folder, setStatus):

def ask(self, question):
self.token_counter.reset_counts()
self.update_settings()
self.update_settings(self.host, self.llm_name)
node_postprocessors = [SimilarityPostprocessor(similarity_cutoff=settings.similarity_cutoff)]
query_engine = self.index.as_query_engine(similarity_top_k=settings.similarity_top_k, node_postprocessors = node_postprocessors, response_mode=settings.response_mode, streaming=True)
self.response = query_engine.query(question)
Expand Down
25 changes: 17 additions & 8 deletions VOLlama.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version = 11
version = 12
import wx
import threading
import sounddevice as sd
Expand Down Expand Up @@ -76,10 +76,12 @@ def InitUI(self):
#self.Bind(wx.EVT_MENU, self.log, logMenu)

ragMenu= wx.Menu()
urlMenu = ragMenu.Append(wx.ID_ANY, "Index an &URL...\tCTRL+U")
self.Bind(wx.EVT_MENU, self.onUploadURLButton, urlMenu)
documentMenu = ragMenu.Append(wx.ID_ANY, "Index a Folder with Documents...\tCTRL+D")
self.Bind(wx.EVT_MENU, self.onUploadDocuments, documentMenu)
indexUrlMenu = ragMenu.Append(wx.ID_ANY, "Index &URL...\tCTRL+U")
self.Bind(wx.EVT_MENU, self.onIndexURL, indexUrlMenu)
indexFileMenu = ragMenu.Append(wx.ID_ANY, "Index &File...\tCTRL+F")
self.Bind(wx.EVT_MENU, self.onIndexFile, indexFileMenu)
indexFolderMenu = ragMenu.Append(wx.ID_ANY, "Index Directory...\tCTRL+D")
self.Bind(wx.EVT_MENU, self.onIndexFolder, indexFolderMenu)
loadIndexMenu = ragMenu.Append(wx.ID_ANY, "Load Index...")
self.Bind(wx.EVT_MENU, self.loadIndex, loadIndexMenu)
saveIndexMenu = ragMenu.Append(wx.ID_ANY, "Save Index...")
Expand Down Expand Up @@ -281,14 +283,22 @@ def onUploadImage(self,e):
self.model.image = file
self.prompt.SetFocus()

def onUploadDocuments(self,e):
def onIndexFile(self, event):
    """Let the user pick one or more files and index them for RAG.

    Opens a multi-select file dialog filtered to the supported document
    types, then hands the chosen paths to ``self.model.startRag`` on a
    background thread.

    Args:
        event: The wx menu event that triggered this handler (unused).
    """
    # wx wildcard syntax is "description|pattern", where several patterns
    # inside one filter are separated by ";". Using "|" between patterns
    # makes wx treat them as alternating description/pattern fields and
    # yields a list of garbled filters instead of a single one.
    extensions = (
        "txt", "pdf", "docx", "pptx", "ppt", "pptm", "hwp", "csv", "epub",
        "md", "ipynb", "mbox", "jpg", "png", "jpeg", "mp3", "mp4",
    )
    patterns = ";".join(f"*.{ext}" for ext in extensions)
    wildcard = f"Supported Files ({patterns})|{patterns}"
    style = wx.FD_OPEN | wx.FD_FILE_MUST_EXIST | wx.FD_MULTIPLE
    with wx.FileDialog(self, "Choose a file", wildcard=wildcard, style=style) as fileDialog:
        if fileDialog.ShowModal() == wx.ID_CANCEL:
            return
        paths = fileDialog.GetPaths()
    self.setStatus(f"Indexing {paths}")
    # startRag receives a list here (not a str), which it routes to loadFolder.
    threading.Thread(target=self.model.startRag, args=(paths, self.setStatus)).start()

def onIndexFolder(self,e):
with wx.DirDialog(None, "Choose a folder with documents:", style=wx.DD_DEFAULT_STYLE | wx.DD_DIR_MUST_EXIST) as dlg:
if dlg.ShowModal() == wx.ID_CANCEL: return
folder = dlg.GetPath()
self.setStatus(f"Indexing {folder}")
threading.Thread(target=self.model.startRag, args=(folder, self.setStatus)).start()

def onUploadURLButton(self, e):
def onIndexURL(self, e):
with wx.TextEntryDialog(self, "Enter an url to index::", "URL", value="https://") as dlg:
if dlg.ShowModal() == wx.ID_CANCEL: return
url = dlg.GetValue()
Expand All @@ -308,7 +318,6 @@ def loadIndex(self,e):
with wx.DirDialog(None, "Choose a folder with Index:", style=wx.DD_DEFAULT_STYLE | wx.DD_DIR_MUST_EXIST) as dlg:
if dlg.ShowModal() == wx.ID_CANCEL: return
folder = dlg.GetPath()
self.model.initRag()
self.model.rag.load_index(folder)

def saveIndex(self,e):
Expand Down
2 changes: 2 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Change log

* Index individual files (RAG menu: Index File..., CTRL+F).
* Fixed a bug where the index was reset when changing the model.
* Token counter on the status bar for RAG.
* Fixed response mode not working.
* Output similarity score for each chunk.
Expand Down

0 comments on commit 793278d

Please sign in to comment.