Combine default generation parameters and settings into one file.
chigkim committed Feb 24, 2024
1 parent fe6125a commit 5a1caf1
Showing 5 changed files with 124 additions and 121 deletions.
5 changes: 1 addition & 4 deletions Model.py
@@ -32,7 +32,6 @@ def __init__(self):
self.rag = None
self.models = []
self.token_counter = TokenCountingHandler(tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)
self.init_llm()

def get_models(self):
ids = []
@@ -54,7 +53,6 @@ def get_models(self):
def init_llm(self):
if settings.model_name not in self.models:
settings.model_name = self.get_models()[0]
print(settings.model_name)
options = get_parameters()
if settings.llm_name == "Ollama":
Settings.llm = Ollama(model=settings.model_name, request_timeout=600, base_url=settings.host, additional_kwargs=options)
@@ -110,12 +108,10 @@ def startRag(self, path, setStatus):

def setHost(self, host):
settings.host = host
self.init_llm()

def setModel(self, name):
if settings.model_name == name: return
settings.model_name = name
self.init_llm()

def setSystem(self, system):
if system == "": return
@@ -126,6 +122,7 @@ def setSystem(self):
self.messages[0] = system

def ask(self, content, window):
self.init_llm()
self.token_counter.reset_counts()
if not self.image:
Settings.callback_manager = CallbackManager([self.token_counter])
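Read together, the Model.py hunks defer LLM construction: init_llm() is dropped from __init__, setHost, and setModel, and ask() now calls it before every request. A minimal, self-contained sketch of that lazy-initialization pattern (the settings stand-in and method bodies below are illustrative, not the repository's code):

class _Settings:
    """Hypothetical stand-in for the app's settings object."""
    host = "http://localhost:11434"
    model_name = ""

settings = _Settings()

class Model:
    def __init__(self):
        self.llm = None                     # no init_llm() at construction time anymore

    def init_llm(self):
        # The real method builds an Ollama/OpenAI/Gemini client from settings
        # and get_parameters(); here it just records what would be used.
        self.llm = (settings.host, settings.model_name)

    def setHost(self, host):
        settings.host = host                # only record the change

    def setModel(self, name):
        if settings.model_name == name:
            return
        settings.model_name = name          # re-init happens on the next ask()

    def ask(self, content):
        self.init_llm()                     # pick up host/model/parameter changes lazily
        return "asked %r via %r" % (content, self.llm)

if __name__ == "__main__":
    m = Model()
    m.setModel("llama2")
    print(m.ask("hello"))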
1 change: 1 addition & 0 deletions Parameters.py
@@ -75,6 +75,7 @@ def get_parameters():
p = os.path.join(os.path.dirname(__file__), "default-parameters.json")
with open(p) as file:
default = json.load(file)
default = default['parameters']
if not hasattr(settings, 'parameters'):
settings.parameters = default
else:
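With settings and generation parameters merged into one file, get_parameters() has to unwrap the nested "parameters" object, which is what the added default = default['parameters'] line does. A hedged sketch of that loading step (the function name and fallback behavior are assumptions; the file name and key match the diff):

import json
import os

def load_default_generation_parameters(base_dir=None):
    """Illustrative loader: return only the generation parameters from the
    combined default-parameters.json, mirroring default = default['parameters']."""
    if base_dir is None:
        base_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(base_dir, "default-parameters.json")
    with open(path) as file:
        combined = json.load(file)
    # App settings now live at the top level; generation options sit under "parameters".
    return combined["parameters"]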
20 changes: 2 additions & 18 deletions Settings.py
@@ -72,24 +72,8 @@ def save_settings(self):
json.dump(settings_dict, file, indent='\t')

def load_settings(self):
default_dict = {
'host': 'http://localhost:11434',
"llm_name": "Ollama",
"model_name": "",
"openai_api_key": "",
"gemini_api_key": "",
'system': "",
'speakResponse': False,
'voice': 'unknown',
'rate': 0.0,
'ragResponseMode': 'refine',
'chunk_size':1024,
'chunk_overlap':20,
'similarity_top_k':2,
'similarity_cutoff':0.0,
'response_mode': 'refine',
'show_context': False
}
p = os.path.join(os.path.dirname(__file__), "default-parameters.json")
default_dict = json.load(open(p))
try:
with open(self.settings_file_path, 'r') as file:
settings_dict = json.load(file)
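The Settings.py hunk replaces the hard-coded default_dict with the same JSON file, so there is now a single source of truth for defaults. A sketch of that idea with slightly more defensive file handling than the one-liner in the diff (the merge-with-saved-settings step is an assumption about what the surrounding try block does):

import json
import os

def load_settings_with_defaults(settings_file_path, module_dir=None):
    """Illustrative: defaults come from default-parameters.json; values saved
    by the user in settings_file_path override them key by key."""
    if module_dir is None:
        module_dir = os.path.dirname(os.path.abspath(__file__))
    defaults_path = os.path.join(module_dir, "default-parameters.json")
    with open(defaults_path) as file:
        default_dict = json.load(file)
    try:
        with open(settings_file_path) as file:
            saved = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        saved = {}                          # first run or corrupt file: keep defaults
    return {**default_dict, **saved}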
22 changes: 13 additions & 9 deletions VOLlama.py
@@ -30,11 +30,14 @@ def __init__(self, parent, title):
super(ChatWindow, self).__init__(parent, title=title, size=(1920,1080))
self.speech = Speech()
self.speech.speak("VOLlama is starting...")
self.model = Model()
self.model.setSystem(settings.system)
self.InitUI()
self.Maximize(True)
self.Centre()
self.Show()
self.model = Model()
self.model.setSystem(settings.system)
self.refreshModels()
self.prompt.SetFocus()
threading.Thread(target=check_update, args=(version,)).start()

def InitUI(self):
@@ -107,7 +110,7 @@ def InitUI(self):
self.modelList= wx.Choice(self.toolbar, choices=[])
self.modelList.Bind(wx.EVT_CHOICE, self.onSelectModel)
self.toolbar.AddControl(self.modelList, "Model")
self.refreshModels()

self.copyButton = wx.Button(self.toolbar, label="Copy Last Message")
self.toolbar.AddControl(self.copyButton, "Copy Message")
self.copyButton.Bind(wx.EVT_BUTTON, self.OnCopyMessage)
@@ -132,8 +135,6 @@ def InitUI(self):
vbox.Add(self.prompt, 2, wx.EXPAND | wx.LEFT | wx.RIGHT, 5)
vbox.Add(self.sendButton, 1, wx.EXPAND | wx.ALL, 5)
panel.SetSizer(vbox)
self.Maximize(True)
self.modelList.SetFocus()

def setStatus(self, text):
self.SetStatusText(text)
@@ -160,15 +161,20 @@ def refreshModels(self):
threading.Thread(target=self.getModels).start()

def getModels(self):
try: models = self.model.get_models()
try:
models = self.model.get_models()
except Exception as e:
displayError(e)
return
self.modelList.SetItems(models)
self.modelList.SetSelection(0)
if settings.model_name in models:
self.modelList.SetSelection(models.index(settings.model_name))
else:
self.modelList.SetSelection(0)
self.onSelectModel()
self.modelList.SetFocus()


def setHost(self, event):
dlg = wx.TextEntryDialog(self, "Enter the host address:", "Host", value=settings.host)
if dlg.ShowModal() == wx.ID_OK:
@@ -193,7 +199,6 @@ def setParameters(self, e):
with ParametersDialog(self, 'Generation Parameters') as dialog:
if dialog.ShowModal() == wx.ID_OK:
dialog.save()
self.model.init_llm()

def OnCopyModel(self, event):
with CopyDialog(self, title="Copy Model") as dlg:
@@ -338,7 +343,6 @@ def displayAPISettingsDialog(self, event):
self.copyModelMenu.Enable(settings.llm_name == "Ollama")
self.deleteModelMenu.Enable(settings.llm_name == "Ollama")
self.model.models = []
self.model.init_llm()
self.refreshModels()

def onShowRagSettings(self, event):
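In the VOLlama.py hunks, getModels() now keeps the previously saved model selected when the server still lists it and falls back to the first entry otherwise. A stand-alone sketch of just that selection rule (the wx.Choice calls are replaced with plain Python here):

def choose_model_index(models, saved_name):
    """Return the index to select in the model list: the saved model if it is
    still available, otherwise the first entry. Mirrors getModels() above."""
    if saved_name in models:
        return models.index(saved_name)
    return 0

# The saved selection survives a refresh only if the server still offers it.
assert choose_model_index(["llama2", "mistral"], "mistral") == 1
assert choose_model_index(["llama2", "mistral"], "removed-model") == 0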
197 changes: 107 additions & 90 deletions default-parameters.json
@@ -1,92 +1,109 @@
{
"num_predict": {
"value": -1,
"description": "Maximum number of tokens to predict when generating text.",
"range": "-1 = infinity, -2 = until context filled"
},
"temperature": {
"value": 0.8,
"description": "Increasing the temperature will make the model answer more creatively.",
"range": "0.0-2.0"
},
"repeat_penalty": {
"value": 1.1,
"description": "Higher value will penalize repetitions more strongly.",
"range": "0.0-2.0"
},
"repeat_last_n": {
"value": 64,
"description": "Sets how far back for the model to look back to prevent repetition.",
"range": "0=disabled, -1=num_ctx"
},
"top_k": {
"value": 40,
"description": "Reduces the probability of generating nonsense.",
"range": "-1-100"
},
"top_p": {
"value": 0.9,
"description": "Works together with top-k. A higher value will lead to more diverse text, while a lower value will generate more focused and conservative text.",
"range": "0.0-1.0"
},
"tfs_z": {
"value": 1.0,
"description": "Reduces the impact of less probable tokens from the output. A higher value will reduce the impact more.",
"range": "0.0-1.0"
},
"typical_p": {
"value": 1.0,
"description": "Reduces the impact of less probable tokens from the output.",
"range": "0.0-1.0"
},
"presence_penalty": {
"value": 0.0,
"description": "Penalizes new tokens based on their presence in the text so far.",
"range": "0.0-1.0"
},
"frequency_penalty": {
"value": 0.0,
"description": "Penalizes new tokens based on their frequency in the text so far.",
"range": "0.0-1.0"
},
"mirostat": {
"value": 0,
"description": "Enables or disables mirostat.",
"range": "0=disable, 1=v1, 2=v2"
},
"mirostat_tau": {
"value": 5.0,
"description": "Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text.",
"range": "0.0-10.0"
},
"mirostat_eta": {
"value": 0.1,
"description": "Influences how quickly the algorithm responds to feedback from the generated text.",
"range": "0.0-1.0"
},
"num_keep": {
"value": 0,
"description": "Number of tokens to keep unchanged at the beginning of the generated text.",
"range": "Integer value"
},
"penalize_newline": {
"value": true,
"description": "Whether to penalize the generation of new lines.",
"range": "Boolean value"
},
"stop": {
"value": [],
"description": "When this pattern is encountered the LLM will stop generating text and return.",
"range": "Array of strings"
},
"seed": {
"value": -1,
"description": "Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt.",
"range": "Integer value"
},
"num_ctx": {
"value": 4096,
"description": "Sets the size of the context window used to generate the next token.",
"range": "The max context length depends on the model."
}
"host": "http://localhost:11434",
"llm_name": "Ollama",
"model_name": "",
"openai_api_key": "",
"gemini_api_key": "",
"system": "",
"speakResponse": false,
"voice": "unknown",
"rate": 0.0,
"chunk_size":1024,
"chunk_overlap":20,
"similarity_top_k":2,
"similarity_cutoff":0.0,
"response_mode": "compact",
"show_context": false,
"parameters": {
"num_ctx": {
"value": 4096,
"description": "Sets the size of the context window used to generate the next token.",
"range": "The max context length depends on the model."
},
"num_predict": {
"value": -1,
"description": "Maximum number of tokens to predict when generating text.",
"range": "-1 = infinity, -2 = until context filled"
},
"temperature": {
"value": 0.8,
"description": "Increasing the temperature will make the model answer more creatively.",
"range": "0.0-2.0"
},
"repeat_penalty": {
"value": 1.1,
"description": "Higher value will penalize repetitions more strongly.",
"range": "0.0-2.0"
},
"repeat_last_n": {
"value": 64,
"description": "Sets how far back for the model to look back to prevent repetition.",
"range": "0=disabled, -1=num_ctx"
},
"top_k": {
"value": 40,
"description": "Reduces the probability of generating nonsense.",
"range": "-1-100"
},
"top_p": {
"value": 0.9,
"description": "Works together with top-k. A higher value will lead to more diverse text, while a lower value will generate more focused and conservative text.",
"range": "0.0-1.0"
},
"tfs_z": {
"value": 1.0,
"description": "Reduces the impact of less probable tokens from the output. A higher value will reduce the impact more.",
"range": "0.0-1.0"
},
"typical_p": {
"value": 1.0,
"description": "Reduces the impact of less probable tokens from the output.",
"range": "0.0-1.0"
},
"presence_penalty": {
"value": 0.0,
"description": "Penalizes new tokens based on their presence in the text so far.",
"range": "0.0-1.0"
},
"frequency_penalty": {
"value": 0.0,
"description": "Penalizes new tokens based on their frequency in the text so far.",
"range": "0.0-1.0"
},
"mirostat": {
"value": 0,
"description": "Enables or disables mirostat.",
"range": "0=disable, 1=v1, 2=v2"
},
"mirostat_tau": {
"value": 5.0,
"description": "Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text.",
"range": "0.0-10.0"
},
"mirostat_eta": {
"value": 0.1,
"description": "Influences how quickly the algorithm responds to feedback from the generated text.",
"range": "0.0-1.0"
},
"num_keep": {
"value": 0,
"description": "Number of tokens to keep unchanged at the beginning of the generated text.",
"range": "Integer value"
},
"penalize_newline": {
"value": true,
"description": "Whether to penalize the generation of new lines.",
"range": "Boolean value"
},
"stop": {
"value": [],
"description": "When this pattern is encountered the LLM will stop generating text and return.",
"range": "Array of strings"
},
"seed": {
"value": -1,
"description": "Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt.",
"range": "Integer value"
}
}
}
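The rewritten default-parameters.json keeps application settings (host, API keys, speech and RAG options) at the top level and nests every generation parameter, each with its value, description, and range, under "parameters". A short sketch of reading both kinds of entry from the combined file (the path is assumed to be the working directory):

import json

with open("default-parameters.json") as file:                  # path assumed
    combined = json.load(file)

host = combined["host"]                                         # top-level app setting
temperature = combined["parameters"]["temperature"]["value"]    # nested generation parameter
print(host, temperature)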
