diff --git a/.env-template b/.env-template
index 9a037d6..905164f 100644
--- a/.env-template
+++ b/.env-template
@@ -13,6 +13,7 @@ OLLAMA_API_BASE=http://host.docker.internal:11434
 
 # Cloud Models (Optional)
 OPENAI_API_KEY=
+OPENAI_API_BASE=
 GROQ_API_KEY=
 AZURE_DEPLOYMENT_NAME=
 
diff --git a/docker-compose.dev.yaml b/docker-compose.dev.yaml
index a963691..a7036a2 100644
--- a/docker-compose.dev.yaml
+++ b/docker-compose.dev.yaml
@@ -20,6 +20,7 @@ services:
       - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434}
       - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - OPENAI_API_BASE=${OPENAI_API_BASE}
       - GROQ_API_KEY=${GROQ_API_KEY}
       - AZURE_DEPLOYMENT_NAME=${AZURE_DEPLOYMENT_NAME}
diff --git a/src/backend/constants.py b/src/backend/constants.py
index 06eae66..c6144a6 100644
--- a/src/backend/constants.py
+++ b/src/backend/constants.py
@@ -9,10 +9,11 @@ class ChatModel(str, Enum):
     LLAMA_3_70B = "llama-3-70b"
     GPT_4o = "gpt-4o"
-    GPT_3_5_TURBO = "gpt-3.5-turbo"
+    GPT_4o_mini = "gpt-4o-mini"
+    COMMAND_R = "command-r"
 
     # Local models
-    LOCAL_LLAMA_3 = "llama3"
+    LOCAL_LLAMA_3 = "llama3.1"
     LOCAL_GEMMA = "gemma"
     LOCAL_MISTRAL = "mistral"
     LOCAL_PHI3_14B = "phi3:14b"
@@ -22,10 +23,10 @@ class ChatModel(str, Enum):
 
 
 model_mappings: dict[ChatModel, str] = {
-    ChatModel.GPT_3_5_TURBO: "gpt-3.5-turbo",
     ChatModel.GPT_4o: "gpt-4o",
-    ChatModel.LLAMA_3_70B: "groq/llama3-70b-8192",
-    ChatModel.LOCAL_LLAMA_3: "ollama_chat/llama3",
+    ChatModel.GPT_4o_mini: "gpt-4o-mini",
+    ChatModel.LLAMA_3_70B: "groq/llama-3.1-70b-versatile",
+    ChatModel.LOCAL_LLAMA_3: "ollama_chat/llama3.1",
     ChatModel.LOCAL_GEMMA: "ollama_chat/gemma",
     ChatModel.LOCAL_MISTRAL: "ollama_chat/mistral",
     ChatModel.LOCAL_PHI3_14B: "ollama_chat/phi3:14b",
@@ -39,7 +40,7 @@ def get_model_string(model: ChatModel) -> str:
             raise ValueError("CUSTOM_MODEL is not set")
         return custom_model
 
-    if model in {ChatModel.GPT_3_5_TURBO, ChatModel.GPT_4o}:
+    if model in {ChatModel.GPT_4o_mini, ChatModel.GPT_4o}:
         openai_mode = os.environ.get("OPENAI_MODE", "openai")
         if openai_mode == "azure":
             # Currently deployments are named "gpt-35-turbo" and "gpt-4o"
diff --git a/src/backend/llm/base.py b/src/backend/llm/base.py
index 18b9f6a..b87088e 100644
--- a/src/backend/llm/base.py
+++ b/src/backend/llm/base.py
@@ -41,7 +41,10 @@ def __init__(
             raise ValueError(f"Missing keys: {validation['missing_keys']}")
 
         self.llm = LiteLLM(model=model)
-        self.client = instructor.from_litellm(completion)
+        if 'groq' in model or 'ollama_chat' in model:
+            self.client = instructor.from_litellm(completion, mode=instructor.Mode.MD_JSON)
+        else:
+            self.client = instructor.from_litellm(completion)
 
     async def astream(self, prompt: str) -> CompletionResponseAsyncGen:
         return await self.llm.astream_complete(prompt)
diff --git a/src/backend/schemas.py b/src/backend/schemas.py
index fe9faf8..91d9a02 100644
--- a/src/backend/schemas.py
+++ b/src/backend/schemas.py
@@ -35,7 +35,7 @@ class ChatRequest(BaseModel, plugin_settings=record_all):
     thread_id: int | None = None
     query: str
     history: List[Message] = Field(default_factory=list)
-    model: ChatModel = ChatModel.GPT_3_5_TURBO
+    model: ChatModel = ChatModel.GPT_4o_mini
     pro_search: bool = False
diff --git a/src/backend/validators.py b/src/backend/validators.py
index 494fc1f..82044aa 100644
--- a/src/backend/validators.py
+++ b/src/backend/validators.py
@@ -5,7 +5,7 @@
 
 
 def validate_model(model: ChatModel):
-    if model in {ChatModel.GPT_3_5_TURBO, ChatModel.GPT_4o}:
+    if model in {ChatModel.GPT_4o_mini, ChatModel.GPT_4o}:
         OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
         if not OPENAI_API_KEY:
             raise ValueError("OPENAI_API_KEY environment variable not found")
diff --git a/src/frontend/generated/types.gen.ts b/src/frontend/generated/types.gen.ts
index dae0845..0320528 100644
--- a/src/frontend/generated/types.gen.ts
+++ b/src/frontend/generated/types.gen.ts
@@ -67,8 +67,8 @@ export type ChatMessage = {
 export enum ChatModel {
   LLAMA_3_70B = "llama-3-70b",
   GPT_4O = "gpt-4o",
-  GPT_3_5_TURBO = "gpt-3.5-turbo",
-  LLAMA3 = "llama3",
+  GPT_4O_MINI = "gpt-4o-mini",
+  LLAMA3 = "llama3.1",
   GEMMA = "gemma",
   MISTRAL = "mistral",
   PHI3_14B = "phi3:14b",
diff --git a/src/frontend/src/components/model-selection.tsx b/src/frontend/src/components/model-selection.tsx
index 910637b..c995e7e 100644
--- a/src/frontend/src/components/model-selection.tsx
+++ b/src/frontend/src/components/model-selection.tsx
@@ -37,10 +37,10 @@ type Model = {
 };
 
 export const modelMap: Record<ChatModel, Model> = {
-  [ChatModel.GPT_3_5_TURBO]: {
+  [ChatModel.GPT_4O_MINI]: {
     name: "Fast",
-    description: "OpenAI/GPT-3.5-turbo",
-    value: ChatModel.GPT_3_5_TURBO,
+    description: "OpenAI/GPT-4o-mini",
+    value: ChatModel.GPT_4O_MINI,
     smallIcon: ,
     icon: ,
   },
@@ -60,7 +60,7 @@ export const modelMap: Record<ChatModel, Model> = {
   [ChatModel.LLAMA3]: {
     name: "Llama3",
-    description: "ollama/llama3",
+    description: "ollama/llama3.1",
     value: ChatModel.LLAMA3,
     smallIcon: ,
     icon: ,
@@ -123,7 +123,7 @@ const ModelItem: React.FC<{ model: Model }> = ({ model }) => (
 
 export function ModelSelection() {
   const { localMode, model, setModel, toggleLocalMode } = useConfigStore();
-  const selectedModel = modelMap[model] ?? modelMap[ChatModel.GPT_3_5_TURBO];
+  const selectedModel = modelMap[model] ?? modelMap[ChatModel.GPT_4O_MINI];
 
   return (
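For reviewers, a minimal sketch of how the new `instructor` client selection in `src/backend/llm/base.py` behaves for each provider. This is illustrative only and not part of the diff: `make_client` is a hypothetical helper that isolates the added branch, and the model strings are the ones registered in `model_mappings` above.

```python
# Illustrative sketch only -- not part of the diff. `make_client` is a
# hypothetical helper that isolates the branch added in base.py.
import instructor
from litellm import completion


def make_client(model: str):
    # Groq- and Ollama-served models get MD_JSON mode, so instructor parses
    # structured output from a markdown JSON block (presumably because native
    # tool/function calling is not reliable for these providers via LiteLLM).
    if "groq" in model or "ollama_chat" in model:
        return instructor.from_litellm(completion, mode=instructor.Mode.MD_JSON)
    # Everything else keeps instructor's default tool-calling mode.
    return instructor.from_litellm(completion)


# Model strings as registered in model_mappings (src/backend/constants.py):
make_client("groq/llama-3.1-70b-versatile")  # MD_JSON
make_client("ollama_chat/llama3.1")          # MD_JSON
make_client("gpt-4o-mini")                   # default tool-calling
```

One thing the substring check leaves implicit: a provider prefix added to `model_mappings` later (for example, a mapping for the new `COMMAND_R` enum member) would silently fall into the default mode unless it matches `groq` or `ollama_chat`.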