From 9ce2ab95908d21c5bdbb4ef6ad18d93ead3bc56c Mon Sep 17 00:00:00 2001
From: F-G Fernandez <26927750+frgfm@users.noreply.github.com>
Date: Wed, 15 May 2024 11:51:06 +0200
Subject: [PATCH 01/10] refactor(telemetry): update event name

---
 src/app/api/api_v1/endpoints/code.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/app/api/api_v1/endpoints/code.py b/src/app/api/api_v1/endpoints/code.py
index 5df78de..41f23d5 100644
--- a/src/app/api/api_v1/endpoints/code.py
+++ b/src/app/api/api_v1/endpoints/code.py
@@ -29,7 +29,7 @@ async def chat(
     guidelines: GuidelineCRUD = Depends(get_guideline_crud),
     token_payload: TokenPayload = Security(get_quack_jwt, scopes=[UserScope.ADMIN, UserScope.USER]),
 ) -> StreamingResponse:
-    telemetry_client.capture(token_payload.sub, event="compute-chat")
+    telemetry_client.capture(token_payload.sub, event="code-chat")
     # Validate payload
     if len(payload.messages) == 0:
         raise HTTPException(

From c04ab7d1c1989dbcf2e43d29519cc7ada001f67f Mon Sep 17 00:00:00 2001
From: F-G Fernandez <26927750+frgfm@users.noreply.github.com>
Date: Wed, 15 May 2024 12:08:09 +0200
Subject: [PATCH 02/10] feat(llm): add full support of OpenAI API

---
 src/app/core/config.py         |   2 +-
 src/app/services/llm/groq.py   |   8 +-
 src/app/services/llm/llm.py    |  32 ++-
 src/app/services/llm/ollama.py |  65 +-----
 src/app/services/llm/openai.py | 391 ++++-----------------------------
 src/app/services/llm/utils.py  |  65 ++++++
 6 files changed, 139 insertions(+), 424 deletions(-)
 create mode 100644 src/app/services/llm/utils.py

diff --git a/src/app/core/config.py b/src/app/core/config.py
index 714a490..2369b11 100644
--- a/src/app/core/config.py
+++ b/src/app/core/config.py
@@ -53,7 +53,7 @@ def sqlachmey_uri(cls, v: str) -> str:
     GROQ_API_KEY: Union[str, None] = os.environ.get("GROQ_API_KEY")
     GROQ_MODEL: str = os.environ.get("GROQ_MODEL", "llama3-8b-8192")
     OPENAI_API_KEY: Union[str, None] = os.environ.get("OPENAI_API_KEY")
-    OPENAI_MODEL: str = os.environ.get("OPENAI_MODEL", "gpt-4-turbo-2024-04-09")
+    OPENAI_MODEL: str = os.environ.get("OPENAI_MODEL", "gpt-4o-2024-05-13")

     # Error monitoring
     SENTRY_DSN: Union[str, None] = os.environ.get("SENTRY_DSN")

diff --git a/src/app/services/llm/groq.py b/src/app/services/llm/groq.py
index 15fdffc..983207a 100644
--- a/src/app/services/llm/groq.py
+++ b/src/app/services/llm/groq.py
@@ -11,6 +11,8 @@
 from groq import Groq, Stream
 from groq.lib.chat_completion_chunk import ChatCompletionChunk

+from .utils import CHAT_PROMPT
+
 logger = logging.getLogger("uvicorn.error")


@@ -20,12 +22,6 @@ class GroqModel(str, Enum):
     MIXTRAL_8X7b: str = "mixtral-8x7b-32768"


-CHAT_PROMPT = (
-    "You are an AI programming assistant, developed by the company Quack AI, and you only answer questions related to computer science "
-    "(refuse to answer for the rest)."
-)
-
-
 class GroqClient:
     def __init__(
         self,

diff --git a/src/app/services/llm/llm.py b/src/app/services/llm/llm.py
index 3497975..43c4330 100644
--- a/src/app/services/llm/llm.py
+++ b/src/app/services/llm/llm.py
@@ -3,16 +3,40 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

+import re
 from enum import Enum
-from typing import Union
+from typing import Dict, Union
+
+from fastapi import HTTPException, status

 from app.core.config import settings

 from .groq import GroqClient
 from .ollama import OllamaClient
+from .openai import OpenAIClient

 __all__ = ["llm_client"]

+EXAMPLE_PROMPT = (
+    "You are responsible for producing concise illustrations of the company coding guidelines. "
+    "This will be used to teach new developers our way of engineering software. "
+    "Make sure your code is in the specified programming language and functional, don't add extra comments or explanations.\n"
+    # Format
+    "You should output two code blocks: "
+    "a minimal code snippet where the instruction was correctly followed, "
+    "and the same snippet with minimal modifications that invalidate the instruction."
+)
+# Strangely, this pattern doesn't work when pre-compiled with re.compile, so it is passed to re.search as a raw string
+EXAMPLE_PATTERN = r"```[a-zA-Z]*\n(?P<positive>.*?)```\n.*```[a-zA-Z]*\n(?P<negative>.*?)```"
+
+
+def validate_example_response(response: str) -> Dict[str, str]:
+    matches = re.search(EXAMPLE_PATTERN, response.strip(), re.DOTALL)
+    if matches is None:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed output schema validation")
+
+    return matches.groupdict()
+

 class LLMProvider(str, Enum):
     OLLAMA: str = "ollama"
@@ -20,7 +44,7 @@ class LLMProvider(str, Enum):
     GROQ: str = "groq"


-llm_client: Union[OllamaClient, GroqClient]
+llm_client: Union[OllamaClient, GroqClient, OpenAIClient]
 if settings.LLM_PROVIDER == LLMProvider.OLLAMA:
     if not settings.OLLAMA_ENDPOINT:
         raise ValueError("Please provide a value for `OLLAMA_ENDPOINT`")
@@ -29,5 +53,9 @@ class LLMProvider(str, Enum):
     if not settings.GROQ_API_KEY:
         raise ValueError("Please provide a value for `GROQ_API_KEY`")
     llm_client = GroqClient(settings.GROQ_API_KEY, settings.GROQ_MODEL, settings.LLM_TEMPERATURE)  # type: ignore[arg-type]
+elif settings.LLM_PROVIDER == LLMProvider.OPENAI:
+    if not settings.OPENAI_API_KEY:
+        raise ValueError("Please provide a value for `OPENAI_API_KEY`")
+    llm_client = OpenAIClient(settings.OPENAI_API_KEY, settings.OPENAI_MODEL, settings.LLM_TEMPERATURE)  # type: ignore[arg-type]
 else:
     raise NotImplementedError("LLM provider is not implemented")

diff --git a/src/app/services/llm/ollama.py b/src/app/services/llm/ollama.py
index 3e42f1b..7f6660b 100644
--- a/src/app/services/llm/ollama.py
+++ b/src/app/services/llm/ollama.py
@@ -3,73 +3,16 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

-import json
 import logging
-import re
-from typing import Dict, Generator, List, TypeVar, Union
+from typing import Dict, Generator, List, Union

-from fastapi import HTTPException, status
 from ollama import Client

-logger = logging.getLogger("uvicorn.error")
-
-ValidationOut = TypeVar("ValidationOut")
-
-
-EXAMPLE_PROMPT = (
-    "You are responsible for producing concise illustrations of the company coding guidelines. "
-    "This will be used to teach new developers our way of engineering software. "
-    "Make sure your code is in the specified programming language and functional, don't add extra comments or explanations.\n"
-    # Format
-    "You should output two code blocks: "
-    "a minimal code snippet where the instruction was correctly followed, "
-    "and the same snippet with minimal modifications that invalidates the instruction."
-)
-# Strangely, this doesn't work when compiled
-EXAMPLE_PATTERN = r"```[a-zA-Z]*\n(?P<positive>.*?)```\n.*```[a-zA-Z]*\n(?P<negative>.*?)```"
-
-
-def validate_example_response(response: str) -> Dict[str, str]:
-    matches = re.search(EXAMPLE_PATTERN, response.strip(), re.DOTALL)
-    if matches is None:
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed output schema validation")
-
-    return matches.groupdict()
+from .utils import CHAT_PROMPT

+__all__ = ["OllamaClient"]

-PARSING_PROMPT = (
-    "You are responsible for summarizing the list of distinct coding guidelines for the company, by going through documentation. "
-    "This list will be used by developers to avoid hesitations in code reviews and to onboard new members. "
-    "Consider only guidelines that can be verified for a specific snippet of code (nothing about git, commits or community interactions) "
-    "by a human developer without running additional commands or tools, it should only relate to the code within each file. "
-    "Only include guidelines for which you could generate positive and negative code snippets, "
-    "don't invent anything that isn't present in the input text.\n"
-    # Format
-    "You should answer with a list of JSON dictionaries, one dictionary per guideline, where each dictionary has two keys with string values:\n"
-    "- title: a short summary title of the guideline\n"
-    "- details: a descriptive, comprehensive and inambiguous explanation of the guideline."
-)
-PARSING_PATTERN = r"\{\s*\"title\":\s+\"(?P<title>.*?)\",\s+\"details\":\s+\"(?P<details>.*?)\"\s*\}"
-
-
-CHAT_PROMPT = (
-    "You are an AI programming assistant, developed by the company Quack AI, and you only answer questions related to computer science "
-    "(refuse to answer for the rest)."
-)
-
-GUIDELINE_PROMPT = (
-    "When answering user requests, you should at all times keep in mind the following software development guidelines:"
-)
-
-
-def validate_parsing_response(response: str) -> List[Dict[str, str]]:
-    guideline_list = json.loads(response.strip())
-    if not isinstance(guideline_list, list) or any(
-        not isinstance(val, str) for guideline in guideline_list for val in guideline.values()
-    ):
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed output schema validation")
-
-    return json.loads(response.strip())
+logger = logging.getLogger("uvicorn.error")


 class OllamaClient:

diff --git a/src/app/services/llm/openai.py b/src/app/services/llm/openai.py
index 07eedf0..0b7726f 100644
--- a/src/app/services/llm/openai.py
+++ b/src/app/services/llm/openai.py
@@ -3,386 +3,69 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

-import json import logging -from concurrent.futures import ThreadPoolExecutor from datetime import datetime from enum import Enum -from typing import Any, Dict, List, Type, TypeVar, Union +from typing import Dict, Generator, List, Union, cast -import requests -from fastapi import HTTPException, status -from pydantic import ValidationError +from openai import OpenAI, Stream +from openai.types.chat import ChatCompletionChunk -from app.models import Guideline -from app.schemas.code import ComplianceResult -from app.schemas.guidelines import GuidelineContent, GuidelineExample -from app.schemas.services import ( - ArraySchema, - ChatCompletion, - FieldSchema, - ObjectSchema, - OpenAIChatRole, - OpenAIFunction, - OpenAIMessage, - OpenAIModel, -) +from .utils import CHAT_PROMPT logger = logging.getLogger("uvicorn.error") -# __all__ = ["openai_client"] - -class ExecutionMode(str, Enum): - SINGLE = "single-thread" - MULTI = "multi-thread" - - -MONO_SCHEMA = ObjectSchema( - type="object", - properties={ - "is_compliant": FieldSchema( - type="boolean", - description="whether the guideline has been followed in the code snippet", - ), - "comment": FieldSchema( - type="string", - description="short instruction to make the snippet compliant, addressed to the developer who wrote the code. Should be empty if the snippet is compliant", - ), - # "suggestion": FieldSchema(type="string", description="the modified code snippet that meets the guideline, with minimal modifications. Should be empty if the snippet is compliant"), - }, - required=["is_compliant", "comment"], -) - -MONO_PROMPT = ( - "As a code compliance agent, you are going to receive requests from user with two elements: a code snippet, and a guideline. " - "You should answer in JSON format with an analysis result. The comment should be an empty string if the code is compliant with the guideline." -) - - -MULTI_SCHEMA = ObjectSchema( - type="object", - properties={ - "result": ArraySchema( - type="array", - items=ObjectSchema( - type="object", - properties={ - "is_compliant": FieldSchema( - type="boolean", - description="whether the guideline has been followed in the code snippet", - ), - "comment": FieldSchema( - type="string", - description="short instruction to make the snippet compliant, addressed to the developer who wrote the code. Should be empty if the snippet is compliant", - ), - # "suggestion": FieldSchema(type="string", description="the modified code snippet that meets the guideline, with minimal modifications. Should be empty if the snippet is compliant"), - }, - required=["is_compliant", "comment"], - ), - ), - }, - required=["result"], -) - -MULTI_PROMPT = ( - "As a code compliance agent, you are going to receive requests from user with two elements: a code snippet, and a list of guidelines. " - "You should answer in JSON format with a list of compliance results, one for each guideline, no more, no less (in the same order). " - "For a given compliance results, the comment should be an empty string if the code is compliant with the corresponding guideline." 
-) - -PARSING_SCHEMA = ObjectSchema( - type="object", - properties={ - "result": ArraySchema( - type="array", - items=ObjectSchema( - type="object", - properties={ - "source": FieldSchema( - type="string", - description="the text section the guideline was extracted from", - ), - "category": { - "type": "string", - "description": "the high-level category of the guideline", - "enum": [ - "naming", - "error handling", - "syntax", - "comments", - "docstring", - "documentation", - "testing", - "signature", - "type hint", - "formatting", - "other", - ], - }, - "details": FieldSchema( - type="string", - description="a descriptive, comprehensive, inambiguous and specific explanation of the guideline.", - ), - "title": FieldSchema(type="string", description="a summary title of the guideline"), - "examples": ObjectSchema( - type="object", - properties={ - "positive": FieldSchema( - type="string", - description="a short code snippet where the guideline was correctly followed.", - ), - "negative": FieldSchema( - type="string", - description="the same snippet with minimal modification that invalidate the instruction.", - ), - }, - required=["positive", "negative"], - ), - }, - required=["category", "details", "title", "examples"], - ), - ), - }, - required=["result"], -) - -PARSING_PROMPT = ( - "You are responsible for summarizing the list of distinct coding guidelines for the company, by going through documentation. " - "This list will be used by developers to avoid hesitations in code reviews and to onboard new members. " - "Consider only guidelines that can be verified for a specific snippet of code (nothing about git, commits or community interactions) " - "by a human developer without running additional commands or tools, it should only relate to the code within each file. " - "Only include guidelines for which you could generate positive and negative code snippets, " - "don't invent anything that isn't present in the input text or someone will die. " - "You should answer in JSON format with only the list of string guidelines." -) - -EXAMPLE_SCHEMA = ObjectSchema( - type="object", - properties={ - "positive": FieldSchema( - type="string", - description="a short code snippet where the instruction was correctly followed.", - ), - "negative": FieldSchema( - type="string", - description="the same snippet with minimal modification that invalidate the instruction.", - ), - }, - required=["positive", "negative"], -) - -EXAMPLE_PROMPT = ( - "You are responsible for producing concise code snippets to illustrate the company coding guidelines. " - "This will be used to teach new developers our way of engineering software. " - "You should answer in JSON format with only two short code snippets in the specified programming language: one that follows the rule correctly, " - "and a similar version with minimal modifications that violates the rule. " - "Make sure your code is functional, don't extra comments or explanation, or someone will die." 
-) - -ModelInp = TypeVar("ModelInp") - - -def validate_model(model: Type[ModelInp], data: Dict[str, Any]) -> Union[ModelInp, None]: - try: - return model(**data) - except (ValidationError, TypeError): - return None +class OpenAIModel(str, Enum): + # https://platform.openai.com/docs/models/overview + GPT4o: str = "gpt-4o-2024-05-13" + GPT3_5: str = "gpt-3.5-turbo-0125" class OpenAIClient: - ENDPOINT: str = "https://api.openai.com/v1/chat/completions" - def __init__( self, api_key: str, model: OpenAIModel, temperature: float = 0.0, - frequency_penalty: float = 1.0, ) -> None: - self.headers = self._get_headers(api_key) + self._client = OpenAI(api_key=api_key) # Validate model - model_card = requests.get(f"https://api.openai.com/v1/models/{model}", headers=self.headers, timeout=5) - if model_card.status_code != 200: - raise HTTPException(status_code=model_card.status_code, detail=model_card.json()["error"]["message"]) + model_card = self._client.models.retrieve(model) self.model = model self.temperature = temperature - self.frequency_penalty = frequency_penalty logger.info( - f"Using OpenAI model: {self.model} (created at {datetime.fromtimestamp(model_card.json()['created']).isoformat()})", + f"Using OpenAI w/ {self.model} (created at {datetime.fromtimestamp(model_card.created).isoformat()})", ) - @staticmethod - def _get_headers(api_key: str) -> Dict[str, str]: - return {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} - - def check_code_against_guidelines( - self, - code: str, - guidelines: List[Guideline], - mode: ExecutionMode = ExecutionMode.SINGLE, - **kwargs, - ) -> List[ComplianceResult]: - # Check args before sending a request - if len(code) == 0 or len(guidelines) == 0 or any(len(guideline.content) == 0 for guideline in guidelines): - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="No code or guideline provided for analysis.", - ) - # Ideas: check which programming language & whether it's correct code - if mode == ExecutionMode.SINGLE: - parsed_response = self._analyze( - MULTI_PROMPT, - {"code": code, "guidelines": [guideline.content for guideline in guidelines]}, - MULTI_SCHEMA, - **kwargs, - )["result"] - if len(parsed_response) != len(guidelines): - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Invalid model response") - elif mode == ExecutionMode.MULTI: - with ThreadPoolExecutor() as executor: - tasks = [ - executor.submit( - self._analyze, - MONO_PROMPT, - {"code": code, "guideline": guideline.content}, - MONO_SCHEMA, - **kwargs, - ) - for guideline in guidelines - ] - # Collect results - parsed_response = [task.result() for task in tasks] - else: - raise ValueError("invalid value for argument `mode`") - - # Return with pydantic validation - return [ - # ComplianceResult(is_compliant=res["is_compliant"], comment="" if res["is_compliant"] else res["comment"]) - ComplianceResult(guideline_id=guideline.id, **res) - for guideline, res in zip(guidelines, parsed_response) - ] - - def check_code(self, code: str, guideline: Guideline, **kwargs) -> ComplianceResult: - # Check args before sending a request - if len(code) == 0 or len(guideline.content) == 0: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="No code or guideline provided for analysis.", - ) - res = self._analyze(MONO_PROMPT, {"code": code, "guideline": guideline.content}, MONO_SCHEMA, **kwargs) - # Return with pydantic validation - return ComplianceResult(guideline_id=guideline.id, **res) - - def 
_request( + def chat( self, - system_prompt: str, - openai_fn: OpenAIFunction, - message: str, - timeout: int = 20, - model: Union[OpenAIModel, None] = None, - user_id: Union[str, None] = None, - ) -> Dict[str, Any]: + messages: List[Dict[str, str]], + system: Union[str, None] = None, + ) -> Generator[str, None, None]: # Prepare the request - _payload = ChatCompletion( - model=model or self.model, - messages=[ - OpenAIMessage( - role=OpenAIChatRole.SYSTEM, - content=system_prompt, + _system = CHAT_PROMPT if not system else f"{CHAT_PROMPT} {system}" + stream = cast( + Stream[ChatCompletionChunk], + self._client.chat.completions.create( # type: ignore[call-overload] + messages=( + {"role": "system", "content": _system}, + *messages, ), - OpenAIMessage( - role=OpenAIChatRole.USER, - content=message, - ), - ], - functions=[openai_fn], - function_call={"name": openai_fn.name}, - temperature=self.temperature, - frequency_penalty=self.frequency_penalty, - user=user_id, - ) - # Send the request - response = requests.post(self.ENDPOINT, json=_payload.model_dump(), headers=self.headers, timeout=timeout) - - # Check status - if response.status_code != 200: - raise HTTPException(status_code=response.status_code, detail=response.json()["error"]["message"]) - - return json.loads(response.json()["choices"][0]["message"]["function_call"]["arguments"]) - - def _analyze( - self, - prompt: str, - payload: Dict[str, Any], - schema: ObjectSchema, - **kwargs, - ) -> Dict[str, Any]: - return self._request( - prompt, - OpenAIFunction( - name="analyze_code", - description="Analyze code", - parameters=schema, + model=self.model, + # Optional + temperature=self.temperature, + max_tokens=2048, + top_p=1, + stop=None, + stream=True, + stream_options={"include_usage": True}, ), - json.dumps(payload), - **kwargs, - ) - - def parse_guidelines_from_text( - self, - corpus: str, - **kwargs, - ) -> List[GuidelineContent]: - if not isinstance(corpus, str): - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="The input corpus needs to be a string.", - ) - if len(corpus) == 0: - return [] - - response = self._request( - PARSING_PROMPT, - OpenAIFunction( - name="validate_guidelines_from_text", - description="Validate extracted coding guidelines from corpus", - parameters=PARSING_SCHEMA, - ), - json.dumps(corpus), - **kwargs, - ) - guidelines = [validate_model(GuidelineContent, elt) for elt in response["result"]] - if any(guideline is None for guideline in guidelines): - logger.info("Validation errors on some guidelines") - return [guideline for guideline in guidelines if guideline is not None] - - def generate_examples_for_instruction( - self, - instruction: str, - language: str, - **kwargs, - ) -> GuidelineExample: - if ( - not isinstance(instruction, str) - or len(instruction) == 0 - or not isinstance(language, str) - or len(language) == 0 - ): - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="The instruction and language need to be non-empty strings.", - ) - - return GuidelineExample( - **self._request( - EXAMPLE_PROMPT, - OpenAIFunction( - name="generate_examples_from_instruction", - description="Generate code examples for a coding instruction", - parameters=EXAMPLE_SCHEMA, - ), - json.dumps({"instruction": instruction, "language": language}), - **kwargs, - ) ) + for chunk in stream: + if len(chunk.choices) > 0 and isinstance(chunk.choices[0].delta.content, str): + yield chunk.choices[0].delta.content + if chunk.usage: + logger.info( + f"OpenAI 
({self.model}): {chunk.usage.prompt_tokens} prompt tokens | {chunk.usage.completion_tokens} completion tokens",
+                )

diff --git a/src/app/services/llm/utils.py b/src/app/services/llm/utils.py
new file mode 100644
index 0000000..d7e6fe0
--- /dev/null
+++ b/src/app/services/llm/utils.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2024, Quack AI.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.
+
+import json
+import re
+from typing import Dict, List
+
+from fastapi import HTTPException, status
+
+__all__ = ["CHAT_PROMPT"]
+
+EXAMPLE_PROMPT = (
+    "You are responsible for producing concise illustrations of the company coding guidelines. "
+    "This will be used to teach new developers our way of engineering software. "
+    "Make sure your code is in the specified programming language and functional, don't add extra comments or explanations.\n"
+    # Format
+    "You should output two code blocks: "
+    "a minimal code snippet where the instruction was correctly followed, "
+    "and the same snippet with minimal modifications that invalidate the instruction."
+)
+# Strangely, this pattern doesn't work when pre-compiled with re.compile, so it is passed to re.search as a raw string
+EXAMPLE_PATTERN = r"```[a-zA-Z]*\n(?P<positive>.*?)```\n.*```[a-zA-Z]*\n(?P<negative>.*?)```"
+
+PARSING_PROMPT = (
+    "You are responsible for summarizing the list of distinct coding guidelines for the company, by going through documentation. "
+    "This list will be used by developers to avoid hesitations in code reviews and to onboard new members. "
+    "Consider only guidelines that can be verified for a specific snippet of code (nothing about git, commits or community interactions) "
+    "by a human developer without running additional commands or tools, it should only relate to the code within each file. "
+    "Only include guidelines for which you could generate positive and negative code snippets, "
+    "don't invent anything that isn't present in the input text.\n"
+    # Format
+    "You should answer with a list of JSON dictionaries, one dictionary per guideline, where each dictionary has two keys with string values:\n"
+    "- title: a short summary title of the guideline\n"
+    "- details: a descriptive, comprehensive and unambiguous explanation of the guideline."
+)
+PARSING_PATTERN = r"\{\s*\"title\":\s+\"(?P<title>.*?)\",\s+\"details\":\s+\"(?P<details>.*?)\"\s*\}"
+
+CHAT_PROMPT = (
+    "You are an AI programming assistant, developed by the company Quack AI, and you only answer questions related to computer science "
+    "(refuse to answer for the rest)."
+) + +GUIDELINE_PROMPT = ( + "When answering user requests, you should at all times keep in mind the following software development guidelines:" +) + + +def validate_example_response(response: str) -> Dict[str, str]: + matches = re.search(EXAMPLE_PATTERN, response.strip(), re.DOTALL) + if matches is None: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed output schema validation") + + return matches.groupdict() + + +def validate_parsing_response(response: str) -> List[Dict[str, str]]: + guideline_list = json.loads(response.strip()) + if not isinstance(guideline_list, list) or any( + not isinstance(val, str) for guideline in guideline_list for val in guideline.values() + ): + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed output schema validation") + + return json.loads(response.strip()) From 18ca3248f697d1abf5cf17fe58f66bb15eae2887 Mon Sep 17 00:00:00 2001 From: F-G Fernandez <26927750+frgfm@users.noreply.github.com> Date: Wed, 15 May 2024 12:08:37 +0200 Subject: [PATCH 03/10] build(deps): add openai --- poetry.lock | 36 +++++++++++++++++++++++++++++++++++- pyproject.toml | 2 ++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 84d506c..02050eb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1552,6 +1552,29 @@ files = [ [package.dependencies] httpx = ">=0.27.0,<0.28.0" +[[package]] +name = "openai" +version = "1.30.1" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.7.1" +files = [ + {file = "openai-1.30.1-py3-none-any.whl", hash = "sha256:c9fb3c3545c118bbce8deb824397b9433a66d0d0ede6a96f7009c95b76de4a46"}, + {file = "openai-1.30.1.tar.gz", hash = "sha256:4f85190e577cba0b066e1950b8eb9b11d25bc7ebcc43a86b326ce1bfa564ec74"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.7,<5" + +[package.extras] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] + [[package]] name = "orjson" version = "3.9.15" @@ -2803,6 +2826,17 @@ files = [ [package.dependencies] urllib3 = ">=2" +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +description = "Typing stubs for urllib3" +optional = false +python-versions = "*" +files = [ + {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, + {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, +] + [[package]] name = "typing-extensions" version = "4.8.0" @@ -3022,4 +3056,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "5e3fc4ebd14893da2f70f02dea074e976425f3eda4634643859a8a8b32d48348" +content-hash = "e534d24c22548f9876c27ffaa4dbf5afdb4123971a1edfbfb2fb3ff8b482b3a1" diff --git a/pyproject.toml b/pyproject.toml index e8a2b9b..ee9f586 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ posthog = "^3.0.0" prometheus-fastapi-instrumentator = "^6.1.0" groq = "^0.5.0" ollama = "^0.1.9" +openai = "^1.29.0" uvloop = "^0.19.0" httptools = "^0.6.1" @@ -40,6 +41,7 @@ optional = true ruff = "==0.4.4" mypy = "==1.10.0" types-requests = ">=2.0.0" +types-urllib3 = ">=1.26.25" types-passlib = ">=1.7.0" pre-commit = "^3.6.0" From 94442e1a13e86c96fb62b8c91bf4b8eaa98b9c68 Mon Sep 
17 00:00:00 2001 From: F-G Fernandez <26927750+frgfm@users.noreply.github.com> Date: Wed, 15 May 2024 12:09:44 +0200 Subject: [PATCH 04/10] fix(docker): update docker var in prod --- docker-compose.prod.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index f57284c..98db081 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -72,7 +72,7 @@ services: - POSTGRES_URL=postgresql+asyncpg://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB} - SUPERADMIN_LOGIN=${SUPERADMIN_LOGIN} - SUPERADMIN_PWD=${SUPERADMIN_PWD} - - SECRET_KEY=${SECRET_KEY} + - JWT_SECRET=${JWT_SECRET} - OLLAMA_ENDPOINT=http://ollama:11434 - OLLAMA_MODEL=${OLLAMA_MODEL} - OLLAMA_TIMEOUT=${OLLAMA_TIMEOUT:-60} From 27dbbae216cfff3460a558d7532e89374776a8c8 Mon Sep 17 00:00:00 2001 From: F-G Fernandez <26927750+frgfm@users.noreply.github.com> Date: Wed, 15 May 2024 12:09:59 +0200 Subject: [PATCH 05/10] feat(docker): add env vars --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 0d9598c..28158ff 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -56,6 +56,8 @@ services: - OLLAMA_MODEL=${OLLAMA_MODEL} - GROQ_API_KEY=${GROQ_API_KEY} - GROQ_MODEL=${GROQ_MODEL} + - OPENAI_API_KEY=${OPENAI_API_KEY} + - OPENAI_MODEL=${OPENAI_MODEL} - OLLAMA_TIMEOUT=${OLLAMA_TIMEOUT:-60} - SUPPORT_EMAIL=${SUPPORT_EMAIL} - DEBUG=true From e54bd3621c808fd95af2b8613dcfbcf1701f2e8d Mon Sep 17 00:00:00 2001 From: F-G Fernandez <26927750+frgfm@users.noreply.github.com> Date: Wed, 15 May 2024 12:13:52 +0200 Subject: [PATCH 06/10] test(llm): add tests for OpenAI support --- src/tests/services/test_llm.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/tests/services/test_llm.py b/src/tests/services/test_llm.py index a5b1c92..7167c3e 100644 --- a/src/tests/services/test_llm.py +++ b/src/tests/services/test_llm.py @@ -8,6 +8,7 @@ from app.core.config import settings from app.services.llm.groq import GroqClient from app.services.llm.ollama import OllamaClient +from app.services.llm.openai import OpenAIClient @pytest.mark.parametrize( @@ -49,3 +50,21 @@ def test_groqclient_chat(): assert isinstance(stream, types.GeneratorType) for chunk in stream: assert isinstance(chunk, str) + + +def test_openaiclient_constructor(): + with pytest.raises(AuthenticationError): + OpenAIClient("api_key", settings.OPENAI_MODEL) + if isinstance(settings.OPENAI_API_KEY, str): + with pytest.raises(NotFoundError): + OpenAIClient(settings.OPENAI_API_KEY, "quack") + OpenAIClient(settings.OPENAI_API_KEY, settings.OPENAI_MODEL) + + +@pytest.mark.skipif("settings.OPENAI_API_KEY is None") +def test_openaiclient_chat(): + llm_client = OpenAIClient(settings.OPENAI_API_KEY, settings.OPENAI_MODEL) + stream = llm_client.chat([{"role": "user", "content": "hello"}]) + assert isinstance(stream, types.GeneratorType) + for chunk in stream: + assert isinstance(chunk, str) From 1079e08a4acfc44c482aa45a80959d37b9136660 Mon Sep 17 00:00:00 2001 From: F-G Fernandez <26927750+frgfm@users.noreply.github.com> Date: Wed, 15 May 2024 12:19:03 +0200 Subject: [PATCH 07/10] refactor(schemas): remove legacy schemas --- src/app/schemas/services.py | 56 ++++--------------------------------- 1 file changed, 5 insertions(+), 51 deletions(-) diff --git a/src/app/schemas/services.py b/src/app/schemas/services.py index 62919c3..37b8ee0 100644 --- a/src/app/schemas/services.py +++ 
b/src/app/schemas/services.py @@ -4,65 +4,21 @@ # See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details. from enum import Enum -from typing import Any, Dict, List, Union +from typing import List, Union from pydantic import BaseModel, HttpUrl __all__ = ["ChatCompletion"] -class OpenAIModel(str, Enum): - # https://platform.openai.com/docs/models/overview - GPT3_5_TURBO: str = "gpt-3.5-turbo-0125" - GPT3_5_TURBO_LEGACY: str = "gpt-3.5-turbo-1106" - GPT4_TURBO: str = "gpt-4-0125-preview" - GPT4_TURBO_LEGACY: str = "gpt-4-1106-preview" - - -class OpenAIChatRole(str, Enum): +class ChatRole(str, Enum): SYSTEM: str = "system" USER: str = "user" ASSISTANT: str = "assistant" -class FieldSchema(BaseModel): - type: str - description: str - - -class ObjectSchema(BaseModel): - type: str = "object" - properties: Dict[str, Any] - required: List[str] - - -class ArraySchema(BaseModel): - type: str = "array" - items: ObjectSchema - - -class OpenAIFunction(BaseModel): - name: str - description: str - parameters: ObjectSchema - - -class OpenAITool(BaseModel): - type: str = "function" - function: OpenAIFunction - - -class _FunctionName(BaseModel): - name: str - - -class _OpenAIToolChoice(BaseModel): - type: str = "function" - function: _FunctionName - - -class OpenAIMessage(BaseModel): - role: OpenAIChatRole +class ChatMessage(BaseModel): + role: ChatRole content: str @@ -72,9 +28,7 @@ class _ResponseFormat(BaseModel): class ChatCompletion(BaseModel): model: str - messages: List[OpenAIMessage] - functions: List[OpenAIFunction] - function_call: Dict[str, str] + messages: List[ChatMessage] temperature: float = 0.0 frequency_penalty: float = 1.0 response_format: _ResponseFormat = _ResponseFormat(type="json_object") From fe752a714650dce4cb9757c9d5faa1e2fe4a3f85 Mon Sep 17 00:00:00 2001 From: F-G Fernandez <26927750+frgfm@users.noreply.github.com> Date: Wed, 15 May 2024 12:43:42 +0200 Subject: [PATCH 08/10] docs(llm): add mention of alternative LLM providers --- .env.example | 4 ++++ CONTRIBUTING.md | 10 ++++++++-- docs/developers/self-hosting.mdx | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index 158b471..3653517 100644 --- a/.env.example +++ b/.env.example @@ -21,6 +21,10 @@ LLM_PROVIDER='ollama' OLLAMA_MODEL='dolphin-llama3:8b-v2.9-q4_K_M' # Smaller option # OLLAMA_MODEL='tinydolphin:1.1b-v2.8-q4_K_M' +GROQ_API_KEY= +GROQ_MODEL='llama3-8b-8192' +OPENAI_API_KEY= +OPENAI_MODEL='gpt-4o-2024-05-13' LLM_TEMPERATURE=0 JWT_SECRET= SENTRY_DSN= diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 913766e..e62515d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -77,10 +77,12 @@ If you are wondering how to do something with Companion API, or a more general q - [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) - [Docker](https://docs.docker.com/engine/install/) - [Docker compose](https://docs.docker.com/compose/) -- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) and a GPU (>= 6 Gb VRAM for good performance/latency balance) +- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) and a GPU (>= 6 Gb VRAM for good performance/latency balance)* - [Poetry](https://python-poetry.org/docs/) - [Make](https://www.gnu.org/software/make/) (optional) +_*If you don't have a GPU, you can use alternative LLM providers (currently supported: Groq, OpenAI)_ + ### Configure your fork @@ -130,8 
+132,12 @@ This file contains all the information to run the project.
 - `SUPERADMIN_PWD`: the password of the initial admin user

 #### Other optional values
-- `SECRET_KEY`: if set, tokens can be reused between sessions. All instances sharing the same secret key can use the same token.
+- `JWT_SECRET`: if set, tokens can be reused between sessions. All instances sharing the same secret key can use the same token.
 - `OLLAMA_MODEL`: the model tag in [Ollama library](https://ollama.com/library) that will be used for the API.
+- `GROQ_API_KEY`: your [Groq API key](https://console.groq.com/keys), required if you select `groq` as `LLM_PROVIDER`.
+- `GROQ_MODEL`: the model tag in [Groq supported models](https://console.groq.com/docs/models) that will be used for the API.
+- `OPENAI_API_KEY`: your [OpenAI API key](https://platform.openai.com/api-keys), required if you select `openai` as `LLM_PROVIDER`.
+- `OPENAI_MODEL`: the model tag in [OpenAI supported models](https://platform.openai.com/docs/models) that will be used for the API.
 - `SENTRY_DSN`: the DSN for your [Sentry](https://sentry.io/) project, which monitors back-end errors and report them back.
 - `SERVER_NAME`: the server tag that will be used to report events to Sentry.
 - `POSTHOG_HOST`: the host for PostHog [PostHog](https://eu.posthog.com/settings/project-details).

diff --git a/docs/developers/self-hosting.mdx b/docs/developers/self-hosting.mdx
index 0699c3f..aa03dc0 100644
--- a/docs/developers/self-hosting.mdx
+++ b/docs/developers/self-hosting.mdx
@@ -10,7 +10,7 @@ Whatever your installation method, you'll need at least the following to be inst
 1. [Docker](https://docs.docker.com/engine/install/) (and [Docker compose](https://docs.docker.com/compose/) if you're using an old version)
 2. [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) and a GPU

-_We recommend min 5Gb of VRAM on your GPU for good performance/latency balance._
+_We recommend min 5Gb of VRAM on your GPU for good performance/latency balance. Please note that by default, this will run your LLM locally (available offline), but if you don't have a GPU, you can use online LLM providers (currently supported: Groq, OpenAI)._

 ### 60 seconds setup ⏱️

From 5dc8d99c22bec6e5ae16435420046db2124a2605 Mon Sep 17 00:00:00 2001
From: F-G Fernandez <26927750+frgfm@users.noreply.github.com>
Date: Wed, 15 May 2024 12:46:14 +0200
Subject: [PATCH 09/10] ci(push): update docker service names

---
 .github/workflows/push.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index 201f383..1c4a839 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -72,11 +72,11 @@ jobs:
           docker rmi -f $(docker images -f "dangling=true" -q)
           docker volume rm -f $(docker volume ls -f "dangling=true" -q)
           # Update the service
-          docker compose pull backend gradio
-          docker compose stop backend gradio && docker compose up -d --wait
+          docker compose pull backend chat
+          docker compose stop backend chat && docker compose up -d --wait
           # Check update
           docker inspect -f '{{ .Created }}' $(docker compose images -q backend)
-          docker inspect -f '{{ .Created }}' $(docker compose images -q gradio)
+          docker inspect -f '{{ .Created }}' $(docker compose images -q chat)
           # Clean up
           docker rm -fv $(docker ps -aq)
           docker rmi -f $(docker images -f "dangling=true" -q)

From 72b175e4be4cae2bea9ecd05d9ff9b9b4a38f022 Mon Sep 17 00:00:00 2001
From: F-G Fernandez <26927750+frgfm@users.noreply.github.com>
Date: Wed, 15 May 2024 13:07:52 +0200
Subject: [PATCH 10/10] test(llm): update error catching

---
 src/tests/services/test_llm.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/tests/services/test_llm.py b/src/tests/services/test_llm.py
index 7167c3e..8704cdd 100644
--- a/src/tests/services/test_llm.py
+++ b/src/tests/services/test_llm.py
@@ -1,9 +1,12 @@
 import types

 import pytest
-from groq import AuthenticationError, NotFoundError
+from groq import AuthenticationError as GAuthError
+from groq import NotFoundError as GNotFoundError
 from httpx import ConnectError
 from ollama import ResponseError
+from openai import AuthenticationError as OAIAuthError
+from openai import NotFoundError as OAINotFoundError

 from app.core.config import settings
 from app.services.llm.groq import GroqClient
@@ -35,10 +38,10 @@ def test_ollamaclient_chat():


 def test_groqclient_constructor():
-    with pytest.raises(AuthenticationError):
+    with pytest.raises(GAuthError):
         GroqClient("api_key", settings.GROQ_MODEL)
     if isinstance(settings.GROQ_API_KEY, str):
-        with pytest.raises(NotFoundError):
+        with pytest.raises(GNotFoundError):
             GroqClient(settings.GROQ_API_KEY, "quack")
     GroqClient(settings.GROQ_API_KEY, settings.GROQ_MODEL)
@@ -53,10 +56,10 @@ def test_groqclient_chat():


 def test_openaiclient_constructor():
-    with pytest.raises(AuthenticationError):
+    with pytest.raises(OAIAuthError):
         OpenAIClient("api_key", settings.OPENAI_MODEL)
     if isinstance(settings.OPENAI_API_KEY, str):
-        with pytest.raises(NotFoundError):
+        with pytest.raises(OAINotFoundError):
             OpenAIClient(settings.OPENAI_API_KEY, "quack")
     OpenAIClient(settings.OPENAI_API_KEY, settings.OPENAI_MODEL)
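For reference, a minimal sketch of how the streaming client introduced in PATCH 02 is consumed end to end, mirroring the assertions in PATCH 06 and PATCH 10. This is illustrative only, not part of the patch series: it assumes `LLM_PROVIDER='openai'` is configured with a valid `OPENAI_API_KEY`, and the final print wiring is hypothetical.

import types

from app.core.config import settings
from app.services.llm.openai import OpenAIClient

# Construction validates the model tag against the OpenAI API (models.retrieve)
# and raises for a bad key or unknown model, which the constructor tests exercise.
client = OpenAIClient(settings.OPENAI_API_KEY, settings.OPENAI_MODEL, temperature=0.0)

# chat() yields text deltas as they stream back; an optional `system` argument
# would be appended to the built-in CHAT_PROMPT guardrail.
stream = client.chat([{"role": "user", "content": "hello"}])
assert isinstance(stream, types.GeneratorType)
print("".join(stream))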