diff --git a/services/backend/api_server/Dockerfile b/services/backend/api_server/Dockerfile
new file mode 100644
index 0000000..c1f459e
--- /dev/null
+++ b/services/backend/api_server/Dockerfile
@@ -0,0 +1,18 @@
+FROM python:3.10-slim-buster
+
+# Set the working directory in the container to /app
+WORKDIR /app
+
+# Add the current directory contents into the container at /app
+ADD . /app
+
+# Install the packages listed in requirements_backend.txt
+RUN pip install --no-cache-dir -r requirements_backend.txt
+
+# Document port 8000, the port uvicorn is told to listen on below
+EXPOSE 8000
+
+ENV HOST=host.docker.internal
+
+# Serve app.backend:app with uvicorn on 0.0.0.0:8000 when the container launches
+CMD ["uvicorn", "app.backend:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/services/backend/api_server/__init__.py b/services/backend/api_server/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/services/backend/api_server/app/__init__.py b/services/backend/api_server/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/services/backend/api_server/app/backend.py b/services/backend/api_server/app/backend.py
new file mode 100644
index 0000000..8a0af30
--- /dev/null
+++ b/services/backend/api_server/app/backend.py
@@ -0,0 +1,1553 @@
+# Standard Library
+import asyncio
+import json
+import logging
+import os
+import uuid
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+# Third Party
+import aioredis
+import requests
+from beanie import init_beanie
+from celery import Celery
+from fastapi import FastAPI, Form, HTTPException, UploadFile, WebSocket
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from fastapi.websockets import WebSocketState
+from motor.motor_asyncio import AsyncIOMotorClient
+from openai import OpenAI
+from pydantic import BaseModel
+from pymongo import ASCENDING, DESCENDING
+from starlette.websockets import WebSocketDisconnect
+
+from .backend_models import (
+    ApiKeysUpdateModel,
+    AssistantFileObject,
+    AssistantObject,
+    CreateAssistantRequest,
+    FilesStorageObject,
+    FileUpload,
+    ListAssistantFilesResponse,
+    ListAssistantsResponse,
+    ListFilesResponse,
+    ListMessagesResponse,
+    ListRunStepsResponse,
+    MessageObject,
+    OpenAIFile,
+    RunObject,
+    RunStepObject,
+    ThreadObject,
+)
+from .helpers import (
+    generate_assistant_id,
+    generate_message_id,
+    generate_run_id,
+    generate_thread_id,
+)
+from .models import (
+    AssistantToolsBrowser,
+    AssistantToolsRetrieval,
+    CreateAssistantFileRequest,
+    CreateChatCompletionRequest,
+    CreateChatCompletionResponse,
+    CreateFileRequest,
+    CreateMessageRequest,
+    CreateRunRequest,
+    CreateThreadRequest,
+    DeleteAssistantFileResponse,
+    DeleteAssistantResponse,
+    DeleteFileResponse,
+    DeleteThreadResponse,
+    ListModelsResponse,
+    MessageContentImageFileObject,
+    MessageContentTextObject,
+    Model,
+    ModifyAssistantRequest,
+    ModifyMessageRequest,
+    ModifyRunRequest,
+    ModifyThreadRequest,
+    Object,
+    Object7,
+    Object8,
+    Object14,
+    Object20,
+    Object21,
+    Object22,
+    Object23,
+    Object24,
+    Object25,
+    Object28,
+    Object29,
+    Order1,
+    Order3,
+    Order5,
+    Order7,
+    Order9,
+    Order11,
+    Purpose,
+    Purpose1,
+    Role7,
+    Role8,
+    Status,
+    Status2,
+    SubmitToolOutputsRunRequest,
+    Text,
+    Type6,
+    Type8,
+    Type13,
+    Type824,
+)
+
+# Hosts of the backing services; each falls back to localhost when the
+# corresponding environment variable is not set.
+litellm_host = os.getenv("LITELLM_HOST", "localhost")
+redis_host = os.getenv("REDIS_HOST", "localhost")
+mongodb_host = os.getenv("MONGODB_HOST", "localhost")
+
+app = FastAPI()
+
+# Origins allowed to call this API from a browser (CORS)
+origins = [
+    "http://localhost:3000",  # Add the frontend host here
+    "http://localhost",
+    "https://docs.rubra.ai",
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],  # Allows all methods
+    allow_headers=["*"],  # Allows all headers
+)
+
+# MongoDB and LiteLLM configuration
+MONGODB_URL = f"mongodb://{mongodb_host}:27017"
+DATABASE_NAME = "rubra_db"
+LITELLM_URL = f"http://{litellm_host}:8002"
+HEADERS = {"accept": "application/json", "Content-Type": "application/json"}
+
+# Initialize MongoDB client
+mongo_client = AsyncIOMotorClient(MONGODB_URL)
+database = mongo_client[DATABASE_NAME]
+
+# Celery client used to dispatch tasks; Redis database 0 is the broker
+celery_app = Celery(broker=f"redis://{redis_host}:6379/0")
+
+logging.basicConfig(level=logging.INFO)
+
+
+def get_database():
+    """Return the module-level MongoDB database handle."""
+    return database
+
+
+@app.on_event("startup")
+async def on_startup():
+    await init_beanie(
+        database=get_database(),
+        document_models=[
+            AssistantObject,
+            ThreadObject,
+            MessageObject,
+            RunObject,
+            OpenAIFile,
+            FilesStorageObject,
+            AssistantFileObject,
+        ],
+    )
+
+    available_models = [r.id for r in litellm_list_model().data]
+    if not available_models:
+        logging.warning("No models configured.")
+        return
+
+    # TODO: model names should be configurable instead of hard-coded
+    welcome_asst_instruction = "You are a welcoming assistant that greets users to Rubra - an LLM tool that makes it easy to create AI assistants."
+    tool_use_instruction = "You have access to a web browser tool, so let the user know that you can browse the web to answer queries."
+
+    tool_enabled_model_pool = ["custom", "gpt-4-1106-preview"]
+    welcome_asst_model = "custom"  # default to custom model
+    if welcome_asst_model not in available_models:
+        if "gpt-4-1106-preview" in available_models:
+            welcome_asst_model = "gpt-4-1106-preview"
+        else:
+            welcome_asst_model = available_models[0]
+
+    if welcome_asst_model in tool_enabled_model_pool:
+        welcome_asst_instruction += tool_use_instruction
+
+    # Create the Welcome Assistant if it doesn't exist
+    existing_assistant = await AssistantObject.find_one({"id": "asst_welcome"})
+    if not existing_assistant:
+        logging.info("Creating Welcome Assistant")
+        assistant = AssistantObject(
+            assistant_id="asst_welcome",
+            object=Object20.assistant.value,
+            created_at=int(datetime.now().timestamp()),
+            name="Welcome Assistant",
+            description="Welcome Assistant",
+            model=welcome_asst_model,
+            instructions=welcome_asst_instruction,
+            tools=[{"type": Type824.retrieval.value}]
+            if welcome_asst_model in tool_enabled_model_pool
+            else [],  # browser
+            file_ids=[],
+            metadata={},
+        )
+        await assistant.insert()
+
+
+@app.get("/get_api_key_status", tags=["API Keys"])
+async def get_api_key_status():
+    try:
+        redis = await aioredis.from_url(
+            f"redis://{redis_host}:6379/0", encoding="utf-8", decode_responses=True
+        )
+        openai_key = await redis.get("OPENAI_API_KEY")
+        anthropic_key = await redis.get("ANTHROPIC_API_KEY")
+
+        # Convert the string values to booleans
+        openai_key_status = openai_key.lower() == "true" if openai_key else False
+        anthropic_key_status = (
+            anthropic_key.lower() == "true" if anthropic_key else False
+        )
+
+        return {
+            "OPENAI_API_KEY": openai_key_status,
+            "ANTHROPIC_API_KEY": anthropic_key_status,
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post("/set_api_keys", tags=["API Keys"])
+async def set_api_key_status(api_keys: ApiKeysUpdateModel):
+    try:
+        redis = await aioredis.from_url(
+            f"redis://{redis_host}:6379/0", encoding="utf-8", decode_responses=True
+        )
+
+        logging.info("Setting API keys")
+        logging.info(api_keys)
+
+        async with redis:
+            if api_keys.OPENAI_API_KEY is not None:
+                logging.info("Setting OPENAI_API_KEY" + str(api_keys.OPENAI_API_KEY))
+                await redis.set("OPENAI_API_KEY", str(api_keys.OPENAI_API_KEY))
+            if api_keys.ANTHROPIC_API_KEY is not None:
+                await redis.set("ANTHROPIC_API_KEY", str(api_keys.ANTHROPIC_API_KEY))
+
+        return {"message": "API key status updated successfully"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post("/assistants", response_model=AssistantObject, tags=["Assistants"])
+async def create_assistant(body: CreateAssistantRequest) -> AssistantObject:
+    """
+    Create an assistant with a model and instructions.
+    """
+    # Generate a unique ID for the assistant
+    assistant_id = generate_assistant_id()
+    logging.info("assistant_id: %s", assistant_id)
+
+    # Create an AssistantObject from the request data
+    assistant = AssistantObject(
+        assistant_id=assistant_id,
+        object="assistant",
+        created_at=int(datetime.now().timestamp()),
+        name=body.name or "",
+        description=body.description or "",
+        model=body.model,
+        instructions=body.instructions or "",
+        tools=body.tools or [],
+        file_ids=body.file_ids or [],
+        metadata=body.metadata or {},
+    )
+
+    # Save the assistant to MongoDB
+    await assistant.insert()
+
+    for file_id in assistant.file_ids:
+        await _create_assistant_file(assistant_id=assistant_id, file_id=file_id)
+
+    return assistant
+
+
+@app.get("/assistants", response_model=ListAssistantsResponse, tags=["Assistants"])
+async def list_assistants(
+    limit: Optional[int] = 20,
+    order: Optional[Order1] = "desc",
+    after: Optional[str] = None,
+    before: Optional[str] = None,
+) -> ListAssistantsResponse:
+    """
+    Returns a list of assistants.
+    """
+    query = {}
+
+    # Apply 'after' and 'before' filters
+    if after:
+        query["assistant_id"] = {"$gt": after}
+    if before:
+        query["assistant_id"] = {"$lt": before}
+
+    # Define sorting order
+    sort_order = DESCENDING if order == "desc" else ASCENDING
+
+    # Prepare the query for assistants
+    find_query = AssistantObject.find(query).sort([("assistant_id", sort_order)])
+
+    # Retrieve assistants from MongoDB
+    assistants = await find_query.to_list(limit)
+
+    # Check if there are more results
+    total_count = await find_query.count()
+    has_more = len(assistants) < total_count
+
+    # Prepare the response
+    list_response = ListAssistantsResponse(
+        object="list",
+        data=assistants,
+        first_id=assistants[0].assistant_id if assistants else "",
+        last_id=assistants[-1].assistant_id if assistants else "",
+        has_more=has_more,
+    )
+
+    return list_response
+
+
+@app.get(
+    "/assistants/{assistant_id}", response_model=AssistantObject, tags=["Assistants"]
+)
+async def get_assistant(assistant_id: str) -> AssistantObject:
+    """
+    Retrieves an assistant.
+    """
+    # Query the MongoDB database for the assistant using the 'id' field
+    assistant = await AssistantObject.find_one({"id": assistant_id})
+
+    # Check if the assistant data was found
+    if not assistant:
+        raise HTTPException(
+            status_code=404, detail=f"Assistant with ID '{assistant_id}' not found"
+        )
+
+    return assistant
+
+
+@app.post(
+    "/assistants/{assistant_id}", response_model=AssistantObject, tags=["Assistants"]
+)
+async def modify_assistant(
+    assistant_id: str, body: ModifyAssistantRequest
+) -> AssistantObject:
+    """
+    Modifies an assistant.
+    """
+    # Query the MongoDB database for the assistant using the 'id' field
+    existing_assistant = await AssistantObject.find_one({"id": assistant_id})
+
+    # Check if the assistant exists
+    if not existing_assistant:
+        raise HTTPException(
+            status_code=404, detail=f"Assistant with ID '{assistant_id}' not found"
+        )
+
+    # Update the assistant object with new data from the request
+    updated_fields = body.dict(exclude_unset=True)
+    for key, value in updated_fields.items():
+        setattr(existing_assistant, key, value)
+
+    # TODO: take care of assistant file creation and deletion.
+
+    # Save the updated assistant to MongoDB
+    await existing_assistant.save()
+
+    return existing_assistant
+
+
+@app.delete(
+    "/assistants/{assistant_id}",
+    response_model=DeleteAssistantResponse,
+    tags=["Assistants"],
+)
+async def delete_assistant(assistant_id: str) -> DeleteAssistantResponse:
+    """
+    Delete an assistant.
+    """
+    # Query the MongoDB database for the assistant using the 'id' field
+    existing_assistant = await AssistantObject.find_one({"id": assistant_id})
+
+    # Check if the assistant exists
+    if not existing_assistant:
+        raise HTTPException(
+            status_code=404, detail=f"Assistant with ID '{assistant_id}' not found"
+        )
+
+    # Delete assistant files
+    if existing_assistant.file_ids:
+        # Local
+        from app.vector_db.milvus.main import drop_collection
+
+        for file_id in existing_assistant.file_ids:
+            existing_assistant_file = await AssistantFileObject.find_one(
+                {"id": file_id, "assistant_id": assistant_id}
+            )
+            if existing_assistant_file:
+                await existing_assistant_file.delete()
+            else:
+                logging.warning(
+                    f"assistant file {file_id} for assistant {assistant_id} not found"
+                )
+        drop_collection(assistant_id)
+
+    # Delete the assistant from MongoDB
+    await existing_assistant.delete()
+
+    # Return response indicating successful deletion
+    return DeleteAssistantResponse(
+        id=assistant_id, deleted=True, object=Object21.assistant_deleted
+    )
+
+
+@app.post("/threads", response_model=ThreadObject, tags=["Assistants"])
+async def create_thread(body: CreateThreadRequest = None) -> ThreadObject:
+    """
+    Create a thread.
+    """
+    # Generate a unique ID for the thread with the specified format
+    thread_id = generate_thread_id()
+
+    # Create MessageObjects from the provided messages
+    messages = []
+    if body and body.messages:
+        for msg_request in body.messages:
+            # Convert Role8 to Role7 if valid
+            if msg_request.role.value in Role7.__members__:
+                role = Role7(msg_request.role.value)
+            else:
+                raise HTTPException(
+                    status_code=400, detail=f"Invalid role: {msg_request.role}"
+                )
+
+            # Create a MessageObject
+            message_id = generate_message_id()
+            message = MessageObject(
+                message_id=message_id,
+                object=Object25.thread_message,
+                created_at=int(datetime.now().timestamp()),
+                thread_id=thread_id,
+                role=role,
+                content=[
+                    MessageContentTextObject(
+                        type=Type13.text,
+                        text=Text(value=msg_request.content, annotations=[]),
+                    )
+                ],  # TODO need to address MessageContentImageFileObject
+                file_ids=msg_request.file_ids if msg_request.file_ids else [],
+                metadata=msg_request.metadata if msg_request.metadata else {},
+                assistant_id="",
+                run_id="",
+            )
+            messages.append(message)
+
+    # Create a ThreadObject
+    thread = ThreadObject(
+        thread_id=thread_id,
+        object=Object23.thread,
+        created_at=int(datetime.now().timestamp()),
+        metadata=body.metadata if body and body.metadata else {},
+    )
+
+    await thread.insert()
+    tasks = []
+    for msg in messages:
+        tasks.append(msg.insert())
+    await asyncio.gather(*tasks)
+
+    return thread
+
+
+@app.get("/threads/{thread_id}", response_model=ThreadObject, tags=["Assistants"])
+async def get_thread(thread_id: str) -> ThreadObject:
+    """
+    Retrieves a thread.
+    """
+    # Query the MongoDB database for the thread using the 'thread_id' field
+    thread = await ThreadObject.find_one({"id": thread_id})
+
+    # Check if the thread was found
+    if not thread:
+        raise HTTPException(
+            status_code=404, detail=f"Thread with ID '{thread_id}' not found"
+        )
+
+    return thread
+
+
+@app.post("/threads/{thread_id}", response_model=ThreadObject, tags=["Assistants"])
+async def modify_thread(thread_id: str, body: ModifyThreadRequest) -> ThreadObject:
+    """
+    Modifies a thread.
+    """
+    # Fetch the existing thread data from MongoDB
+    existing_thread = await ThreadObject.find_one({"id": thread_id})
+
+    # Check if the thread exists
+    if not existing_thread:
+        raise HTTPException(
+            status_code=404, detail=f"Thread with ID '{thread_id}' not found"
+        )
+
+    # Update the thread metadata by merging it with new data from the request
+    if body.metadata is not None:
+        existing_thread.metadata = existing_thread.metadata or {}
+        existing_thread.metadata.update(body.metadata)
+
+    # Save the updated thread back in MongoDB
+    await existing_thread.save()
+
+    return existing_thread
+
+
+@app.delete(
+    "/threads/{thread_id}", response_model=DeleteThreadResponse, tags=["Assistants"]
+)
+async def delete_thread(thread_id: str) -> DeleteThreadResponse:
+    """
+    Delete a thread.
+    """
+    # Check if the thread exists in MongoDB
+    existing_thread = await ThreadObject.find_one({"id": thread_id})
+
+    # If the thread does not exist, raise an HTTP 404 error
+    if not existing_thread:
+        raise HTTPException(
+            status_code=404, detail=f"Thread with ID '{thread_id}' not found"
+        )
+
+    # Delete the thread from MongoDB
+    await existing_thread.delete()
+
+    # Return response indicating successful deletion
+    return DeleteThreadResponse(
+        id=thread_id, deleted=True, object=Object24.thread_deleted
+    )
+
+
+@app.post(
+    "/threads/{thread_id}/messages", response_model=MessageObject, tags=["Assistants"]
+)
+async def create_message(thread_id: str, body: CreateMessageRequest) -> MessageObject:
+    """
+    Create a message in a thread.
+    """
+    # Generate a unique ID for the message
+    message_id = generate_message_id()
+
+    # Ensure the thread exists
+    thread = await ThreadObject.find_one({"id": thread_id})
+    if not thread:
+        raise HTTPException(
+            status_code=404, detail=f"Thread with ID '{thread_id}' not found"
+        )
+
+    # Convert the content string to a MessageContentTextObject
+    content = MessageContentTextObject(
+        type=Type13.text,  # Since we are dealing with only text content
+        text=Text(value=body.content, annotations=[]),
+    )
+
+    # Create a MessageObject with the text content
+    message = MessageObject(
+        message_id=message_id,
+        object=Object25.thread_message,  # Replace with the correct enum or object for 'thread.message'
+        created_at=int(datetime.now().timestamp()),
+        thread_id=thread_id,
+        role=Role7(body.role.value),  # Convert Role8 to Role7 if required and valid
+        content=[content],  # List containing the text content object
+        assistant_id="",  # Populate if applicable
+        run_id="",  # Populate if applicable
+        file_ids=body.file_ids or [],
+        metadata=body.metadata or {},
+    )
+
+    # Insert the message into MongoDB
+    await message.insert()
+
+    return message
+
+
+@app.get(
+    "/threads/{thread_id}/messages/{message_id}",
+    response_model=MessageObject,
+    tags=["Assistants"],
+)
+async def get_message(thread_id: str, message_id: str) -> MessageObject:
+    """
+    Retrieve a message from a thread.
+    """
+    # Check if the thread exists
+    thread = await ThreadObject.find_one({"id": thread_id})
+    if not thread:
+        raise HTTPException(
+            status_code=404, detail=f"Thread with ID '{thread_id}' not found"
+        )
+
+    # Query the MongoDB database for the message using 'message_id' and 'thread_id'
+    message = await MessageObject.find_one({"id": message_id, "thread_id": thread_id})
+
+    # Check if the message was found
+    if not message:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Message with ID '{message_id}' in thread '{thread_id}' not found",
+        )
+
+    return message
+
+
+@app.post(
+    "/threads/{thread_id}/messages/{message_id}",
+    response_model=MessageObject,
+    tags=["Assistants"],
+)
+async def modify_message(
+    thread_id: str, message_id: str, body: ModifyMessageRequest
+) -> MessageObject:
+    """
+    Modifies a message within a thread.
+    """
+    # Check if the thread exists
+    thread = await ThreadObject.find_one({"id": thread_id})
+    if not thread:
+        raise HTTPException(
+            status_code=404, detail=f"Thread with ID '{thread_id}' not found"
+        )
+
+    # Query the MongoDB database for the message using 'message_id' and 'thread_id'
+    message = await MessageObject.find_one({"id": message_id, "thread_id": thread_id})
+
+    # Check if the message was found
+    if not message:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Message with ID '{message_id}' in thread '{thread_id}' not found",
+        )
+
+    # Update the message metadata by merging it with new data from the request
+    if body.metadata is not None:
+        message.metadata = message.metadata or {}
+        message.metadata.update(body.metadata)
+
+    # Save the updated message back in MongoDB
+    await message.save()
+
+    return message
+
+
+@app.get(
+    "/threads/{thread_id}/messages",
+    response_model=ListMessagesResponse,
+    tags=["Assistants"],
+)
+async def list_messages(
+    thread_id: str,
+    limit: Optional[int] = 20,
+    order: Optional[Order3] = "desc",
+    after: Optional[str] = None,
+    before: Optional[str] = None,
+) -> ListMessagesResponse:
+    """
+    Returns a list of messages for a given thread.
+    """
+    # Check if the thread exists
+    thread = await ThreadObject.find_one({"id": thread_id})
+    if not thread:
+        raise HTTPException(
+            status_code=404, detail=f"Thread with ID '{thread_id}' not found"
+        )
+
+    # Define sorting order
+    sort_order = DESCENDING if order == "desc" else ASCENDING
+
+    # Prepare the query for messages
+    query = {"thread_id": thread_id}
+    if after:
+        query["message_id"] = {"$gt": after}
+    if before:
+        query["message_id"] = {"$lt": before}
+
+    # Retrieve messages from MongoDB
+    find_query = MessageObject.find(query).sort([("created_at", sort_order)])
+    messages = await find_query.to_list(limit)
+
+    # Check if there are more results
+    total_count = await find_query.count()
+    has_more = len(messages) < total_count
+
+    # Prepare the response
+    list_response = ListMessagesResponse(
+        object="list",
+        data=messages,
+        first_id=messages[0].message_id if messages else None,
+        last_id=messages[-1].message_id if messages else None,
+        has_more=has_more,
+    )
+
+    return list_response
+
+
+async def redis_subscriber(channel, timeout=1):
+    logging.info(f"Connecting to Redis and subscribing to channel: {channel}")
+    redis = await aioredis.from_url(
+        f"redis://{redis_host}:6379/0", encoding="utf-8", decode_responses=True
+    )
+    pubsub = redis.pubsub()
+    await pubsub.subscribe(channel)
+
+    while True:
+        try:
+            message = await asyncio.wait_for(
+                pubsub.get_message(ignore_subscribe_messages=True), timeout=timeout
+            )
+            if message and message["type"] == "message":
+                yield message["data"]
+            else:
+                yield None  # Yield None if no message is received
+        except asyncio.TimeoutError:
+            yield None  # Yield None on timeout
+
+    logging.info(f"Unsubscribing from Redis channel: {channel}")
+    await pubsub.unsubscribe(channel)
+    await redis.close()
+
+
+async def listen_for_task_status(
+    task_status_channel, status_update_event, thread_id, run_id
+):
+    """
+    Wait for a task status message for one run and signal it through an event.
+
+    Subscribes to `task_status_channel`, decodes each message as JSON, and
+    sets `status_update_event` on the first message whose "thread_id" and
+    "run_id" equal the given ones. Errors are logged, not raised; the
+    subscription and connection are released in all cases.
+    """
+    logging.info(f"Listening for task status on channel: {task_status_channel}")
+    # Start as None so `finally` only cleans up what was actually created
+    redis = None
+    pubsub = None
+    try:
+        redis = await aioredis.from_url(
+            f"redis://{redis_host}:6379/0", encoding="utf-8", decode_responses=True
+        )
+        pubsub = redis.pubsub()
+        await pubsub.subscribe(task_status_channel)
+
+        while True:
+            message = await pubsub.get_message(
+                ignore_subscribe_messages=True, timeout=1.0
+            )
+            if message:
+                if message["type"] == "message":
+                    task_status = json.loads(message["data"])
+                    # Ignore status updates that belong to other runs
+                    if (
+                        task_status["thread_id"] == thread_id
+                        and task_status["run_id"] == run_id
+                    ):
+                        logging.info(f"Received task status update: {task_status}")
+                        status_update_event.set()
+                        break
+            await asyncio.sleep(0.1)  # Prevents the loop from being blocking
+    except Exception as e:
+        logging.error(f"Error in listen_for_task_status: {e}")
+    finally:
+        if pubsub:
+            await pubsub.unsubscribe(task_status_channel)
+            await pubsub.close()
+        if redis:
+            await redis.close()
+
+
+@app.websocket("/ws/{thread_id}/{run_id}")
+async def websocket_endpoint(websocket: WebSocket, thread_id: str, run_id: str):
+    await websocket.accept()
+    logging.info(f"WebSocket connection opened for thread {thread_id}, run {run_id}")
+    status_update_event = asyncio.Event()
+    channel = f"task_status_{thread_id}"
+    asyncio.create_task(
+        listen_for_task_status(channel, status_update_event, thread_id, run_id)
+    )
+    try:
+        await send_messages_to_websocket(
+            websocket, thread_id, run_id, status_update_event
+        )
+    except WebSocketDisconnect:
+        logging.info(f"WebSocket disconnected on thread {thread_id}, run {run_id}")
+    except Exception as e:
+        logging.error(f"WebSocket error on thread {thread_id}, run {run_id}: {e}")
+    finally:
+        await websocket.close()
+        logging.info(f"WebSocket for thread {thread_id}, run {run_id} closed")
+
+
+async def send_messages_to_websocket(
+    websocket: WebSocket, thread_id: str, run_id: str, status_update_event
+):
+    logging.info(f"Sending messages to WebSocket for thread {thread_id}, run {run_id}")
+    while not status_update_event.is_set():
+        async for message in redis_subscriber(thread_id):
+            if message is None:  # Check if message is None and continue if it is
+                if status_update_event.is_set():
+                    logging.info("Status update event set, breaking loop")
+                    break
+                continue
+
+            if websocket.client_state == WebSocketState.DISCONNECTED:
+                logging.info("Client disconnected websocket")
+                return  # Exit the function as the client is disconnected
+            await websocket.send_text(message)
+
+    logging.info("Closing connection as task is completed or failed")
+    await websocket.send_text("CLOSE_CONNECTION")
+
+
+@app.post("/threads/{thread_id}/runs", response_model=RunObject, tags=["Assistants"])
+async def create_run(thread_id: str, body: CreateRunRequest) -> RunObject:
+    """
+    Create a run.
+    """
+    # Generate a unique ID for the run
+    run_id = generate_run_id()
+
+    # Ensure the thread exists
+    thread = await ThreadObject.find_one({"id": thread_id})
+    if not thread:
+        raise HTTPException(
+            status_code=404, detail=f"Thread with ID '{thread_id}' not found"
+        )
+
+    # Fetch the assistant details
+    assistant = await AssistantObject.find_one({"id": body.assistant_id})
+    if not assistant:
+        raise HTTPException(
+            status_code=404, detail=f"Assistant with ID '{body.assistant_id}' not found"
+        )
+
+    # Use tools and file_ids from the assistant, or default if not present
+    tools = assistant.tools or []
+    file_ids = assistant.file_ids or []
+
+    # Create a RunObject with the provided data and default values
+    run = RunObject(
+        run_id=run_id,
+        object=Object22.thread_run,
+        created_at=int(datetime.now().timestamp()),
+        thread_id=thread_id,
+        assistant_id=body.assistant_id,
+        status=Status2.queued,  # Default status to 'queued'
+        required_action=None,
+        last_error=None,
+        expires_at=None,  # Set if applicable
+        started_at=None,  # Set when the run starts
+        cancelled_at=None,
+        failed_at=None,
+        completed_at=None,  # Set when the run completes
+        model=body.model
+        or assistant.model,  # Use the model from the request or assistant
+        instructions=body.instructions or assistant.instructions,
+        tools=tools,
+        file_ids=file_ids,
+        metadata=body.metadata or {},
+    )
+
+    logging.info(run)
+
+    # Insert the run into MongoDB
+    await run.insert()
+
+    # Dispatch the task and set initial run status to 'queued'
+    redis_channel = f"{thread_id}"
+    celery_app.send_task(
+        "app.tasks.execute_chat_completion",
+        args=[body.assistant_id, thread_id, redis_channel, run_id],
+    )
+
+    await run.save()
+
+    return run
+
+
+@app.get(
+    "/threads/{thread_id}/runs/{run_id}", response_model=RunObject, tags=["Assistants"]
+)
+async def get_run(thread_id: str, run_id: str) -> RunObject:
+    """
+    Retrieves a run associated with a given thread.
+    """
+    # Query the MongoDB database for the run using 'run_id' and 'thread_id'
+    run = await RunObject.find_one({"id": run_id, "thread_id": thread_id})
+
+    # Check if the run was found
+    if not run:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Run with ID '{run_id}' in thread '{thread_id}' not found",
+        )
+
+    return run
+
+
+@app.post(
+    "/threads/{thread_id}/runs/{run_id}", response_model=RunObject, tags=["Assistants"]
+)
+async def modify_run(thread_id: str, run_id: str, body: ModifyRunRequest) -> RunObject:
+    """
+    Modifies a run.
+    """
+    # Query the MongoDB database for the run using 'run_id' and 'thread_id'
+    existing_run = await RunObject.find_one({"id": run_id, "thread_id": thread_id})
+
+    # Check if the run exists
+    if not existing_run:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Run with ID '{run_id}' in thread '{thread_id}' not found",
+        )
+
+    # Update the run object with new metadata from the request
+    if body.metadata is not None:
+        existing_run.metadata = existing_run.metadata or {}
+        existing_run.metadata.update(body.metadata)
+
+    # Save the updated run to MongoDB
+    await existing_run.save()
+
+    return existing_run
+
+
+def convert_to_model(obj):
+    return Model(
+        id=obj.id, created=obj.created, object=obj.object, owned_by=obj.owned_by
+    )
+
+
+def find_provider(obj):
+    if obj["litellm_params"]:
+        if "custom_llm_provider" in obj["litellm_params"]:
+            return obj["litellm_params"]["custom_llm_provider"]
+        else:
+            logging.info(
+                "Cannot find custom_llm_provider field. Trying to extract from model name"
+            )
+            return obj["litellm_params"]["model"].split("/")[0]
+    logging.info(obj)
+    return "rubra"
+
+
+def convert_model_info_to_oai_model(obj, predefined_models):
+    """
+    Build an OpenAI-style `Model` from one LiteLLM model-info entry.
+
+    The `created` value is chosen as follows:
+      * entry has `model_info` with a `created` key -> that value,
+      * entry has `model_info` without `created` -> 0 (the entry is logged),
+      * entry has no `model_info` (predefined model) -> the `created` of the
+        first item in `predefined_models` whose id equals the model name,
+        or -1 when none matches.
+    """
+    info = obj["model_info"]
+    if info and "created" in info:
+        created_at = info["created"]
+    elif info:
+        logging.info("Cannot find created field")
+        logging.info(obj)
+        created_at = 0
+    else:
+        # Predefined model: borrow the timestamp from the matching listing entry.
+        created_at = next(
+            (m.created for m in predefined_models if m.id == obj["model_name"]),
+            -1,
+        )
+
+    return Model(
+        id=obj["model_name"],
+        created=created_at,
+        object="model",
+        owned_by=find_provider(obj),
+    )
+
+
+def litellm_list_model() -> ListModelsResponse:
+    """
+    List the models known to the LiteLLM proxy in OpenAI list format.
+
+    Two upstream calls are combined: the OpenAI-compatible model listing supplies
+    the `created` timestamps used for predefined models, and `/model/info`
+    supplies the entries that are actually returned.
+
+    Returns:
+        The converted models, or an empty list when anything fails. This is a
+        best-effort call: the failure is logged with its traceback, not raised.
+    """
+    try:
+        client = OpenAI(base_url=LITELLM_URL, api_key="abc")
+        listed = sorted(client.models.list().data, key=lambda x: x.id)
+        predefined_models = [convert_to_model(m) for m in listed]
+
+        models_data = requests.get(f"{LITELLM_URL}/model/info").json().get("data", [])
+        models = [
+            convert_model_info_to_oai_model(m, predefined_models) for m in models_data
+        ]
+        return ListModelsResponse(object=Object.list, data=models)
+
+    except Exception:
+        # Deliberate catch-all at the proxy boundary; keep the traceback in the log.
+        logging.exception("Failed to list models from LiteLLM")
+        return ListModelsResponse(object=Object.list, data=[])
+
+
+@app.get("/models", response_model=ListModelsResponse, tags=["Models"])
+def list_models() -> ListModelsResponse:
+    """Return the model list produced by `litellm_list_model`."""
+    return litellm_list_model()
+
+
+@app.get("/models/info", tags=["Models"])
+def get_models_info():
+    """
+    Proxy LiteLLM's `/model/info` endpoint.
+
+    Returns the decoded JSON body on HTTP 200 and an empty list for any other status.
+    """
+    upstream = requests.get(f"{LITELLM_URL}/model/info")
+    return upstream.json() if upstream.status_code == 200 else []
+
+
+class ModelInfo(BaseModel):
+    """Model metadata sent as the `model_info` part of an `AddModel` request."""
+
+    id: str
+    created: int
+
+
+class AddModel(BaseModel):
+    """Request body of `POST /models/new`; forwarded as-is to LiteLLM."""
+
+    model_name: str
+    # Assuming litellm_params is a dictionary with unspecified structure
+    litellm_params: Dict[str, Any]
+    model_info: ModelInfo
+
+
+@app.post("/models/new", tags=["Models"])
+def add_model(add_model: AddModel):
+    """
+    Register a new model with LiteLLM by forwarding the request to `/model/new`.
+
+    Returns the decoded JSON body on HTTP 200 and an empty list for any other status.
+    """
+    data = add_model.dict()
+    # Log the model name only: `litellm_params` is free-form and may carry provider
+    # credentials, which must not end up on stdout or in the logs.
+    logging.info("Adding model %s", data["model_name"])
+    response = requests.post(f"{LITELLM_URL}/model/new", json=data, headers=HEADERS)
+    if response.status_code == 200:
+        return response.json()
+    return []
+
+
+class ModelID(BaseModel):
+    """Request body of `POST /models/delete`: the ID of the model to delete."""
+
+    id: str
+
+
+@app.post("/models/delete", tags=["Models"])
+def delete_model(model_id: ModelID):
+    """
+    Ask LiteLLM to delete a model by forwarding the request to `/model/delete`.
+
+    Returns the decoded JSON body on HTTP 200 and an empty list for any other status.
+    """
+    upstream = requests.post(
+        f"{LITELLM_URL}/model/delete", json=model_id.dict(), headers=HEADERS
+    )
+    return upstream.json() if upstream.status_code == 200 else []
+
+
+class EnvironmentVariablesModel(BaseModel):
+    """Request body of `POST /config/update`: environment variable names mapped to values."""
+
+    environment_variables: Dict[str, str]
+
+
+@app.post("/config/update", tags=["Models"])
+def update_key(data: EnvironmentVariablesModel):
+    """
+    Forward environment variable updates to LiteLLM's `/config/update` endpoint.
+
+    Returns the decoded JSON body on HTTP 200 and an empty list for any other status.
+    """
+    upstream = requests.post(
+        f"{LITELLM_URL}/config/update", json=data.dict(), headers=HEADERS
+    )
+    return upstream.json() if upstream.status_code == 200 else []
+
+
+### File APIs: list, upload, delete, retrieve and download
+@app.get("/files", response_model=ListFilesResponse, tags=["Files"])
+async def list_files(purpose: Optional[str] = None) -> ListFilesResponse:
+    """
+    List stored file records, optionally restricted to one purpose.
+
+    An empty `purpose` is treated like an omitted one. Any other value must be a
+    member name of `Purpose1`, otherwise an HTTP 404 error is raised.
+    """
+    wanted_purpose = purpose or None
+    if wanted_purpose is not None and wanted_purpose not in Purpose1.__members__:
+        raise HTTPException(
+            status_code=404,
+            detail=f"the purpose of the file has to be one of {Purpose1.__members__}",
+        )
+
+    # An empty filter matches every file.
+    search_filter = {"purpose": wanted_purpose} if wanted_purpose else {}
+    matching = [record async for record in OpenAIFile.find(search_filter)]
+    return ListFilesResponse(data=matching, object=Object7.list)
+
+
+@app.post("/files", response_model=OpenAIFile, tags=["Files"])
+async def create_file(file: UploadFile, purpose: str = Form(...)) -> OpenAIFile:
+    """
+    Upload a file that can be used across various endpoints/features.
+
+    The upload is stored as two documents sharing one generated `file_id`: an
+    `OpenAIFile` metadata record and a `FilesStorageObject` holding the raw bytes.
+    No size limit is enforced here.
+
+    Args:
+        file: The multipart upload.
+        purpose: Intended use of the file; must be a member name of `Purpose1`.
+
+    Returns:
+        The stored `OpenAIFile` metadata record.
+
+    Raises:
+        HTTPException: 404 when `purpose` is not a member name of `Purpose1`.
+    """
+    if purpose not in Purpose1.__members__:
+        raise HTTPException(
+            status_code=404,
+            detail=f"the purpose of the file has to be one of {Purpose1.__members__}",
+        )
+
+    # Read the payload before anything is written, so that a failed read cannot
+    # leave a metadata record behind without content.
+    # TODO: size limit of a file?
+    content = await file.read()
+
+    # NOTE(review): only 6 hex characters of the UUID are kept; confirm that this
+    # is enough to keep generated IDs unique for the expected number of files.
+    file_id = f"file_{uuid.uuid4().hex[:6]}"
+
+    uploaded_file = OpenAIFile(
+        file_id=file_id,
+        # Count the bytes actually received instead of relying on `file.size`,
+        # which is not guaranteed to be set.
+        bytes=len(content),
+        created_at=int(datetime.now().timestamp()),  # Unix timestamp in seconds
+        filename=file.filename,
+        object=Object14.file,
+        purpose=purpose,
+        status=Status.uploaded,
+    )
+    await uploaded_file.insert()
+
+    file_content_object = FilesStorageObject(
+        file_id=file_id, content=content, content_type=file.content_type
+    )
+    await file_content_object.insert()
+
+    return uploaded_file
+
+
+@app.delete("/files/{file_id}", response_model=DeleteFileResponse, tags=["Files"])
+async def delete_file(file_id: str) -> DeleteFileResponse:
+    """
+    Delete a file: its stored content and its metadata record.
+
+    Raises:
+        HTTPException: 404 when no metadata record exists for `file_id`.
+    """
+    existing_file = await OpenAIFile.find_one({"id": file_id})
+
+    # If the file does not exist, raise an HTTP 404 error
+    if not existing_file:
+        raise HTTPException(
+            status_code=404, detail=f"File with ID '{file_id}' not found"
+        )
+
+    # The content document can be missing (for example after an interrupted
+    # upload); that must not prevent the metadata record from being removed.
+    existing_file_object = await FilesStorageObject.find_one({"id": file_id})
+    if existing_file_object:
+        await existing_file_object.delete()
+    else:
+        logging.warning("No stored content found for file '%s'", file_id)
+
+    # Remove the metadata last, so the file stays addressable (and the request
+    # can be retried) if deleting the content fails.
+    await existing_file.delete()
+
+    # Return response indicating successful deletion
+    return DeleteFileResponse(id=file_id, object=Object8.file, deleted=True)
+
+
+@app.get("/files/{file_id}", response_model=OpenAIFile, tags=["Files"])
+async def retrieve_file(file_id: str) -> OpenAIFile:
+    """
+    Return the metadata record of one file.
+
+    Raises an HTTP 404 error when no record exists for `file_id`.
+    """
+    file_record = await OpenAIFile.find_one({"id": file_id})
+    if file_record:
+        return file_record
+
+    raise HTTPException(
+        status_code=404, detail=f"File with ID '{file_id}' not found"
+    )
+
+
+@app.get("/files/{file_id}/content", response_model=str, tags=["Files"])
+async def download_file(file_id: str) -> str:
+    """
+    Returns the contents of the specified file, decoded to text.
+
+    Encodings are tried in the order produced by `_candidate_encodings`; the
+    first one that decodes the stored bytes wins.
+
+    Raises:
+        HTTPException: 404 when no content is stored under `file_id`, or when
+            none of the candidate encodings can decode it.
+    """
+    existing_file_object = await FilesStorageObject.find_one({"id": file_id})
+
+    # If the file content does not exist, raise an HTTP 404 error
+    if not existing_file_object:
+        raise HTTPException(
+            status_code=404, detail=f"File content with ID '{file_id}' not found"
+        )
+
+    content = existing_file_object.content
+    for encoding in _candidate_encodings(content):
+        try:
+            return content.decode(encoding)
+        except UnicodeError as e:
+            logging.error(f"Decoding content with {encoding} failed: {e}\n")
+
+    # Defensive: the last candidate accepts any byte sequence, so this is only
+    # reached if the candidate list changes.
+    raise HTTPException(
+        status_code=404, detail=f"Fail to decode content for file: '{file_id}'."
+    )
+
+
+def _candidate_encodings(content: bytes) -> list:
+    """Return the encodings to try for `content`, in order of preference."""
+    # Standard Library
+    import codecs
+
+    # UTF-8 first; it also covers plain ASCII.
+    candidates = ["utf-8"]
+
+    # UTF-32 and UTF-16 are only attempted when a byte-order mark announces them.
+    # UTF-32 is listed first because its little-endian mark starts with the
+    # little-endian UTF-16 mark.
+    if content.startswith((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)):
+        candidates.append("utf-32")
+    if content.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
+        candidates.append("utf-16")
+
+    # ISO-8859-1 maps every byte to a character and therefore never fails, so it
+    # has to come last: anything listed after it would never be tried.
+    candidates.append("iso-8859-1")
+    return candidates
+
+
+### Assistant file APIs: list, attach, retrieve and detach
+
+
+@app.get(
+    "/assistants/{assistant_id}/files",
+    response_model=ListAssistantFilesResponse,
+    tags=["Assistants"],
+)
+async def list_assistant_files(
+    assistant_id: str,
+    limit: Optional[int] = 20,
+    order: Optional[Order9] = "asce",
+    after: Optional[str] = None,
+    before: Optional[str] = None,
+) -> ListAssistantFilesResponse:
+    """
+    Returns a list of assistant files.
+
+    Args:
+        assistant_id: ID of the assistant whose files are listed.
+        limit: Maximum number of files to return.
+        order: "desc" sorts by file ID descending; any other value sorts ascending.
+        after: Only return files whose ID is greater than this one.
+        before: Only return files whose ID is less than this one.
+
+    Raises:
+        HTTPException: 404 when the assistant does not exist.
+    """
+    existing_assistant = await AssistantObject.find_one({"id": assistant_id})
+    if not existing_assistant:
+        raise HTTPException(
+            status_code=404, detail=f"Assistant with ID '{assistant_id}' not found"
+        )
+
+    query = {"assistant_id": assistant_id}
+
+    # Both cursors constrain the same field, so they are combined into a single
+    # range condition; assigning them one after the other would let `before`
+    # silently replace `after`.
+    # The field is addressed as "id": that is the alias `file_id` is declared
+    # with, and the key the other lookups on this document in this module use.
+    # NOTE(review): the comparison direction does not depend on `order`; confirm
+    # that this is the intended cursor behaviour for descending listings.
+    id_range = {}
+    if after:
+        id_range["$gt"] = after
+    if before:
+        id_range["$lt"] = before
+    if id_range:
+        query["id"] = id_range
+
+    # Define sorting order
+    sort_order = DESCENDING if order == "desc" else ASCENDING
+
+    # Prepare the query for assistant files and retrieve one page from MongoDB
+    find_query = AssistantFileObject.find(query).sort([("id", sort_order)])
+    assistant_files = await find_query.to_list(limit)
+
+    # More results exist when the filter matches more documents than were returned
+    total_count = await find_query.count()
+    has_more = len(assistant_files) < total_count
+
+    return ListAssistantFilesResponse(
+        object="list",
+        data=assistant_files,
+        first_id=assistant_files[0].file_id if assistant_files else "",
+        last_id=assistant_files[-1].file_id if assistant_files else "",
+        has_more=has_more,
+    )
+
+
+@app.post(
+    "/assistants/{assistant_id}/files",
+    response_model=AssistantFileObject,
+    tags=["Assistants"],
+)
+async def create_assistant_file(
+    assistant_id: str, body: CreateAssistantFileRequest
+) -> AssistantFileObject:
+    """
+    Create an assistant file by attaching a [File](/docs/api-reference/files) to an [assistant](/docs/api-reference/assistants).
+
+    The work is delegated to `_create_assistant_file` using `body.file_id`.
+    """
+    attached = await _create_assistant_file(
+        assistant_id=assistant_id, file_id=body.file_id
+    )
+    return attached
+
+
+async def _create_assistant_file(
+    assistant_id: str, file_id: str
+) -> AssistantFileObject:
+    """
+    Attach an uploaded file to an assistant and queue its processing task.
+
+    Steps, in order: check the assistant, the file and the file's purpose;
+    reject a file that is already attached; store the `AssistantFileObject`;
+    queue `execute_asst_file_create`; then record the file on the assistant and
+    make sure the assistant has a `retrieval` tool.
+
+    Every rejection is reported as an HTTP 404 error.
+    """
+    # Local
+    from app.tasks import execute_asst_file_create
+
+    existing_assistant = await AssistantObject.find_one({"id": assistant_id})
+    if not existing_assistant:
+        raise HTTPException(
+            status_code=404, detail=f"Assistant with ID '{assistant_id}' not found"
+        )
+
+    existing_file = await OpenAIFile.find_one({"id": file_id})
+    if not existing_file:
+        raise HTTPException(
+            status_code=404, detail=f"File with ID '{file_id}' not found"
+        )
+    if existing_file.purpose.value != Purpose1.assistants.value:
+        raise HTTPException(
+            status_code=404,
+            detail=f"File with ID '{file_id}' was not uploaded for purpose 'assistants'.",
+        )
+
+    already_attached = await AssistantFileObject.find_one(
+        {"id": file_id, "assistant_id": assistant_id}
+    )
+    if already_attached:
+        raise HTTPException(
+            status_code=404,
+            detail=f"File {file_id} has already been attached to assistant {assistant_id}",
+        )
+
+    # Create and store the attachment record.
+    assistant_file = AssistantFileObject(
+        file_id=file_id,
+        object=Object28.assistant_file,
+        created_at=int(datetime.now().timestamp()),
+        assistant_id=assistant_id,
+    )
+    await assistant_file.insert()
+
+    # Queue the celery task for the new attachment.
+    execute_asst_file_create.delay(file_id=file_id, assistant_id=assistant_id)
+
+    # Record the file on the assistant itself.
+    if file_id not in existing_assistant.file_ids:
+        existing_assistant.file_ids.append(file_id)
+
+    # Add the `retrieval` tool unless the assistant already has one.
+    has_retrieval = any(
+        tool.type.value == Type8.retrieval.value for tool in existing_assistant.tools
+    )
+    if not has_retrieval:
+        existing_assistant.tools.append(AssistantToolsRetrieval(type=Type8.retrieval))
+
+    await existing_assistant.save()
+    return assistant_file
+
+
+@app.get(
+    "/assistants/{assistant_id}/files/{file_id}",
+    response_model=AssistantFileObject,
+    tags=["Assistants"],
+)
+async def get_assistant_file(
+    assistant_id: str, file_id: str = ...
+) -> AssistantFileObject:
+    """
+    Return the attachment record linking `file_id` to `assistant_id`.
+
+    Raises an HTTP 404 error when the file is not attached to that assistant.
+    """
+    attachment = await AssistantFileObject.find_one(
+        {"id": file_id, "assistant_id": assistant_id}
+    )
+    if attachment:
+        return attachment
+
+    raise HTTPException(
+        status_code=404,
+        detail=f"Assistant_file with ID '{file_id}' of assistant {assistant_id} not found",
+    )
+
+
+@app.delete(
+    "/assistants/{assistant_id}/files/{file_id}",
+    response_model=DeleteAssistantFileResponse,
+    tags=["Assistants"],
+)
+async def delete_assistant_file(
+    assistant_id: str, file_id: str = ...
+) -> DeleteAssistantFileResponse:
+    """
+    Delete an assistant file.
+
+    The work is delegated to `_delete_assistant_file`.
+    """
+    deletion = await _delete_assistant_file(
+        assistant_id=assistant_id, file_id=file_id
+    )
+    return deletion
+
+
+async def _delete_assistant_file(
+    assistant_id: str, file_id: str
+) -> DeleteAssistantFileResponse:
+    """
+    Detach a file from an assistant and remove its indexed documents.
+
+    The `AssistantFileObject` is deleted, the file ID is removed from the
+    assistant's `file_ids`, and the `retrieval` tool is dropped once no files
+    remain. Finally `delete_docs` is called on the collection named after the
+    assistant, with an expression selecting this file.
+
+    Raises an HTTP 404 error when the assistant or the attachment is missing.
+    """
+    # Local
+    from app.vector_db.milvus.main import delete_docs
+
+    existing_assistant = await AssistantObject.find_one({"id": assistant_id})
+    if not existing_assistant:
+        raise HTTPException(
+            status_code=404, detail=f"Assistant with ID '{assistant_id}' not found"
+        )
+
+    attachment = await AssistantFileObject.find_one(
+        {"id": file_id, "assistant_id": assistant_id}
+    )
+    if not attachment:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Assistant_file with ID '{file_id}' of assistant {assistant_id} not found",
+        )
+
+    await attachment.delete()
+
+    remaining_ids = [x for x in existing_assistant.file_ids if x != file_id]
+    existing_assistant.file_ids = remaining_ids
+    if not remaining_ids:
+        # No files left: the retrieval tool is removed as well.
+        existing_assistant.tools = [
+            tool
+            for tool in existing_assistant.tools
+            if tool.type.value != Type8.retrieval.value
+        ]
+    await existing_assistant.save()
+
+    delete_docs(collection_name=assistant_id, expr=f"file_id == '{file_id}'")
+
+    # Return response indicating successful deletion
+    return DeleteAssistantFileResponse(
+        id=file_id,
+        deleted=True,
+        object=Object29.assistant_file_deleted,
+    )
+
+
+@app.get(
+    "/threads/{thread_id}/runs/{run_id}/steps",
+    response_model=ListRunStepsResponse,
+    tags=["Assistants"],
+)
+async def list_run_steps(
+    thread_id: str,
+    run_id: str = ...,
+    limit: Optional[int] = 20,
+    order: Optional[Order7] = "desc",
+    after: Optional[str] = None,
+    before: Optional[str] = None,
+) -> ListRunStepsResponse:
+    """
+    Returns a list of run steps belonging to a run.
+
+    Args:
+        thread_id: ID of the thread the run belongs to.
+        run_id: ID of the run whose steps are listed.
+        limit: Maximum number of steps to return.
+        order: "desc" sorts by step ID descending; any other value sorts ascending.
+        after: Only return steps whose ID is greater than this one.
+        before: Only return steps whose ID is less than this one.
+
+    Raises:
+        HTTPException: 404 when the run does not exist in the given thread.
+    """
+    # Look the run up the same way `get_run` and `modify_run` do: by its "id"
+    # alias and scoped to the thread from the URL.
+    existing_run = await RunObject.find_one({"id": run_id, "thread_id": thread_id})
+    if not existing_run:
+        raise HTTPException(status_code=404, detail=f"Run with ID '{run_id}' not found")
+
+    query = {"thread_id": thread_id, "run_id": run_id}
+
+    # Both cursors constrain the same field, so they are combined into a single
+    # range condition; assigning them one after the other would let `before`
+    # silently replace `after`.
+    # The field is addressed as "id": that is the alias `run_step_id` is declared
+    # with, and the key `get_run_step` uses to look a step up.
+    # NOTE(review): the comparison direction does not depend on `order`; confirm
+    # that this is the intended cursor behaviour for descending listings.
+    id_range = {}
+    if after:
+        id_range["$gt"] = after
+    if before:
+        id_range["$lt"] = before
+    if id_range:
+        query["id"] = id_range
+
+    # Define sorting order
+    sort_order = DESCENDING if order == "desc" else ASCENDING
+
+    # Prepare the query and retrieve one page from MongoDB
+    find_query = RunStepObject.find(query).sort([("id", sort_order)])
+    run_steps = await find_query.to_list(limit)
+
+    # More results exist when the filter matches more documents than were returned
+    total_count = await find_query.count()
+    has_more = len(run_steps) < total_count
+
+    return ListRunStepsResponse(
+        object="list",
+        data=run_steps,
+        first_id=run_steps[0].run_step_id if run_steps else "",
+        last_id=run_steps[-1].run_step_id if run_steps else "",
+        has_more=has_more,
+    )
+
+
+@app.get(
+    "/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
+    response_model=RunStepObject,
+    tags=["Assistants"],
+)
+async def get_run_step(
+    thread_id: str, run_id: str = ..., step_id: str = ...
+) -> RunStepObject:
+    """
+    Return one run step, identified by thread, run and step ID together.
+
+    Raises an HTTP 404 error when no step matches all three identifiers.
+    """
+    step = await RunStepObject.find_one(
+        {"thread_id": thread_id, "run_id": run_id, "id": step_id}
+    )
+    if step:
+        return step
+
+    raise HTTPException(
+        status_code=404,
+        detail=f"Run_step with ID '{step_id}' of run {run_id} of thread {thread_id} not found",
+    )
+
+
+@app.post(
+    "/v1/chat/completions",
+    tags=["chat/completions"],
+)
+@app.post(
+    "/chat/completions",
+    tags=["chat/completions"],
+)
+async def chat_completion(body: CreateChatCompletionRequest):
+    """
+    Forward a chat completion request to LiteLLM.
+
+    Returns a server-sent-event `StreamingResponse` when `body.stream` is set,
+    and the completion object returned by the client otherwise.
+    """
+    client = OpenAI(base_url=LITELLM_URL, api_key="abc")
+    # Only the role and the content of each message are forwarded.
+    chat_messages = [
+        {"role": m.__root__.role.value, "content": m.__root__.content}
+        for m in body.messages
+    ]
+
+    # Translate the known response format types into the plain dict form; any
+    # other value is passed through unchanged.
+    response_format = body.response_format
+    if body.response_format and body.response_format.type:
+        if body.response_format.type == Type6.json_object:
+            response_format = {"type": "json_object"}
+        elif body.response_format.type == Type6.text:
+            response_format = {"type": "text"}
+
+    # Fall back to a fixed ceiling when the request carries no integer value.
+    # NOTE(review): 128000 may exceed what a given model accepts; confirm.
+    max_tokens = body.max_tokens if isinstance(body.max_tokens, int) else 128000
+
+    # Log routing information only; the request body holds user content.
+    logging.debug("chat_completion: model=%s stream=%s", body.model, body.stream)
+
+    # The OpenAI client is synchronous. Running it in a worker thread keeps the
+    # event loop free to serve other requests while the upstream call is pending.
+    response = await asyncio.to_thread(
+        client.chat.completions.create,
+        model=body.model,
+        messages=chat_messages,
+        temperature=body.temperature,
+        top_p=body.top_p,
+        stream=body.stream,
+        response_format=response_format,
+        frequency_penalty=body.frequency_penalty,
+        logit_bias=body.logit_bias,
+        max_tokens=max_tokens,
+        n=body.n,
+        presence_penalty=body.presence_penalty,
+        seed=body.seed,
+        stop=body.stop,
+        tool_choice=body.tool_choice,
+        tools=body.tools,
+        user=body.user,
+        function_call=body.function_call,
+        functions=body.functions,
+    )
+
+    if body.stream:
+        return StreamingResponse(
+            data_generator(response), media_type="text/event-stream"
+        )
+    return response
+
+
+def data_generator(response):
+    """
+    Format data in Server-Sent Event (SSE) messages, which OpenAI Stream API consumes.
+    https://github.com/florimondmanca/httpx-sse/blob/master/src/httpx_sse/_decoders.py
+
+    Each chunk is emitted as one `data:` event holding the JSON of `chunk.dict()`.
+    A chunk that cannot be serialized is reported as an error event and streaming
+    continues. A failure of the stream itself is reported as an error event and
+    ends the stream without the final `[DONE]` event.
+
+    Error events carry `{"error": {"message": ...}}` encoded as JSON, so every
+    event stays a single `data:` line even when the exception text has newlines.
+    """
+    try:
+        for chunk in response:
+            try:
+                payload = json.dumps(chunk.dict())
+            except Exception as e:
+                logging.exception("Failed to serialize a streamed chunk")
+                payload = json.dumps({"error": {"message": str(e)}})
+            yield f"data: {payload}\n\n"
+
+        # Streaming is done, yield the [DONE] chunk
+        yield "data: [DONE]\n\n"
+    except Exception as e:
+        logging.exception("Streaming response failed")
+        yield f"data: {json.dumps({'error': {'message': str(e)}})}\n\n"
diff --git a/services/backend/api_server/app/backend_models.py b/services/backend/api_server/app/backend_models.py
new file mode 100644
index 0000000..ab92e0c
--- /dev/null
+++ b/services/backend/api_server/app/backend_models.py
@@ -0,0 +1,410 @@
+# backend_models.py
+from beanie import Document
+from pydantic import (
+    AnyUrl,
+    BaseModel,
+    Extra,
+    Field,
+    PositiveFloat,
+    confloat,
+    conint,
+    constr,
+)
+from typing import Any, Dict, List, Optional, Union
+from .models import (
+    AssistantFileObject,
+    AssistantToolsFunction,
+    CreateAssistantRequest,
+    FineTuningJob,
+    ImagesResponse,
+    LastError,
+    ListAssistantFilesResponse,
+    ListFilesResponse,
+    ListFineTuneEventsResponse,
+    Object7,
+    Object14,
+    Purpose1,
+    Status,
+    Object20,
+    Object21,
+    Object22,
+    Object23,
+    Object24,
+    Object25,
+    Object27,
+    Object28,
+    RequiredAction,
+    Role7,
+    Role8,
+    Type16,
+    Status2,
+    Status3,
+    LastError1,
+    MessageContentTextObject,
+    MessageContentImageFileObject,
+    RunStepDetailsMessageCreationObject, 
+    RunStepDetailsToolCallsObject,
+    AssistantToolsCode,
+    AssistantToolsRetrieval,
+    AssistantToolsFunction,
+    AssistantToolsBrowser
+)
+
+class AssistantObject(Document):
+    """Assistant document; `Settings.name` places it in the `assistants` collection."""
+
+    # Exposed under the alias `id`.
+    assistant_id: str = Field(
+        ...,
+        alias="id",
+        description="The identifier, which can be referenced in API endpoints.",
+    )
+    object: Object20 = Field(
+        ...,
+        description="The object type, which is always `assistant`.",
+    )
+    created_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the assistant was created.",
+    )
+    name: constr(max_length=256) = Field(
+        ...,
+        description="The name of the assistant. The maximum length is 256 characters.\n",
+    )
+    description: constr(max_length=512) = Field(
+        ...,
+        description="The description of the assistant. The maximum length is 512 characters.\n",
+    )
+    model: str = Field(
+        ...,
+        description="ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n",
+    )
+    instructions: constr(max_length=32768) = Field(
+        ...,
+        description="The system instructions that the assistant uses. The maximum length is 32768 characters.\n",
+    )
+    tools: List[
+        Union[
+            AssistantToolsCode,
+            AssistantToolsRetrieval,
+            AssistantToolsFunction,
+            AssistantToolsBrowser,
+        ]
+    ] = Field(
+        ...,
+        max_items=128,
+        description="A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`.\n",
+    )
+    file_ids: List[str] = Field(
+        ...,
+        max_items=20,
+        description="A list of [file](/docs/api-reference/files) IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. Files are ordered by their creation date in ascending order.\n",
+    )
+    metadata: Dict[str, Any] = Field(
+        ...,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+    class Settings:
+        name = "assistants"
+
+class ListAssistantsResponse(BaseModel):
+    """Response body for a listing of `AssistantObject` items."""
+
+    object: str = Field(..., example="list")
+    data: List[AssistantObject]
+    first_id: str = Field(..., example="asst_hLBK7PXBv5Lr2NQT7KLY0ag1")
+    last_id: str = Field(..., example="asst_QLoItBbqwyAJEzlTy4y9kOMM")
+    has_more: bool = Field(..., example=False)
+
+class ThreadObject(Document):
+    """Thread document; `Settings.name` places it in the `threads` collection."""
+
+    # Exposed under the alias `id`.
+    thread_id: str = Field(
+        ...,
+        alias="id",
+        description="The identifier, which can be referenced in API endpoints.",
+    )
+    object: Object23 = Field(
+        ...,
+        description="The object type, which is always `thread`.",
+    )
+    created_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the thread was created.",
+    )
+    metadata: Dict[str, Any] = Field(
+        ...,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.",
+    )
+
+    class Settings:
+        name = "threads"
+
+class MessageObject(Document):
+    """Thread message document; `Settings.name` places it in the `messages` collection."""
+
+    # Exposed under the alias `id`.
+    message_id: str = Field(
+        ...,
+        alias="id",
+        description="The identifier, which can be referenced in API endpoints.",
+    )
+    object: Object25 = Field(
+        ...,
+        description="The object type, which is always `thread.message`.",
+    )
+    created_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the message was created.",
+    )
+    thread_id: str = Field(
+        ...,
+        description="The [thread](/docs/api-reference/threads) ID that this message belongs to.",
+    )
+    role: Role7 = Field(
+        ...,
+        description="The entity that produced the message. One of `user` or `assistant`.",
+    )
+    content: List[
+        Union[MessageContentImageFileObject, MessageContentTextObject]
+    ] = Field(
+        ...,
+        description="The content of the message in array of text and/or images.",
+    )
+    # NOTE(review): described as "if applicable" but declared as a required
+    # string; confirm what callers store when no assistant is involved.
+    assistant_id: str = Field(
+        ...,
+        description="If applicable, the ID of the [assistant](/docs/api-reference/assistants) that authored this message.",
+    )
+    # NOTE(review): same as above; "if applicable" but required.
+    run_id: str = Field(
+        ...,
+        description="If applicable, the ID of the [run](/docs/api-reference/runs) associated with the authoring of this message.",
+    )
+    file_ids: List[str] = Field(
+        ...,
+        max_items=10,
+        description="A list of [file](/docs/api-reference/files) IDs that the assistant should use. Useful for tools like retrieval and code_interpreter that can access files. A maximum of 10 files can be attached to a message.",
+    )
+    metadata: Dict[str, Any] = Field(
+        ...,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+    class Settings:
+        name = "messages"
+
+class ListMessagesResponse(BaseModel):
+    """Response body for a listing of `MessageObject` items."""
+
+    object: str = Field(..., example="list")
+    data: List[MessageObject]
+    first_id: str = Field(..., example="msg_hLBK7PXBv5Lr2NQT7KLY0ag1")
+    last_id: str = Field(..., example="msg_QLoItBbqwyAJEzlTy4y9kOMM")
+    has_more: bool = Field(..., example=False)
+
+class RunObject(Document):
+    """
+    Run document; `Settings.name` places it in the `runs` collection.
+
+    Fields whose default is `None` are annotated `Optional[...]` explicitly, so
+    the annotation states what the default already allows.
+    """
+
+    # Exposed under the alias `id`.
+    run_id: str = Field(
+        ..., description="The identifier, which can be referenced in API endpoints.", alias="id"
+    )
+    object: Object22 = Field(
+        ..., description="The object type, which is always `thread.run`."
+    )
+    created_at: int = Field(
+        ..., description="The Unix timestamp (in seconds) for when the run was created."
+    )
+    thread_id: str = Field(
+        ...,
+        description="The ID of the [thread](/docs/api-reference/threads) that was executed on as a part of this run.",
+    )
+    assistant_id: str = Field(
+        ...,
+        description="The ID of the [assistant](/docs/api-reference/assistants) used for execution of this run.",
+    )
+    status: Status2 = Field(
+        ...,
+        description="The status of the run, which can be either `queued`, `in_progress`, `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, or `expired`.",
+    )
+    required_action: Optional[RequiredAction] = Field(
+        None,
+        description="Details on the action required to continue the run. Will be `null` if no action is required.",
+    )
+    last_error: Optional[LastError] = Field(
+        None,
+        description="The last error associated with this run. Will be `null` if there are no errors.",
+    )
+    expires_at: Optional[int] = Field(
+        None, description="The Unix timestamp (in seconds) for when the run will expire."
+    )
+    started_at: Optional[int] = Field(
+        None, description="The Unix timestamp (in seconds) for when the run was started."
+    )
+    cancelled_at: Optional[int] = Field(
+        None,
+        description="The Unix timestamp (in seconds) for when the run was cancelled.",
+    )
+    failed_at: Optional[int] = Field(
+        None, description="The Unix timestamp (in seconds) for when the run failed."
+    )
+    completed_at: Optional[int] = Field(
+        None,
+        description="The Unix timestamp (in seconds) for when the run was completed.",
+    )
+    model: str = Field(
+        ...,
+        description="The model that the [assistant](/docs/api-reference/assistants) used for this run.",
+    )
+    instructions: str = Field(
+        ...,
+        description="The instructions that the [assistant](/docs/api-reference/assistants) used for this run.",
+    )
+    tools: List[
+        Union[AssistantToolsCode, AssistantToolsRetrieval, AssistantToolsFunction, AssistantToolsBrowser]
+    ] = Field(
+        ...,
+        description="The list of tools that the [assistant](/docs/api-reference/assistants) used for this run.",
+        max_items=20,
+    )
+    file_ids: List[str] = Field(
+        ...,
+        description="The list of [File](/docs/api-reference/files) IDs the [assistant](/docs/api-reference/assistants) used for this run.",
+    )
+    metadata: Dict[str, Any] = Field(
+        ...,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+    class Settings:
+        name = "runs"
+
+
+
+class OpenAIFile(Document):
+    """File metadata document; `Settings.name` places it in the `files` collection."""
+
+    # Exposed under the alias `id`.
+    file_id: str = Field(
+        ...,
+        alias="id",
+        description="The file identifier, which can be referenced in the API endpoints.",
+    )
+    bytes: int = Field(
+        ...,
+        description="The size of the file, in bytes.",
+    )
+    created_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the file was created.",
+    )
+    filename: str = Field(
+        ...,
+        description="The name of the file.",
+    )
+    object: Object14 = Field(
+        ...,
+        description="The object type, which is always `file`.",
+    )
+    purpose: Purpose1 = Field(
+        ...,
+        description="The intended purpose of the file. Supported values are `fine-tune`, `fine-tune-results`, `assistants`, and `assistants_output`.",
+    )
+    status: Status = Field(
+        ...,
+        description="Deprecated. The current status of the file, which can be either `uploaded`, `processed`, or `error`.",
+    )
+    status_details: Optional[str] = Field(
+        None,
+        description="Deprecated. For details on why a fine-tuning training file failed validation, see the `error` field on `fine_tuning.job`.",
+    )
+
+    class Settings:
+        name = "files"
+
+class ListFilesResponse(BaseModel):
+    """Response body for a listing of `OpenAIFile` records."""
+
+    data: List[OpenAIFile]
+    object: Object7
+
+class FilesStorageObject(Document):
+    """Raw file content document; `Settings.name` places it in the `files_storage` collection."""
+
+    # Exposed under the alias `id`.
+    file_id: str = Field(
+        ...,
+        alias="id",
+        description="The file identifier, which can be referenced in the API endpoints.",
+    )
+    content: bytes = Field(
+        ...,
+        description="The file content",
+    )
+    content_type: str = Field(
+        ...,
+        description="The file content type",
+    )
+
+    class Settings:
+        name = "files_storage"
+
+
+class AssistantFileObject(Document):
+    """File-to-assistant attachment document; `Settings.name` places it in the `assistant_files` collection."""
+
+    # Exposed under the alias `id`.
+    file_id: str = Field(
+        ...,
+        alias="id",
+        description="The identifier, which can be referenced in API endpoints.",
+    )
+    object: Object28 = Field(
+        ...,
+        description="The object type, which is always `assistant.file`.",
+    )
+    created_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the assistant file was created.",
+    )
+    assistant_id: str = Field(
+        ...,
+        description="The assistant ID that the file is attached to.",
+    )
+
+    class Settings:
+        name = "assistant_files"
+
+
+class ListAssistantFilesResponse(BaseModel):  # Paginated list envelope for AssistantFileObject records.
+    object: str = Field(..., example="list")
+    data: List[AssistantFileObject]  # the page of results
+    first_id: str = Field(..., example="file-hLBK7PXBv5Lr2NQT7KLY0ag1")  # NOTE(review): required even when data is empty — confirm callers handle that
+    last_id: str = Field(..., example="file-QLoItBbqwyAJEzlTy4y9kOMM")  # NOTE(review): same as first_id
+    has_more: bool = Field(..., example=False)  # whether further pages exist
+    
+
+class RunStepObject(Document):
+    run_step_id: str = Field(
+        ...,
+        description="The identifier of the run step, which can be referenced in API endpoints.", alias="id"
+    )
+    object: Object27 = Field(
+        ..., description="The object type, which is always `thread.run.step``."
+    )
+    created_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the run step was created.",
+    )
+    assistant_id: str = Field(
+        ...,
+        description="The ID of the [assistant](/docs/api-reference/assistants) associated with the run step.",
+    )
+    thread_id: str = Field(
+        ...,
+        description="The ID of the [thread](/docs/api-reference/threads) that was run.",
+    )
+    run_id: str = Field(
+        ...,
+        description="The ID of the [run](/docs/api-reference/runs) that this run step is a part of.",
+    )
+    type: Type16 = Field(
+        ...,
+        description="The type of run step, which can be either `message_creation` or `tool_calls`.",
+    )
+    status: Status3 = Field(
+        ...,
+        description="The status of the run step, which can be either `in_progress`, `cancelled`, `failed`, `completed`, or `expired`.",
+    )
+    step_details: Union[
+        RunStepDetailsMessageCreationObject, RunStepDetailsToolCallsObject
+    ] = Field(..., description="The details of the run step.")
+    last_error: LastError1 = Field(
+        ...,
+        description="The last error associated with this run step. Will be `null` if there are no errors.",
+    )
+    expired_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the run step expired. A step is considered expired if the parent run is expired.",
+    )
+    cancelled_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the run step was cancelled.",
+    )
+    failed_at: int = Field(
+        ..., description="The Unix timestamp (in seconds) for when the run step failed."
+    )
+    completed_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the run step completed.",
+    )
+    metadata: Dict[str, Any] = Field(
+        ...,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+    class Settings:
+        name = "run_steps"
+
+
+class ListRunStepsResponse(BaseModel):  # Paginated list envelope for RunStepObject records.
+    object: str = Field(..., example="list")
+    data: List[RunStepObject]  # the page of results
+    first_id: str = Field(..., example="step_hLBK7PXBv5Lr2NQT7KLY0ag1")  # NOTE(review): required even when data is empty — confirm
+    last_id: str = Field(..., example="step_QLoItBbqwyAJEzlTy4y9kOMM")  # NOTE(review): same as first_id
+    has_more: bool = Field(..., example=False)  # whether further pages exist
+    
+    
+class ListRunsResponse(BaseModel):  # Paginated list envelope for RunObject records.
+    object: str = Field(..., example="list")
+    data: List[RunObject]  # the page of results; RunObject is declared outside this excerpt
+    first_id: str = Field(..., example="run_hLBK7PXBv5Lr2NQT7KLY0ag1")  # NOTE(review): required even when data is empty — confirm
+    last_id: str = Field(..., example="run_QLoItBbqwyAJEzlTy4y9kOMM")  # NOTE(review): same as first_id
+    has_more: bool = Field(..., example=False)  # whether further pages exist
+    
+
+class FileUpload(BaseModel):  # Metadata accompanying a file upload.
+    purpose: str  # free-form string here; not constrained to an enum in this model
+
+class ApiKeysUpdateModel(BaseModel):  # Partial update payload; both fields are optional and default to None.
+    OPENAI_API_KEY: Optional[bool] = None  # NOTE(review): typed bool, not str — presumably a flag rather than the key itself; confirm
+    ANTHROPIC_API_KEY: Optional[bool] = None  # NOTE(review): same as OPENAI_API_KEY
diff --git a/services/backend/api_server/app/helpers.py b/services/backend/api_server/app/helpers.py
new file mode 100644
index 0000000..3af34c5
--- /dev/null
+++ b/services/backend/api_server/app/helpers.py
@@ -0,0 +1,28 @@
+# Standard Library
+import uuid
+
+
+def generate_assistant_id():
+    # Generate a unique ID and convert it to a string
+    unique_id = uuid.uuid4()
+    # Attach the prefix 'asst_' to the ID
+    assistant_id = f"asst_{unique_id.hex[:6]}"
+    return assistant_id
+
+
+def generate_thread_id():
+    # Generate a unique ID and convert it to a string
+    thread_id = f"thread_{uuid.uuid4().hex[:6]}"
+    return thread_id
+
+
+def generate_message_id():
+    # Generate a unique ID and convert it to a string
+    msg_id = f"msg_{uuid.uuid4().hex[:6]}"
+    return msg_id
+
+
+def generate_run_id():
+    # Generate a unique ID and convert it to a string
+    run_id = f"run_{uuid.uuid4().hex[:6]}"
+    return run_id
diff --git a/services/backend/api_server/app/models.py b/services/backend/api_server/app/models.py
new file mode 100644
index 0000000..e7954dc
--- /dev/null
+++ b/services/backend/api_server/app/models.py
@@ -0,0 +1,2870 @@
+from __future__ import annotations
+
+from enum import Enum
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import (
+    AnyUrl,
+    BaseModel,
+    Extra,
+    Field,
+    PositiveFloat,
+    confloat,
+    conint,
+    constr,
+)
+
+
+class Error(BaseModel):  # Error detail payload; all four fields are required strings.
+    code: str
+    message: str
+    param: str  # NOTE(review): required and non-null here — confirm upstream never sends null
+    type: str
+
+
+class ErrorResponse(BaseModel):  # Envelope that nests an Error under the "error" key.
+    error: Error
+
+
+class Object(Enum):  # Single-member enum: the only permitted value is "list".
+    list = "list"
+
+
+class DeleteModelResponse(BaseModel):  # Result of a model deletion: id, deleted flag and object tag.
+    id: str
+    deleted: bool
+    object: str
+
+
+class ModelEnum(Enum):  # Named model IDs for CreateCompletionRequest.model (which also accepts any str).
+    babbage_002 = "babbage-002"
+    davinci_002 = "davinci-002"
+    gpt_3_5_turbo_instruct = "gpt-3.5-turbo-instruct"
+    text_davinci_003 = "text-davinci-003"
+    text_davinci_002 = "text-davinci-002"
+    text_davinci_001 = "text-davinci-001"
+    code_davinci_002 = "code-davinci-002"
+    text_curie_001 = "text-curie-001"
+    text_babbage_001 = "text-babbage-001"
+    text_ada_001 = "text-ada-001"
+
+
+class PromptItem(BaseModel):  # Custom-root model wrapping a bare list; element type of the List[PromptItem] prompt form.
+    __root__: List[Any]
+
+
+class CreateCompletionRequest(BaseModel):
+    model: Union[str, ModelEnum] = Field(
+        ...,
+        description="ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n",
+    )
+    prompt: Union[str, List[str], List[int], List[PromptItem]] = Field(
+        ...,
+        description="The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.\n\nNote that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document.\n",
+    )
+    best_of: Optional[conint(ge=0, le=20)] = Field(
+        1,
+        description='Generates `best_of` completions server-side and returns the "best" (the one with the highest log probability per token). Results cannot be streamed.\n\nWhen used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`.\n\n**Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.\n',
+    )
+    echo: Optional[bool] = Field(
+        False, description="Echo back the prompt in addition to the completion\n"
+    )
+    frequency_penalty: Optional[confloat(ge=-2.0, le=2.0)] = Field(
+        0,
+        description="Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details)\n",
+    )
+    logit_bias: Optional[Dict[str, int]] = Field(
+        None,
+        description='Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.\n\nAs an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated.\n',
+    )
+    logprobs: Optional[conint(ge=0, le=5)] = Field(
+        None,
+        description="Include the log probabilities on the `logprobs` most likely tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response.\n\nThe maximum value for `logprobs` is 5.\n",
+    )
+    max_tokens: Optional[conint(ge=0)] = Field(
+        16,
+        description="The maximum number of [tokens](/tokenizer) to generate in the completion.\n\nThe token count of your prompt plus `max_tokens` cannot exceed the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.\n",
+        example=16,
+    )
+    n: Optional[conint(ge=1, le=128)] = Field(
+        1,
+        description="How many completions to generate for each prompt.\n\n**Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.\n",
+        example=1,
+    )
+    presence_penalty: Optional[confloat(ge=-2.0, le=2.0)] = Field(
+        0,
+        description="Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details)\n",
+    )
+    seed: Optional[conint(ge=-9223372036854775808, le=9223372036854775808)] = Field(
+        None,
+        description="If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.\n\nDeterminism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.\n",
+    )
+    stop: Optional[Union[str, List[str]]] = Field(
+        None,
+        description="Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.\n",
+    )
+    stream: Optional[bool] = Field(
+        False,
+        description="Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).\n",
+    )
+    suffix: Optional[str] = Field(
+        None,
+        description="The suffix that comes after a completion of inserted text.",
+        example="test.",
+    )
+    temperature: Optional[confloat(ge=0.0, le=2.0)] = Field(
+        1,
+        description="What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.\n",
+        example=1,
+    )
+    top_p: Optional[confloat(ge=0.0, le=1.0)] = Field(
+        1,
+        description="An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both.\n",
+        example=1,
+    )
+    user: Optional[str] = Field(
+        None,
+        description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n",
+        example="user-1234",
+    )
+
+
+class FinishReason(Enum):  # Stop reasons used by Choice.finish_reason.
+    stop = "stop"
+    length = "length"
+    content_filter = "content_filter"
+
+
+class Logprobs(BaseModel):  # Log-probability details; every field is optional and defaults to None.
+    text_offset: Optional[List[int]] = None
+    token_logprobs: Optional[List[float]] = None
+    tokens: Optional[List[str]] = None
+    top_logprobs: Optional[List[Dict[str, float]]] = None  # one token -> logprob mapping per list entry
+
+
+class Choice(BaseModel):
+    finish_reason: FinishReason = Field(
+        ...,
+        description="The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,\n`length` if the maximum number of tokens specified in the request was reached,\nor `content_filter` if content was omitted due to a flag from our content filters.\n",
+    )
+    index: int
+    logprobs: Logprobs
+    text: str
+
+
+class Object1(Enum):  # Single-member enum: the only permitted value is "text_completion".
+    text_completion = "text_completion"
+
+
+class Type(Enum):  # Single-member enum ("image_url"); type tag of the image content part.
+    image_url = "image_url"
+
+
+class Detail(Enum):  # Allowed values for ImageUrl.detail.
+    auto = "auto"
+    low = "low"
+    high = "high"
+
+
+class ImageUrl(BaseModel):  # Image reference plus an optional detail level.
+    url: AnyUrl = Field(  # NOTE(review): AnyUrl validation may reject inline base64 data — confirm against the description
+        ..., description="Either a URL of the image or the base64 encoded image data."
+    )
+    detail: Optional[Detail] = Field(  # default is the raw string "auto", not Detail.auto
+        "auto", description="Specifies the detail level of the image."
+    )
+
+
+class ChatCompletionRequestMessageContentPartImage(BaseModel):  # Image content part: a type tag plus an ImageUrl.
+    type: Type = Field(..., description="The type of the content part.")
+    image_url: ImageUrl
+
+
+class Type1(Enum):  # Single-member enum ("text"); type tag of the text content part.
+    text = "text"
+
+
+class ChatCompletionRequestMessageContentPartText(BaseModel):  # Text content part: a type tag plus the text.
+    type: Type1 = Field(..., description="The type of the content part.")
+    text: str = Field(..., description="The text content.")
+
+
+class Role(Enum):  # Single-member enum ("system"); role of ChatCompletionRequestSystemMessage.
+    system = "system"
+
+
+class ChatCompletionRequestSystemMessage(BaseModel):  # System message: content plus the fixed "system" role.
+    content: str = Field(..., description="The contents of the system message.")
+    role: Role = Field(
+        ..., description="The role of the messages author, in this case `system`."
+    )
+
+
+class Role1(Enum):  # Single-member enum: the only permitted value is "user".
+    user = "user"
+
+
+class Role2(Enum):  # Single-member enum: the only permitted value is "assistant".
+    assistant = "assistant"
+
+
+class FunctionCall(BaseModel):  # Function invocation: name plus arguments as an unparsed string.
+    arguments: str = Field(  # kept as a string; not parsed or validated by this model
+        ...,
+        description="The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.",
+    )
+    name: str = Field(..., description="The name of the function to call.")
+
+
+class Role3(Enum):  # Single-member enum ("tool"); role of ChatCompletionRequestToolMessage.
+    tool = "tool"
+
+
+class ChatCompletionRequestToolMessage(BaseModel):  # Tool result message tied to a tool call by tool_call_id.
+    role: Role3 = Field(
+        ..., description="The role of the messages author, in this case `tool`."
+    )
+    content: str = Field(..., description="The contents of the tool message.")
+    tool_call_id: str = Field(
+        ..., description="Tool call that this message is responding to."
+    )
+
+
+class Role4(Enum):  # Single-member enum ("function"); role of ChatCompletionRequestFunctionMessage.
+    function = "function"
+
+
+class ChatCompletionRequestFunctionMessage(BaseModel):  # Function result message: role, returned content and function name.
+    role: Role4 = Field(
+        ..., description="The role of the messages author, in this case `function`."
+    )
+    content: str = Field(
+        ...,
+        description="The return value from the function call, to return to the model.",
+    )
+    name: str = Field(..., description="The name of the function to call.")
+
+
+class FunctionParameters(BaseModel):  # Open-ended model: declares no fields of its own.
+    pass
+
+    class Config:
+        extra = Extra.allow  # accept and keep arbitrary extra keys
+
+
+class ChatCompletionFunctions(BaseModel):  # Function definition: optional description, name and parameters.
+    description: Optional[str] = Field(
+        None,
+        description="A description of what the function does, used by the model to choose when and how to call the function.",
+    )
+    name: str = Field(  # the naming rules in the description are not enforced by this field's type
+        ...,
+        description="The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.",
+    )
+    parameters: FunctionParameters  # free-form; see FunctionParameters (extra keys allowed)
+
+
+class ChatCompletionFunctionCallOption(BaseModel):  # Selects one function by name.
+    name: str = Field(..., description="The name of the function to call.")
+
+
+class Type2(Enum):  # Single-member enum: the only permitted value is "function".
+    function = "function"
+
+
+class FunctionObject(BaseModel):  # Function definition; same three fields as ChatCompletionFunctions.
+    description: Optional[str] = Field(
+        None,
+        description="A description of what the function does, used by the model to choose when and how to call the function.",
+    )
+    name: str = Field(  # the naming rules in the description are not enforced by this field's type
+        ...,
+        description="The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.",
+    )
+    parameters: FunctionParameters  # free-form; see FunctionParameters (extra keys allowed)
+
+
+class ChatCompletionToolChoiceOptionEnum(Enum):  # String options for tool choice: "none" or "auto".
+    none = "none"
+    auto = "auto"
+
+
+class Type3(Enum):  # Single-member enum ("function"); type of ChatCompletionNamedToolChoice.
+    function = "function"
+
+
+class Function(BaseModel):  # Names the function referenced by ChatCompletionNamedToolChoice.
+    name: str = Field(..., description="The name of the function to call.")
+
+
+class ChatCompletionNamedToolChoice(BaseModel):  # Names a specific tool; both fields default to None.
+    type: Optional[Type3] = Field(
+        None,
+        description="The type of the tool. Currently, only `function` is supported.",
+    )
+    function: Optional[Function] = None
+
+
+class Type4(Enum):  # Single-member enum ("function"); type of ChatCompletionMessageToolCall.
+    function = "function"
+
+
+class Function1(BaseModel):  # Function name and argument string of a completed tool call.
+    name: str = Field(..., description="The name of the function to call.")
+    arguments: str = Field(  # kept as a string; not parsed or validated by this model
+        ...,
+        description="The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.",
+    )
+
+
+class ChatCompletionMessageToolCall(BaseModel):  # A complete tool call: id, type and function; all required.
+    id: str = Field(..., description="The ID of the tool call.")
+    type: Type4 = Field(
+        ...,
+        description="The type of the tool. Currently, only `function` is supported.",
+    )
+    function: Function1 = Field(..., description="The function that the model called.")
+
+
+class Type5(Enum):  # Single-member enum ("function"); type of ChatCompletionMessageToolCallChunk.
+    function = "function"
+
+
+class Function2(BaseModel):  # Partial function data for a tool-call chunk; both fields optional.
+    name: Optional[str] = Field(None, description="The name of the function to call.")
+    arguments: Optional[str] = Field(  # kept as a string; not parsed or validated by this model
+        None,
+        description="The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.",
+    )
+
+
+class ChatCompletionMessageToolCallChunk(BaseModel):  # Tool-call fragment: only index is required.
+    index: int
+    id: Optional[str] = Field(None, description="The ID of the tool call.")
+    type: Optional[Type5] = Field(
+        None,
+        description="The type of the tool. Currently, only `function` is supported.",
+    )
+    function: Optional[Function2] = None
+
+
+class ChatCompletionRole(Enum):  # The five message author roles.
+    system = "system"
+    user = "user"
+    assistant = "assistant"
+    tool = "tool"
+    function = "function"
+
+
+class Role5(Enum):  # Single-member enum: the only permitted value is "assistant".
+    assistant = "assistant"
+
+
+class FunctionCall1(BaseModel):  # Function invocation with both fields required (same shape as FunctionCall).
+    arguments: str = Field(  # kept as a string; not parsed or validated by this model
+        ...,
+        description="The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.",
+    )
+    name: str = Field(..., description="The name of the function to call.")
+
+
+class FunctionCall2(BaseModel):  # Function invocation with both fields optional; used by the stream delta.
+    arguments: Optional[str] = Field(  # kept as a string; not parsed or validated by this model
+        None,
+        description="The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.",
+    )
+    name: Optional[str] = Field(None, description="The name of the function to call.")
+
+
+class Role6(Enum):  # Roles allowed on a stream delta; unlike ChatCompletionRole there is no "function".
+    system = "system"
+    user = "user"
+    assistant = "assistant"
+    tool = "tool"
+
+
+class ChatCompletionStreamResponseDelta(BaseModel):  # Incremental message update; every field is optional.
+    content: Optional[str] = Field(
+        None, description="The contents of the chunk message."
+    )
+    function_call: Optional[FunctionCall2] = Field(
+        None,
+        description="Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.",
+    )
+    tool_calls: Optional[List[ChatCompletionMessageToolCallChunk]] = None
+    role: Optional[Role6] = Field(
+        None, description="The role of the author of this message."
+    )
+
+
+class ModelEnum1(Enum):  # Named GPT-4 and GPT-3.5-turbo model identifiers.
+    gpt_4_1106_preview = "gpt-4-1106-preview"
+    gpt_4_vision_preview = "gpt-4-vision-preview"
+    gpt_4 = "gpt-4"
+    gpt_4_0314 = "gpt-4-0314"
+    gpt_4_0613 = "gpt-4-0613"
+    gpt_4_32k = "gpt-4-32k"
+    gpt_4_32k_0314 = "gpt-4-32k-0314"
+    gpt_4_32k_0613 = "gpt-4-32k-0613"
+    gpt_3_5_turbo_1106 = "gpt-3.5-turbo-1106"
+    gpt_3_5_turbo = "gpt-3.5-turbo"
+    gpt_3_5_turbo_16k = "gpt-3.5-turbo-16k"
+    gpt_3_5_turbo_0301 = "gpt-3.5-turbo-0301"
+    gpt_3_5_turbo_0613 = "gpt-3.5-turbo-0613"
+    gpt_3_5_turbo_16k_0613 = "gpt-3.5-turbo-16k-0613"
+
+
+class Type6(Enum):  # Allowed values for ResponseFormat.type.
+    text = "text"
+    json_object = "json_object"
+
+
+class ResponseFormat(BaseModel):  # Output format selector with a single optional field.
+    type: Optional[Type6] = Field(  # default is the raw string "text", not Type6.text
+        "text",
+        description="Must be one of `text` or `json_object`.",
+        example="json_object",
+    )
+
+
+class FunctionCallEnum(Enum):  # String options for function calling: "none" or "auto".
+    none = "none"
+    auto = "auto"
+
+
+class FinishReason1(Enum):  # Stop reasons including tool_calls and function_call (same members as FinishReason3).
+    stop = "stop"
+    length = "length"
+    tool_calls = "tool_calls"
+    content_filter = "content_filter"
+    function_call = "function_call"
+
+
+class Object2(Enum):  # Single-member enum: the only permitted value is "chat.completion".
+    chat_completion = "chat.completion"
+
+
+class FinishReason2(Enum):  # Stop reasons without a tool_calls member.
+    stop = "stop"
+    length = "length"
+    function_call = "function_call"
+    content_filter = "content_filter"
+
+
+class Object3(Enum):  # Single-member enum: "chat.completion" (same value as Object2).
+    chat_completion = "chat.completion"
+
+
+class Object4(Enum):  # Single-member enum: "list" (same value as Object).
+    list = "list"
+
+
+class FinishReason3(Enum):  # Stop reasons used by Choice3.finish_reason.
+    stop = "stop"
+    length = "length"
+    tool_calls = "tool_calls"
+    content_filter = "content_filter"
+    function_call = "function_call"
+
+
+class Choice3(BaseModel):
+    delta: ChatCompletionStreamResponseDelta
+    finish_reason: FinishReason3 = Field(
+        ...,
+        description="The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,\n`length` if the maximum number of tokens specified in the request was reached,\n`content_filter` if content was omitted due to a flag from our content filters,\n`tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.\n",
+    )
+    index: int = Field(
+        ..., description="The index of the choice in the list of choices."
+    )
+
+
+class Object5(Enum):  # Single-member enum ("chat.completion.chunk"); object tag of the stream response.
+    chat_completion_chunk = "chat.completion.chunk"
+
+
+class CreateChatCompletionStreamResponse(BaseModel):  # One streamed chunk of a chat completion.
+    id: str = Field(
+        ...,
+        description="A unique identifier for the chat completion. Each chunk has the same ID.",
+    )
+    choices: List[Choice3] = Field(
+        ...,
+        description="A list of chat completion choices. Can be more than one if `n` is greater than 1.",
+    )
+    created: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp.",
+    )
+    model: str = Field(..., description="The model to generate the completion.")
+    system_fingerprint: Optional[str] = Field(  # the only optional field on this model
+        None,
+        description="This fingerprint represents the backend configuration that the model runs with.\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n",
+    )
+    object: Object5 = Field(
+        ..., description="The object type, which is always `chat.completion.chunk`."
+    )
+
+
+class CreateChatCompletionImageResponse(BaseModel):  # Placeholder model: declares no fields.
+    pass
+
+
+class ModelEnum2(Enum):  # Named model IDs for CreateEditRequest.model (which also accepts any str).
+    text_davinci_edit_001 = "text-davinci-edit-001"
+    code_davinci_edit_001 = "code-davinci-edit-001"
+
+
+class CreateEditRequest(BaseModel):  # Request body for an edit; instruction and model are required.
+    instruction: str = Field(
+        ...,
+        description="The instruction that tells the model how to edit the prompt.",
+        example="Fix the spelling mistakes.",
+    )
+    model: Union[str, ModelEnum2] = Field(
+        ...,
+        description="ID of the model to use. You can use the `text-davinci-edit-001` or `code-davinci-edit-001` model with this endpoint.",
+        example="text-davinci-edit-001",
+    )
+    input: Optional[str] = Field(  # defaults to the empty string
+        "",
+        description="The input text to use as a starting point for the edit.",
+        example="What day of the wek is it?",
+    )
+    n: Optional[conint(ge=1, le=20)] = Field(
+        1,
+        description="How many edits to generate for the input and instruction.",
+        example=1,
+    )
+    temperature: Optional[confloat(ge=0.0, le=2.0)] = Field(
+        1,
+        description="What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.\n",
+        example=1,
+    )
+    top_p: Optional[confloat(ge=0.0, le=1.0)] = Field(
+        1,
+        description="An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both.\n",
+        example=1,
+    )
+
+
+class FinishReason4(Enum):  # Stop reasons used by Choice4.finish_reason: only "stop" and "length".
+    stop = "stop"
+    length = "length"
+
+
+class Choice4(BaseModel):  # One edit result: stop reason, index and edited text.
+    finish_reason: FinishReason4 = Field(  # NOTE(review): description mentions `content_filter`, which FinishReason4 does not define
+        ...,
+        description="The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,\n`length` if the maximum number of tokens specified in the request was reached,\nor `content_filter` if content was omitted due to a flag from our content filters.\n",
+    )
+    index: int = Field(
+        ..., description="The index of the choice in the list of choices."
+    )
+    text: str = Field(..., description="The edited result.")
+
+
+class Object6(Enum):  # Single-member enum: the only permitted value is "edit".
+    edit = "edit"
+
+
+class ModelEnum3(Enum):  # Named model IDs for CreateImageRequest.model (which also accepts any str).
+    dall_e_2 = "dall-e-2"
+    dall_e_3 = "dall-e-3"
+
+
+class Quality(Enum):  # Allowed values for CreateImageRequest.quality.
+    standard = "standard"
+    hd = "hd"
+
+
+class ResponseFormat1(Enum):  # Allowed values for CreateImageRequest.response_format.
+    url = "url"
+    b64_json = "b64_json"
+
+
+class Size(Enum):  # Allowed values for CreateImageRequest.size; member names carry a field_ prefix.
+    field_256x256 = "256x256"
+    field_512x512 = "512x512"
+    field_1024x1024 = "1024x1024"
+    field_1792x1024 = "1792x1024"
+    field_1024x1792 = "1024x1792"
+
+
+class Style(Enum):  # Allowed values for CreateImageRequest.style.
+    vivid = "vivid"
+    natural = "natural"
+
+
+class CreateImageRequest(BaseModel):  # Request body for image generation; only prompt is required.
+    prompt: str = Field(  # the length limits in the description are not enforced by this field's type
+        ...,
+        description="A text description of the desired image(s). The maximum length is 1000 characters for `dall-e-2` and 4000 characters for `dall-e-3`.",
+        example="A cute baby sea otter",
+    )
+    model: Optional[Union[str, ModelEnum3]] = Field(
+        "dall-e-2",
+        description="The model to use for image generation.",
+        example="dall-e-3",
+    )
+    n: Optional[conint(ge=1, le=10)] = Field(
+        1,
+        description="The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.",
+        example=1,
+    )
+    quality: Optional[Quality] = Field(  # default is the raw string, not a Quality member
+        "standard",
+        description="The quality of the image that will be generated. `hd` creates images with finer details and greater consistency across the image. This param is only supported for `dall-e-3`.",
+        example="standard",
+    )
+    response_format: Optional[ResponseFormat1] = Field(  # default is the raw string, not an enum member
+        "url",
+        description="The format in which the generated images are returned. Must be one of `url` or `b64_json`.",
+        example="url",
+    )
+    size: Optional[Size] = Field(  # default is the raw string, not a Size member
+        "1024x1024",
+        description="The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models.",
+        example="1024x1024",
+    )
+    style: Optional[Style] = Field(  # default is the raw string, not a Style member
+        "vivid",
+        description="The style of the generated images. Must be one of `vivid` or `natural`. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This param is only supported for `dall-e-3`.",
+        example="vivid",
+    )
+    user: Optional[str] = Field(
+        None,
+        description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n",
+        example="user-1234",
+    )
+
+
+class Image(BaseModel):  # One generated image; every field is optional and defaults to None.
+    b64_json: Optional[str] = Field(
+        None,
+        description="The base64-encoded JSON of the generated image, if `response_format` is `b64_json`.",
+    )
+    url: Optional[str] = Field(  # plain str here, unlike ImageUrl.url which is AnyUrl
+        None,
+        description="The URL of the generated image, if `response_format` is `url` (default).",
+    )
+    revised_prompt: Optional[str] = Field(
+        None,
+        description="The prompt that was used to generate the image, if there was any revision to the prompt.",
+    )
+
+
+class ModelEnum4(Enum):  # Single-member enum ("dall-e-2"); used by CreateImageEditRequest.model.
+    dall_e_2 = "dall-e-2"
+
+
+class Size1(Enum):  # Allowed values for CreateImageEditRequest.size (square sizes only).
+    field_256x256 = "256x256"
+    field_512x512 = "512x512"
+    field_1024x1024 = "1024x1024"
+
+
+class ResponseFormat2(Enum):  # Allowed values for CreateImageEditRequest.response_format.
+    url = "url"
+    b64_json = "b64_json"
+
+
+class CreateImageEditRequest(BaseModel):  # Request body for an image edit; image and prompt are required.
+    image: bytes = Field(  # the PNG/size/shape constraints in the description are not enforced by this field's type
+        ...,
+        description="The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask is not provided, image must have transparency, which will be used as the mask.",
+    )
+    prompt: str = Field(
+        ...,
+        description="A text description of the desired image(s). The maximum length is 1000 characters.",
+        example="A cute baby sea otter wearing a beret",
+    )
+    mask: Optional[bytes] = Field(
+        None,
+        description="An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`.",
+    )
+    model: Optional[Union[str, ModelEnum4]] = Field(
+        "dall-e-2",
+        description="The model to use for image generation. Only `dall-e-2` is supported at this time.",
+        example="dall-e-2",
+    )
+    n: Optional[conint(ge=1, le=10)] = Field(
+        1,
+        description="The number of images to generate. Must be between 1 and 10.",
+        example=1,
+    )
+    size: Optional[Size1] = Field(  # default is the raw string, not a Size1 member
+        "1024x1024",
+        description="The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`.",
+        example="1024x1024",
+    )
+    response_format: Optional[ResponseFormat2] = Field(  # default is the raw string, not an enum member
+        "url",
+        description="The format in which the generated images are returned. Must be one of `url` or `b64_json`.",
+        example="url",
+    )
+    user: Optional[str] = Field(
+        None,
+        description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n",
+        example="user-1234",
+    )
+
+
+class ModelEnum5(Enum):  # Named model value referenced by CreateImageVariationRequest.model.
+    dall_e_2 = "dall-e-2"
+
+
+class ResponseFormat3(Enum):  # Allowed values of CreateImageVariationRequest.response_format.
+    url = "url"
+    b64_json = "b64_json"
+
+
+class Size2(Enum):  # Allowed values of CreateImageVariationRequest.size.
+    field_256x256 = "256x256"  # "field_" prefix: "256x256" alone is not a valid identifier
+    field_512x512 = "512x512"
+    field_1024x1024 = "1024x1024"
+
+
+class CreateImageVariationRequest(BaseModel):  # Request body for generating variations of an existing image.
+    image: bytes = Field(  # required (Ellipsis default)
+        ...,
+        description="The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and square.",
+    )
+    model: Optional[Union[str, ModelEnum5]] = Field(  # any string is allowed; ModelEnum5 names the known value
+        "dall-e-2",
+        description="The model to use for image generation. Only `dall-e-2` is supported at this time.",
+        example="dall-e-2",
+    )
+    n: Optional[conint(ge=1, le=10)] = Field(  # NOTE(review): description mentions `dall-e-3`, but `model` lists only `dall-e-2` — confirm
+        1,
+        description="The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.",
+        example=1,
+    )
+    response_format: Optional[ResponseFormat3] = Field(
+        "url",  # default is written as the plain string, not ResponseFormat3.url
+        description="The format in which the generated images are returned. Must be one of `url` or `b64_json`.",
+        example="url",
+    )
+    size: Optional[Size2] = Field(
+        "1024x1024",  # default is written as the plain string, not Size2.field_1024x1024
+        description="The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`.",
+        example="1024x1024",
+    )
+    user: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n",
+        example="user-1234",
+    )
+
+
+class ModelEnum6(Enum):  # Named model values referenced by CreateModerationRequest.model.
+    text_moderation_latest = "text-moderation-latest"
+    text_moderation_stable = "text-moderation-stable"
+
+
+class CreateModerationRequest(BaseModel):  # Request body for classifying text with a moderation model.
+    input: Union[str, List[str]] = Field(..., description="The input text to classify")  # required; one string or a list of strings
+    model: Optional[Union[str, ModelEnum6]] = Field(  # any string is allowed; ModelEnum6 names the known values
+        "text-moderation-latest",
+        description="Two content moderations models are available: `text-moderation-stable` and `text-moderation-latest`.\n\nThe default is `text-moderation-latest` which will be automatically upgraded over time. This ensures you are always using our most accurate model. If you use `text-moderation-stable`, we will provide advanced notice before updating the model. Accuracy of `text-moderation-stable` may be slightly lower than for `text-moderation-latest`.\n",
+        example="text-moderation-stable",
+    )
+
+
+class Categories(BaseModel):  # Per-category boolean flags of a moderation result; every field is required.
+    hate: bool = Field(
+        ...,
+        description="Content that expresses, incites, or promotes hate based on race, gender, ethnicity, religion, nationality, sexual orientation, disability status, or caste. Hateful content aimed at non-protected groups (e.g., chess players) is harrassment.",
+    )
+    hate_threatening: bool = Field(
+        ...,
+        alias="hate/threatening",  # external key; "/" cannot appear in a Python attribute name
+        description="Hateful content that also includes violence or serious harm towards the targeted group based on race, gender, ethnicity, religion, nationality, sexual orientation, disability status, or caste.",
+    )
+    harassment: bool = Field(
+        ...,
+        description="Content that expresses, incites, or promotes harassing language towards any target.",
+    )
+    harassment_threatening: bool = Field(
+        ...,
+        alias="harassment/threatening",
+        description="Harassment content that also includes violence or serious harm towards any target.",
+    )
+    self_harm: bool = Field(
+        ...,
+        alias="self-harm",  # external key; "-" cannot appear in a Python attribute name
+        description="Content that promotes, encourages, or depicts acts of self-harm, such as suicide, cutting, and eating disorders.",
+    )
+    self_harm_intent: bool = Field(
+        ...,
+        alias="self-harm/intent",
+        description="Content where the speaker expresses that they are engaging or intend to engage in acts of self-harm, such as suicide, cutting, and eating disorders.",
+    )
+    self_harm_instructions: bool = Field(
+        ...,
+        alias="self-harm/instructions",
+        description="Content that encourages performing acts of self-harm, such as suicide, cutting, and eating disorders, or that gives instructions or advice on how to commit such acts.",
+    )
+    sexual: bool = Field(
+        ...,
+        description="Content meant to arouse sexual excitement, such as the description of sexual activity, or that promotes sexual services (excluding sex education and wellness).",
+    )
+    sexual_minors: bool = Field(
+        ...,
+        alias="sexual/minors",
+        description="Sexual content that includes an individual who is under 18 years old.",
+    )
+    violence: bool = Field(
+        ..., description="Content that depicts death, violence, or physical injury."
+    )
+    violence_graphic: bool = Field(
+        ...,
+        alias="violence/graphic",
+        description="Content that depicts death, violence, or physical injury in graphic detail.",
+    )
+
+
+class CategoryScores(BaseModel):  # Per-category float scores of a moderation result; same field set and aliases as Categories.
+    hate: float = Field(..., description="The score for the category 'hate'.")
+    hate_threatening: float = Field(
+        ...,
+        alias="hate/threatening",  # external key; "/" cannot appear in a Python attribute name
+        description="The score for the category 'hate/threatening'.",
+    )
+    harassment: float = Field(
+        ..., description="The score for the category 'harassment'."
+    )
+    harassment_threatening: float = Field(
+        ...,
+        alias="harassment/threatening",
+        description="The score for the category 'harassment/threatening'.",
+    )
+    self_harm: float = Field(
+        ..., alias="self-harm", description="The score for the category 'self-harm'."
+    )
+    self_harm_intent: float = Field(
+        ...,
+        alias="self-harm/intent",
+        description="The score for the category 'self-harm/intent'.",
+    )
+    self_harm_instructions: float = Field(
+        ...,
+        alias="self-harm/instructions",
+        description="The score for the category 'self-harm/instructions'.",
+    )
+    sexual: float = Field(..., description="The score for the category 'sexual'.")
+    sexual_minors: float = Field(
+        ...,
+        alias="sexual/minors",
+        description="The score for the category 'sexual/minors'.",
+    )
+    violence: float = Field(..., description="The score for the category 'violence'.")
+    violence_graphic: float = Field(
+        ...,
+        alias="violence/graphic",
+        description="The score for the category 'violence/graphic'.",
+    )
+
+
+class Result(BaseModel):  # One moderation verdict: overall flag plus per-category flags and scores.
+    flagged: bool = Field(  # required
+        ...,
+        description="Whether the content violates [OpenAI's usage policies](/policies/usage-policies).",
+    )
+    categories: Categories = Field(  # required; nested Categories model (booleans)
+        ...,
+        description="A list of the categories, and whether they are flagged or not.",
+    )
+    category_scores: CategoryScores = Field(  # required; nested CategoryScores model (floats)
+        ...,
+        description="A list of the categories along with their scores as predicted by model.",
+    )
+
+
+class CreateModerationResponse(BaseModel):  # Response to a moderation request: request id, model name and a list of Result objects.
+    id: str = Field(
+        ..., description="The unique identifier for the moderation request."
+    )
+    model: str = Field(
+        ..., description="The model used to generate the moderation results."
+    )
+    results: List[Result] = Field(..., description="A list of moderation objects.")  # required
+
+
+class Object7(Enum):  # Single-member enum for the constant "list" (presumably an `object` type tag of a list response — confirm at the use site).
+    list = "list"
+
+
+class Purpose(Enum):  # Allowed values of CreateFileRequest.purpose.
+    fine_tune = "fine-tune"
+    assistants = "assistants"
+
+
+class CreateFileRequest(BaseModel):  # Request body for uploading a file together with its intended purpose.
+    class Config:
+        extra = Extra.forbid  # fields not declared on this model are rejected
+
+    file: bytes = Field(  # required
+        ..., description="The File object (not file name) to be uploaded.\n"
+    )
+    purpose: Purpose = Field(  # required; must be one of the Purpose values
+        ...,
+        description='The intended purpose of the uploaded file.\n\nUse "fine-tune" for [Fine-tuning](/docs/api-reference/fine-tuning) and "assistants" for [Assistants](/docs/api-reference/assistants) and [Messages](/docs/api-reference/messages). This allows us to validate the format of the uploaded file is correct for fine-tuning.\n',
+    )
+
+
+class Object8(Enum):  # Single-member enum for DeleteFileResponse.object; the only value is "file".
+    file = "file"
+
+
+class DeleteFileResponse(BaseModel):  # Result of a file deletion; all three fields are required (no defaults).
+    id: str
+    object: Object8  # always "file" (only member of Object8)
+    deleted: bool
+
+
+class ModelEnum7(Enum):  # Named model values referenced by CreateFineTuningJobRequest.model.
+    babbage_002 = "babbage-002"
+    davinci_002 = "davinci-002"
+    gpt_3_5_turbo = "gpt-3.5-turbo"
+
+
+class BatchSizeEnum(Enum):  # The "auto" alternative to a numeric value in Hyperparameters.batch_size.
+    auto = "auto"
+
+
+class LearningRateMultiplierEnum(Enum):  # The "auto" alternative to a numeric value in Hyperparameters.learning_rate_multiplier.
+    auto = "auto"
+
+
+class NEpoch(Enum):  # The "auto" alternative to a numeric value in Hyperparameters.n_epochs.
+    auto = "auto"
+
+
+class Hyperparameters(BaseModel):  # Training settings for CreateFineTuningJobRequest; each field is "auto" or a bounded number.
+    batch_size: Optional[Union[BatchSizeEnum, conint(ge=1, le=256)]] = Field(  # "auto" or an int in 1..256
+        "auto",  # default is written as the plain string, not BatchSizeEnum.auto
+        description="Number of examples in each batch. A larger batch size means that model parameters\nare updated less frequently, but with lower variance.\n",
+    )
+    learning_rate_multiplier: Optional[  # "auto" or a positive float
+        Union[LearningRateMultiplierEnum, PositiveFloat]
+    ] = Field(
+        "auto",  # default is written as the plain string, not LearningRateMultiplierEnum.auto
+        description="Scaling factor for the learning rate. A smaller learning rate may be useful to avoid\noverfitting.\n",
+    )
+    n_epochs: Optional[Union[NEpoch, conint(ge=1, le=50)]] = Field(  # "auto" or an int in 1..50
+        "auto",  # default is written as the plain string, not NEpoch.auto
+        description="The number of epochs to train the model for. An epoch refers to one full cycle \nthrough the training dataset.\n",
+    )
+
+
+class CreateFineTuningJobRequest(BaseModel):  # Request body for creating a fine-tuning job; `model` and `training_file` are required.
+    model: Union[str, ModelEnum7] = Field(  # required; any string is allowed, ModelEnum7 names the known values
+        ...,
+        description="The name of the model to fine-tune. You can select one of the\n[supported models](/docs/guides/fine-tuning/what-models-can-be-fine-tuned).\n",
+        example="gpt-3.5-turbo",
+    )
+    training_file: str = Field(  # required; an uploaded file's ID, not its contents
+        ...,
+        description="The ID of an uploaded file that contains training data.\n\nSee [upload file](/docs/api-reference/files/upload) for how to upload a file.\n\nYour dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`.\n\nSee the [fine-tuning guide](/docs/guides/fine-tuning) for more details.\n",
+        example="file-abc123",
+    )
+    hyperparameters: Optional[Hyperparameters] = Field(  # optional, defaults to None
+        None, description="The hyperparameters used for the fine-tuning job."
+    )
+    suffix: Optional[constr(min_length=1, max_length=40)] = Field(  # NOTE(review): constraint allows 1..40 chars but the description says "up to 18" — confirm which is intended
+        None,
+        description='A string of up to 18 characters that will be added to your fine-tuned model name.\n\nFor example, a `suffix` of "custom-model-name" would produce a model name like `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`.\n',
+    )
+    validation_file: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description="The ID of an uploaded file that contains validation data.\n\nIf you provide this file, the data is used to generate validation\nmetrics periodically during fine-tuning. These metrics can be viewed in\nthe fine-tuning results file.\nThe same data should not be present in both train and validation files.\n\nYour dataset must be formatted as a JSONL file. You must upload your file with the purpose `fine-tune`.\n\nSee the [fine-tuning guide](/docs/guides/fine-tuning) for more details.\n",
+        example="file-abc123",
+    )
+
+
+class Object9(Enum):  # Single-member enum for the constant "list" (presumably an `object` type tag of a list response — confirm at the use site).
+    list = "list"
+
+
+class NEpoch1(Enum):  # The "auto" alternative to a numeric value in Hyperparameters1.n_epochs.
+    auto = "auto"
+
+
+class Hyperparameters1(BaseModel):  # Hyperparameters accepted by CreateFineTuneRequest; only the epoch count is configurable here.
+    n_epochs: Optional[Union[NEpoch1, conint(ge=1, le=50)]] = Field(  # "auto" or an int in 1..50
+        "auto",  # default is written as the plain string, not NEpoch1.auto
+        description="The number of epochs to train the model for. An epoch refers to one\nfull cycle through the training dataset.\n",
+    )
+
+
+class ModelEnum8(Enum):  # Named base-model values referenced by CreateFineTuneRequest.model.
+    ada = "ada"
+    babbage = "babbage"
+    curie = "curie"
+    davinci = "davinci"
+
+
+class CreateFineTuneRequest(BaseModel):  # Request body for creating a fine-tune (field descriptions link to the legacy fine-tuning guide); only `training_file` is required.
+    training_file: str = Field(  # required; an uploaded file's ID, not its contents
+        ...,
+        description='The ID of an uploaded file that contains training data.\n\nSee [upload file](/docs/api-reference/files/upload) for how to upload a file.\n\nYour dataset must be formatted as a JSONL file, where each training\nexample is a JSON object with the keys "prompt" and "completion".\nAdditionally, you must upload your file with the purpose `fine-tune`.\n\nSee the [fine-tuning guide](/docs/guides/legacy-fine-tuning/creating-training-data) for more details.\n',
+        example="file-abc123",
+    )
+    batch_size: Optional[int] = Field(  # optional, defaults to None
+        None,
+        description="The batch size to use for training. The batch size is the number of\ntraining examples used to train a single forward and backward pass.\n\nBy default, the batch size will be dynamically configured to be\n~0.2% of the number of examples in the training set, capped at 256 -\nin general, we've found that larger batch sizes tend to work better\nfor larger datasets.\n",
+    )
+    classification_betas: Optional[List[float]] = Field(  # optional, defaults to None
+        None,
+        description="If this is provided, we calculate F-beta scores at the specified\nbeta values. The F-beta score is a generalization of F-1 score.\nThis is only used for binary classification.\n\nWith a beta of 1 (i.e. the F-1 score), precision and recall are\ngiven the same weight. A larger beta score puts more weight on\nrecall and less on precision. A smaller beta score puts more weight\non precision and less on recall.\n",
+        example=[0.6, 1, 1.5, 2],
+    )
+    classification_n_classes: Optional[int] = Field(  # optional, defaults to None
+        None,
+        description="The number of classes in a classification task.\n\nThis parameter is required for multiclass classification.\n",
+    )
+    classification_positive_class: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description="The positive class in binary classification.\n\nThis parameter is needed to generate precision, recall, and F1\nmetrics when doing binary classification.\n",
+    )
+    compute_classification_metrics: Optional[bool] = Field(  # defaults to False
+        False,
+        description="If set, we calculate classification-specific metrics such as accuracy\nand F-1 score using the validation set at the end of every epoch.\nThese metrics can be viewed in the [results file](/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model).\n\nIn order to compute classification metrics, you must provide a\n`validation_file`. Additionally, you must\nspecify `classification_n_classes` for multiclass classification or\n`classification_positive_class` for binary classification.\n",
+    )
+    hyperparameters: Optional[Hyperparameters1] = Field(  # optional, defaults to None
+        None, description="The hyperparameters used for the fine-tuning job."
+    )
+    learning_rate_multiplier: Optional[float] = Field(  # optional, defaults to None
+        None,
+        description="The learning rate multiplier to use for training.\nThe fine-tuning learning rate is the original learning rate used for\npretraining multiplied by this value.\n\nBy default, the learning rate multiplier is the 0.05, 0.1, or 0.2\ndepending on final `batch_size` (larger learning rates tend to\nperform better with larger batch sizes). We recommend experimenting\nwith values in the range 0.02 to 0.2 to see what produces the best\nresults.\n",
+    )
+    model: Optional[Union[str, ModelEnum8]] = Field(  # any string is allowed; ModelEnum8 names the known base models
+        "curie",
+        description='The name of the base model to fine-tune. You can select one of "ada",\n"babbage", "curie", "davinci", or a fine-tuned model created after 2022-04-21 and before 2023-08-22.\nTo learn more about these models, see the\n[Models](/docs/models) documentation.\n',
+        example="curie",
+    )
+    prompt_loss_weight: Optional[float] = Field(  # defaults to 0.01
+        0.01,
+        description="The weight to use for loss on the prompt tokens. This controls how\nmuch the model tries to learn to generate the prompt (as compared\nto the completion which always has a weight of 1.0), and can add\na stabilizing effect to training when completions are short.\n\nIf prompts are extremely long (relative to completions), it may make\nsense to reduce this weight so as to avoid over-prioritizing\nlearning the prompt.\n",
+    )
+    suffix: Optional[constr(min_length=1, max_length=40)] = Field(  # optional string of 1..40 characters
+        None,
+        description='A string of up to 40 characters that will be added to your fine-tuned model name.\n\nFor example, a `suffix` of "custom-model-name" would produce a model name like `ada:ft-your-org:custom-model-name-2022-02-15-04-21-04`.\n',
+    )
+    validation_file: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description='The ID of an uploaded file that contains validation data.\n\nIf you provide this file, the data is used to generate validation\nmetrics periodically during fine-tuning. These metrics can be viewed in\nthe [fine-tuning results file](/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model).\nYour train and validation data should be mutually exclusive.\n\nYour dataset must be formatted as a JSONL file, where each validation\nexample is a JSON object with the keys "prompt" and "completion".\nAdditionally, you must upload your file with the purpose `fine-tune`.\n\nSee the [fine-tuning guide](/docs/guides/legacy-fine-tuning/creating-training-data) for more details.\n',
+        example="file-abc123",
+    )
+
+
+class Object10(Enum):  # Single-member enum for the constant "list" (presumably an `object` type tag of a list response — confirm at the use site).
+    list = "list"
+
+
+class Object11(Enum):  # Single-member enum for the constant "list" (presumably an `object` type tag of a list response — confirm at the use site).
+    list = "list"
+
+
+class InputItem(BaseModel):  # Custom-root model whose value is itself a list; used as the element type of List[InputItem] in CreateEmbeddingRequest.input.
+    __root__: List[Any]  # elements are unconstrained (Any); presumably one token array per item — confirm
+
+
+class ModelEnum9(Enum):  # Named model value referenced by CreateEmbeddingRequest.model.
+    text_embedding_ada_002 = "text-embedding-ada-002"
+
+
+class EncodingFormat(Enum):  # Allowed values of CreateEmbeddingRequest.encoding_format.
+    float = "float"  # member name shadows the builtin `float` only inside this class body
+    base64 = "base64"
+
+
+class CreateEmbeddingRequest(BaseModel):  # Request body for computing embeddings; `input` and `model` are required.
+    class Config:
+        extra = Extra.forbid  # fields not declared on this model are rejected
+
+    input: Union[str, List[str], List[int], List[InputItem]] = Field(  # required; a string, list of strings, list of ints, or list of InputItem
+        ...,
+        description="Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`) and cannot be an empty string. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.\n",
+        example="The quick brown fox jumped over the lazy dog",
+    )
+    model: Union[str, ModelEnum9] = Field(  # required; any string is allowed, ModelEnum9 names the known value
+        ...,
+        description="ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n",
+        example="text-embedding-ada-002",
+    )
+    encoding_format: Optional[EncodingFormat] = Field(
+        "float",  # default is written as the plain string, not EncodingFormat.float
+        description="The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).",
+        example="float",
+    )
+    user: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n",
+        example="user-1234",
+    )
+
+
+class Object12(Enum):  # Single-member enum for the constant "list" (presumably an `object` type tag of a list response — confirm at the use site).
+    list = "list"
+
+
+class Usage(BaseModel):  # Token accounting with prompt and total counts only (no completion count); both fields are required.
+    prompt_tokens: int = Field(
+        ..., description="The number of tokens used by the prompt."
+    )
+    total_tokens: int = Field(
+        ..., description="The total number of tokens used by the request."
+    )
+
+
+class ModelEnum10(Enum):  # Named model value referenced by CreateTranscriptionRequest.model.
+    whisper_1 = "whisper-1"
+
+
+class ResponseFormat4(Enum):  # Allowed values of CreateTranscriptionRequest.response_format.
+    json = "json"
+    text = "text"
+    srt = "srt"
+    verbose_json = "verbose_json"
+    vtt = "vtt"
+
+
+class CreateTranscriptionRequest(BaseModel):  # Request body for transcribing an audio file; `file` and `model` are required.
+    class Config:
+        extra = Extra.forbid  # fields not declared on this model are rejected
+
+    file: bytes = Field(  # required
+        ...,
+        description="The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.\n",
+    )
+    model: Union[str, ModelEnum10] = Field(  # required; any string is allowed, ModelEnum10 names the known value
+        ...,
+        description="ID of the model to use. Only `whisper-1` is currently available.\n",
+        example="whisper-1",
+    )
+    language: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description="The language of the input audio. Supplying the input language in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy and latency.\n",
+    )
+    prompt: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description="An optional text to guide the model's style or continue a previous audio segment. The [prompt](/docs/guides/speech-to-text/prompting) should match the audio language.\n",
+    )
+    response_format: Optional[ResponseFormat4] = Field(
+        "json",  # default is written as the plain string, not ResponseFormat4.json
+        description="The format of the transcript output, in one of these options: `json`, `text`, `srt`, `verbose_json`, or `vtt`.\n",
+    )
+    temperature: Optional[float] = Field(
+        0,  # default is the int literal 0 on a float-typed field
+        description="The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit.\n",
+    )
+
+
+class CreateTranscriptionResponse(BaseModel):  # Transcription result carrying a single required `text` string.
+    text: str
+
+
+class ModelEnum11(Enum):  # Named model value referenced by CreateTranslationRequest.model.
+    whisper_1 = "whisper-1"
+
+
+class CreateTranslationRequest(BaseModel):  # Request body for translating an audio file; `file` and `model` are required.
+    class Config:
+        extra = Extra.forbid  # fields not declared on this model are rejected
+
+    file: bytes = Field(  # required
+        ...,
+        description="The audio file object (not file name) translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.\n",
+    )
+    model: Union[str, ModelEnum11] = Field(  # required; any string is allowed, ModelEnum11 names the known value
+        ...,
+        description="ID of the model to use. Only `whisper-1` is currently available.\n",
+        example="whisper-1",
+    )
+    prompt: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description="An optional text to guide the model's style or continue a previous audio segment. The [prompt](/docs/guides/speech-to-text/prompting) should be in English.\n",
+    )
+    response_format: Optional[str] = Field(  # NOTE(review): free-form str here, whereas CreateTranscriptionRequest restricts the same options via ResponseFormat4 — confirm this is intended
+        "json",
+        description="The format of the transcript output, in one of these options: `json`, `text`, `srt`, `verbose_json`, or `vtt`.\n",
+    )
+    temperature: Optional[float] = Field(
+        0,  # default is the int literal 0 on a float-typed field
+        description="The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit.\n",
+    )
+
+
+class CreateTranslationResponse(BaseModel):  # Translation result carrying a single required `text` string.
+    text: str
+
+
+class ModelEnum12(Enum):  # Named model values referenced by CreateSpeechRequest.model.
+    tts_1 = "tts-1"
+    tts_1_hd = "tts-1-hd"
+
+
+class Voice(Enum):  # Allowed values of CreateSpeechRequest.voice.
+    alloy = "alloy"
+    echo = "echo"
+    fable = "fable"
+    onyx = "onyx"
+    nova = "nova"
+    shimmer = "shimmer"
+
+
+class ResponseFormat5(Enum):  # Allowed values of CreateSpeechRequest.response_format (audio container/codec names).
+    mp3 = "mp3"
+    opus = "opus"
+    aac = "aac"
+    flac = "flac"
+
+
+class CreateSpeechRequest(BaseModel):  # Request body for text-to-speech; `model`, `input` and `voice` are required.
+    class Config:
+        extra = Extra.forbid  # fields not declared on this model are rejected
+
+    model: Union[str, ModelEnum12] = Field(  # required; any string is allowed, ModelEnum12 names the known values
+        ...,
+        description="One of the available [TTS models](/docs/models/tts): `tts-1` or `tts-1-hd`\n",
+    )
+    input: constr(max_length=4096) = Field(  # required string of at most 4096 characters
+        ...,
+        description="The text to generate audio for. The maximum length is 4096 characters.",
+    )
+    voice: Voice = Field(  # required; must be one of the Voice values
+        ...,
+        description="The voice to use when generating the audio. Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`.",
+    )
+    response_format: Optional[ResponseFormat5] = Field(
+        "mp3",  # default is written as the plain string, not ResponseFormat5.mp3
+        description="The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`.",
+    )
+    speed: Optional[confloat(ge=0.25, le=4.0)] = Field(  # float constrained to 0.25..4.0, defaults to 1.0
+        1.0,
+        description="The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is the default.",
+    )
+
+
+class Object13(Enum):  # Single-member enum for Model.object; the only value is "model".
+    model = "model"
+
+
+class Model(BaseModel):  # Description of one model: id, creation time, type tag and owner; all fields are required.
+    id: str = Field(
+        ...,
+        description="The model identifier, which can be referenced in the API endpoints.",
+    )
+    created: int = Field(  # Unix timestamp in seconds, per the description
+        ..., description="The Unix timestamp (in seconds) when the model was created."
+    )
+    object: Object13 = Field(  # always "model" (only member of Object13)
+        ..., description='The object type, which is always "model".'
+    )
+    owned_by: str = Field(..., description="The organization that owns the model.")
+
+
+class Object14(Enum):  # Single-member enum for OpenAIFile.object; the only value is "file".
+    file = "file"
+
+
+class Purpose1(Enum):  # Allowed values of OpenAIFile.purpose.
+    fine_tune = "fine_tune"  # NOTE(review): OpenAIFile.purpose's description lists `fine-tune` (hyphen), and Purpose uses "fine-tune" — confirm which spelling the API sends
+    fine_tune_results = "fine_tune_results"  # NOTE(review): the description lists `fine-tune-results` (hyphens) — confirm
+    assistants = "assistants"
+    assistants_output = "assistants_output"
+
+
+class Status(Enum):  # Allowed values of OpenAIFile.status (that field's description marks it deprecated).
+    uploaded = "uploaded"
+    processed = "processed"
+    error = "error"
+
+
+class OpenAIFile(BaseModel):  # Metadata record for an uploaded file; every field except `status_details` is required.
+    id: str = Field(
+        ...,
+        description="The file identifier, which can be referenced in the API endpoints.",
+    )
+    bytes: int = Field(..., description="The size of the file, in bytes.")  # NOTE: this field name shadows the builtin `bytes` for the rest of the class body
+    created_at: int = Field(  # Unix timestamp in seconds, per the description
+        ...,
+        description="The Unix timestamp (in seconds) for when the file was created.",
+    )
+    filename: str = Field(..., description="The name of the file.")
+    object: Object14 = Field(  # always "file" (only member of Object14)
+        ..., description="The object type, which is always `file`."
+    )
+    purpose: Purpose1 = Field(  # required; must be one of the Purpose1 values
+        ...,
+        description="The intended purpose of the file. Supported values are `fine-tune`, `fine-tune-results`, `assistants`, and `assistants_output`.",
+    )
+    status: Status = Field(  # required even though the description marks it deprecated
+        ...,
+        description="Deprecated. The current status of the file, which can be either `uploaded`, `processed`, or `error`.",
+    )
+    status_details: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description="Deprecated. For details on why a fine-tuning training file failed validation, see the `error` field on `fine_tuning.job`.",
+    )
+
+
+class Object15(Enum):  # Single-member enum for Embedding.object; the only value is "embedding".
+    embedding = "embedding"
+
+
+class Embedding(BaseModel):  # One embedding vector together with its position in the returned list; all fields are required.
+    index: int = Field(
+        ..., description="The index of the embedding in the list of embeddings."
+    )
+    embedding: List[float] = Field(  # typed as a list of floats only
+        ...,
+        description="The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the [embedding guide](/docs/guides/embeddings).\n",
+    )
+    object: Object15 = Field(  # always "embedding" (only member of Object15)
+        ..., description='The object type, which is always "embedding".'
+    )
+
+
+class Error1(BaseModel):
+    code: str = Field(..., description="A machine-readable error code.")
+    message: str = Field(..., description="A human-readable error message.")
+    param: str = Field(
+        ...,
+        description="The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific.",
+    )
+
+
+class NEpoch2(Enum):  # The "auto" alternative to a numeric value in Hyperparameters2.n_epochs.
+    auto = "auto"
+
+
+class Hyperparameters2(BaseModel):  # Hyperparameters reported on a FineTuningJob; `n_epochs` is required here.
+    n_epochs: Union[NEpoch2, conint(ge=1, le=50)] = Field(  # "auto" or an int in 1..50
+        ...,
+        description='The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset.\n"auto" decides the optimal number of epochs based on the size of the dataset. If setting the number manually, we support any number between 1 and 50 epochs.',
+    )
+
+
+class Object16(Enum):  # Single-member enum for FineTuningJob.object; the only value is "fine_tuning.job".
+    fine_tuning_job = "fine_tuning.job"
+
+
+class Status1(Enum):  # Allowed values of FineTuningJob.status.
+    validating_files = "validating_files"
+    queued = "queued"
+    running = "running"
+    succeeded = "succeeded"
+    failed = "failed"
+    cancelled = "cancelled"
+
+
+class FineTuningJob(BaseModel):
+    id: str = Field(
+        ...,
+        description="The object identifier, which can be referenced in the API endpoints.",
+    )
+    created_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the fine-tuning job was created.",
+    )
+    error: Error1 = Field(
+        ...,
+        description="For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure.",
+    )
+    fine_tuned_model: str = Field(
+        ...,
+        description="The name of the fine-tuned model that is being created. The value will be null if the fine-tuning job is still running.",
+    )
+    finished_at: int = Field(
+        ...,
+        description="The Unix timestamp (in seconds) for when the fine-tuning job was finished. The value will be null if the fine-tuning job is still running.",
+    )
+    hyperparameters: Hyperparameters2 = Field(
+        ...,
+        description="The hyperparameters used for the fine-tuning job. See the [fine-tuning guide](/docs/guides/fine-tuning) for more details.",
+    )
+    model: str = Field(..., description="The base model that is being fine-tuned.")
+    object: Object16 = Field(
+        ..., description='The object type, which is always "fine_tuning.job".'
+    )
+    organization_id: str = Field(
+        ..., description="The organization that owns the fine-tuning job."
+    )
+    result_files: List[str] = Field(
+        ...,
+        description="The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the [Files API](/docs/api-reference/files/retrieve-contents).",
+    )
+    status: Status1 = Field(
+        ...,
+        description="The current status of the fine-tuning job, which can be either `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`.",
+    )
+    trained_tokens: int = Field(
+        ...,
+        description="The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running.",
+    )
+    training_file: str = Field(
+        ...,
+        description="The file ID used for training. You can retrieve the training data with the [Files API](/docs/api-reference/files/retrieve-contents).",
+    )
+    validation_file: str = Field(
+        ...,
+        description="The file ID used for validation. You can retrieve the validation results with the [Files API](/docs/api-reference/files/retrieve-contents).",
+    )
+
+
+class Level(Enum):  # Allowed values of FineTuningJobEvent.level.
+    info = "info"
+    warn = "warn"
+    error = "error"
+
+
+class Object17(Enum):  # Single-member enum for FineTuningJobEvent.object; the only value is "fine_tuning.job.event".
+    fine_tuning_job_event = "fine_tuning.job.event"
+
+
+class FineTuningJobEvent(BaseModel):  # One event emitted for a fine-tuning job; all fields are required (no defaults).
+    id: str
+    created_at: int  # presumably a Unix timestamp in seconds like FineTuningJob.created_at — confirm
+    level: Level  # one of "info", "warn", "error"
+    message: str
+    object: Object17  # always "fine_tuning.job.event" (only member of Object17)
+
+
+class Hyperparams(BaseModel):  # Hyperparameter values with concrete numeric types (no "auto"); the three classification fields are optional.
+    batch_size: int = Field(  # required
+        ...,
+        description="The batch size to use for training. The batch size is the number of\ntraining examples used to train a single forward and backward pass.\n",
+    )
+    classification_n_classes: Optional[int] = Field(  # optional, defaults to None
+        None,
+        description="The number of classes to use for computing classification metrics.\n",
+    )
+    classification_positive_class: Optional[str] = Field(  # optional, defaults to None
+        None,
+        description="The positive class to use for computing classification metrics.\n",
+    )
+    compute_classification_metrics: Optional[bool] = Field(  # optional, defaults to None
+        None,
+        description="The classification metrics to compute using the validation dataset at the end of every epoch.\n",
+    )
+    learning_rate_multiplier: float = Field(  # required
+        ..., description="The learning rate multiplier to use for training.\n"
+    )
+    n_epochs: int = Field(  # required
+        ...,
+        description="The number of epochs to train the model for. An epoch refers to one\nfull cycle through the training dataset.\n",
+    )
+    prompt_loss_weight: float = Field(  # required
+        ..., description="The weight to use for loss on the prompt tokens.\n"
+    )
+
+
+class Object18(Enum):  # Single-member enum: the only value FineTune.object accepts.
+    fine_tune = "fine-tune"
+
+
+class Object19(Enum):  # Single-member enum: the only value FineTuneEvent.object accepts.
+    fine_tune_event = "fine-tune-event"
+
+
+class FineTuneEvent(BaseModel):  # One event entry of a FineTune job (see FineTune.events); all fields required.
+    created_at: int
+    level: str  # free-form string here, unlike FineTuningJobEvent.level which uses the Level enum
+    message: str
+    object: Object19  # constrained to "fine-tune-event"
+
+
+class CompletionUsage(BaseModel):  # Token accounting attached as `usage` to the completion/chat/edit response models in this file.
+    completion_tokens: int = Field(  # required
+        ..., description="Number of tokens in the generated completion."
+    )
+    prompt_tokens: int = Field(..., description="Number of tokens in the prompt.")  # required
+    total_tokens: int = Field(  # required; the model does not itself check that it equals prompt + completion
+        ...,
+        description="Total number of tokens used in the request (prompt + completion).",
+    )
+
+
+class Object20(Enum):  # Single-member enum; in this part of the file it is referenced only by the commented-out AssistantObject.
+    assistant = "assistant"
+
+
+class Object21(Enum):  # Single-member enum: the only value DeleteAssistantResponse.object accepts.
+    assistant_deleted = "assistant.deleted"
+
+
+class DeleteAssistantResponse(BaseModel):  # Response body shape for an assistant deletion; all fields required.
+    id: str
+    deleted: bool
+    object: Object21  # constrained to "assistant.deleted"
+
+
+class Type7(Enum):  # Single-member enum: the only value AssistantToolsCode.type accepts.
+    code_interpreter = "code_interpreter"
+
+
+class AssistantToolsCode(BaseModel):  # Tool entry selecting the code interpreter; a member of the `tools` Unions in this file.
+    type: Type7 = Field(  # required; must be "code_interpreter"
+        ..., description="The type of tool being defined: `code_interpreter`"
+    )
+
+
+class Type8(Enum):  # Single-member enum: the only value AssistantToolsRetrieval.type accepts.
+    retrieval = "retrieval"
+
+
+class AssistantToolsRetrieval(BaseModel):  # Tool entry selecting retrieval; a member of the `tools` Unions in this file.
+    type: Type8 = Field(..., description="The type of tool being defined: `retrieval`")  # required; must be "retrieval"
+
+
+class Type824(Enum):  # Single-value enum for AssistantToolsBrowser.type; the only accepted value is "browser".
+    browser = retrieval = "browser"  # `browser` is the canonical member; `retrieval` (the original, copy-pasted name) is kept as an alias so existing references still resolve
+
+
+class AssistantToolsBrowser(BaseModel):  # Tool entry selecting the browser tool; a member of the `tools` Unions in this file.
+    type: Type824 = Field(..., description="The type of tool being defined: `browser`")  # required; must be "browser"
+
+
+class Type9(Enum):  # Single-member enum: the only value AssistantToolsFunction.type accepts.
+    function = "function"
+
+
+class AssistantToolsFunction(BaseModel):  # Tool entry describing a callable function; a member of the `tools` Unions in this file.
+    type: Type9 = Field(..., description="The type of tool being defined: `function`")  # required; must be "function"
+    function: FunctionObject  # required; FunctionObject is defined outside the visible part of the file
+
+
+class Object22(Enum):  # Single-member enum; no definition in the visible part of the file references it.
+    thread_run = "thread.run"
+
+
+class Status2(Enum):  # Eight status strings; given Object22 ("thread.run") just above, presumably run statuses - confirm at the use site (not visible here).
+    queued = "queued"
+    in_progress = "in_progress"
+    requires_action = "requires_action"
+    cancelling = "cancelling"
+    cancelled = "cancelled"
+    failed = "failed"
+    completed = "completed"
+    expired = "expired"
+
+
+class Type10(Enum):  # Single-member enum; no definition in the visible part of the file references it.
+    submit_tool_outputs = "submit_tool_outputs"
+
+
+class Code(Enum):  # Error codes accepted by LastError.code; same members as Code1.
+    server_error = "server_error"
+    rate_limit_exceeded = "rate_limit_exceeded"
+
+
+class LastError(BaseModel):  # Error code plus human-readable message; both fields required.
+    code: Code = Field(  # must be one of the Code enum values
+        ..., description="One of `server_error` or `rate_limit_exceeded`."
+    )
+    message: str = Field(..., description="A human-readable description of the error.")
+
+
+class CreateRunRequest(BaseModel):  # Request body for creating a run; only assistant_id is required.
+    class Config:
+        extra = Extra.forbid  # payload keys not declared below fail validation
+
+    assistant_id: str = Field(  # required
+        ...,
+        description="The ID of the [assistant](/docs/api-reference/assistants) to use to execute this run.",
+    )
+    model: Optional[str] = Field(  # optional per-run override; None when omitted
+        None,
+        description="The ID of the [Model](/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.",
+    )
+    instructions: Optional[str] = Field(  # optional per-run override; None when omitted
+        None,
+        description="Override the default system message of the assistant. This is useful for modifying the behavior on a per-run basis.",
+    )
+    tools: Optional[  # optional; each item must validate as one of the four tool models listed
+        List[
+            Union[
+                AssistantToolsCode,
+                AssistantToolsRetrieval,
+                AssistantToolsFunction,
+                AssistantToolsBrowser,
+            ]
+        ]
+    ] = Field(
+        None,
+        description="Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis.",
+        max_items=20,  # at most 20 tool entries
+    )
+    metadata: Optional[Dict[str, Any]] = Field(  # optional; the limits in the description are not enforced by this type annotation
+        None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+
+class ModifyRunRequest(BaseModel):  # Request body for modifying a run; metadata is the only accepted key.
+    class Config:
+        extra = Extra.forbid  # payload keys not declared below fail validation
+
+    metadata: Optional[Dict[str, Any]] = Field(  # optional; None when omitted
+        None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+
+class ToolOutput(BaseModel):  # One tool result; item type of SubmitToolOutputsRunRequest.tool_outputs. Both fields are optional.
+    tool_call_id: Optional[str] = Field(  # None when omitted
+        None,
+        description="The ID of the tool call in the `required_action` object within the run object the output is being submitted for.",
+    )
+    output: Optional[str] = Field(  # None when omitted
+        None,
+        description="The output of the tool call to be submitted to continue the run.",
+    )
+
+
+class SubmitToolOutputsRunRequest(BaseModel):  # Request body carrying tool outputs for a run.
+    class Config:
+        extra = Extra.forbid  # payload keys not declared below fail validation
+
+    tool_outputs: List[ToolOutput] = Field(  # required; no length constraint is declared
+        ..., description="A list of tools for which the outputs are being submitted."
+    )
+
+
+class Type11(Enum):  # Single-member enum: the only value RunToolCallObject.type accepts.
+    function = "function"
+
+
+class Function3(BaseModel):  # Function name plus argument string; RunToolCallObject.function is typed with it.
+    name: str = Field(..., description="The name of the function.")  # required
+    arguments: str = Field(  # required; kept as a plain string, not parsed by this model
+        ...,
+        description="The arguments that the model expects you to pass to the function.",
+    )
+
+
+class RunToolCallObject(BaseModel):  # A tool call whose output is awaited; all three fields required.
+    id: str = Field(
+        ...,
+        description="The ID of the tool call. This ID must be referenced when you submit the tool outputs in using the [Submit tool outputs to run](/docs/api-reference/runs/submitToolOutputs) endpoint.",
+    )
+    type: Type11 = Field(  # must be "function"
+        ...,
+        description="The type of tool call the output is required for. For now, this is always `function`.",
+    )
+    function: Function3 = Field(..., description="The function definition.")
+
+
+class Object23(Enum):  # Single-member enum; in this part of the file it is referenced only by the commented-out ThreadObject.
+    thread = "thread"
+
+
+# class ThreadObject(BaseModel):
+#     id: str = Field(
+#         ..., description="The identifier, which can be referenced in API endpoints."
+#     )
+#     object: Object23 = Field(
+#         ..., description="The object type, which is always `thread`."
+#     )
+#     created_at: int = Field(
+#         ...,
+#         description="The Unix timestamp (in seconds) for when the thread was created.",
+#     )
+#     metadata: Dict[str, Any] = Field(
+#         ...,
+#         description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+#     )
+
+
+class ModifyThreadRequest(BaseModel):  # Request body for modifying a thread; metadata is the only accepted key.
+    class Config:
+        extra = Extra.forbid  # payload keys not declared below fail validation
+
+    metadata: Optional[Dict[str, Any]] = Field(  # optional; None when omitted
+        None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+
+class Object24(Enum):  # Single-member enum: the only value DeleteThreadResponse.object accepts.
+    thread_deleted = "thread.deleted"
+
+
+class DeleteThreadResponse(BaseModel):  # Response body shape for a thread deletion; all fields required.
+    id: str
+    deleted: bool
+    object: Object24  # constrained to "thread.deleted"
+
+
+class ListThreadsResponse(BaseModel):  # One page of threads with first/last ids and a has_more flag; all fields required.
+    object: str = Field(..., example="list")  # plain str, not an enum; "list" is only an example value
+    data: List[ThreadObject]  # ThreadObject is not defined in the visible part of the file (only a commented-out copy precedes this class); presumably imported or defined elsewhere - verify
+    first_id: str = Field(..., example="asst_hLBK7PXBv5Lr2NQT7KLY0ag1")
+    last_id: str = Field(..., example="asst_QLoItBbqwyAJEzlTy4y9kOMM")
+    has_more: bool = Field(..., example=False)
+
+
+class Object25(Enum):  # Single-member enum; no definition in the visible part of the file references it.
+    thread_message = "thread.message"
+
+
+class Role7(Enum):  # Four role strings; no definition in the visible part of the file references it.
+    user = "user"
+    assistant = "assistant"
+    tool_call = "tool_call"
+    tool_output = "tool_output"
+
+
+class Role8(Enum):  # Single-member enum: the only value CreateMessageRequest.role accepts.
+    user = "user"
+
+
+class CreateMessageRequest(BaseModel):  # Request body for creating a message; role and content are required.
+    class Config:
+        extra = Extra.forbid  # payload keys not declared below fail validation
+
+    role: Role8 = Field(  # must be "user"
+        ...,
+        description="The role of the entity that is creating the message. Currently only `user` is supported.",
+    )
+    content: constr(min_length=1, max_length=32768) = Field(  # non-empty string of at most 32768 characters
+        ..., description="The content of the message."
+    )
+    file_ids: Optional[List[str]] = Field(  # NOTE(review): default is [] while min_items=1, so an explicitly sent empty list would presumably be rejected even though omitting the key yields [] - confirm intent
+        [],
+        description="A list of [File](/docs/api-reference/files) IDs that the message should use. There can be a maximum of 10 files attached to a message. Useful for tools like `retrieval` and `code_interpreter` that can access and use files.",
+        max_items=10,
+        min_items=1,
+    )
+    metadata: Optional[Dict[str, Any]] = Field(  # optional; None when omitted
+        None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+
+class ModifyMessageRequest(BaseModel):  # Request body for modifying a message; metadata is the only accepted key.
+    class Config:
+        extra = Extra.forbid  # payload keys not declared below fail validation
+
+    metadata: Optional[Dict[str, Any]] = Field(  # optional; None when omitted
+        None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+
+class Object26(Enum):  # Single-member enum: the only value DeleteMessageResponse.object accepts.
+    thread_message_deleted = "thread.message.deleted"
+
+
+class DeleteMessageResponse(BaseModel):  # Response body shape for a message deletion; all fields required.
+    id: str
+    deleted: bool
+    object: Object26  # constrained to "thread.message.deleted"
+
+
+class Type12(Enum):  # Single-member enum: the only value MessageContentImageFileObject.type accepts.
+    image_file = "image_file"
+
+
+class ImageFile(BaseModel):  # Holds the file id of an image; MessageContentImageFileObject.image_file is typed with it.
+    file_id: str = Field(  # required
+        ...,
+        description="The [File](/docs/api-reference/files) ID of the image in the message content.",
+    )
+
+
+class MessageContentImageFileObject(BaseModel):  # Message content part that points at an image file; both fields required.
+    type: Type12 = Field(..., description="Always `image_file`.")
+    image_file: ImageFile
+
+
+class Type13(Enum):  # Single-member enum; no definition in the visible part of the file references it.
+    text = "text"
+
+
+class Type14(Enum):  # Single-member enum: the only value MessageContentTextAnnotationsFileCitationObject.type accepts.
+    file_citation = "file_citation"
+
+
+class FileCitation(BaseModel):  # Cited file id plus quoted text; both fields required.
+    file_id: str = Field(
+        ..., description="The ID of the specific File the citation is from."
+    )
+    quote: str = Field(..., description="The specific quote in the file.")
+
+
+class MessageContentTextAnnotationsFileCitationObject(BaseModel):  # Text annotation of kind file_citation; all fields required.
+    type: Type14 = Field(..., description="Always `file_citation`.")
+    text: str = Field(
+        ..., description="The text in the message content that needs to be replaced."
+    )
+    file_citation: FileCitation
+    start_index: conint(ge=0)  # integer >= 0
+    end_index: conint(ge=0)  # integer >= 0; no end >= start check is declared on this model
+
+
+class Type15(Enum):  # Single-member enum: the only value MessageContentTextAnnotationsFilePathObject.type accepts.
+    file_path = "file_path"
+
+
+class FilePath(BaseModel):  # Holds a generated file's id; MessageContentTextAnnotationsFilePathObject.file_path is typed with it.
+    file_id: str = Field(..., description="The ID of the file that was generated.")  # required
+
+
+class MessageContentTextAnnotationsFilePathObject(BaseModel):  # Text annotation of kind file_path; all fields required.
+    type: Type15 = Field(..., description="Always `file_path`.")
+    text: str = Field(
+        ..., description="The text in the message content that needs to be replaced."
+    )
+    file_path: FilePath
+    start_index: conint(ge=0)  # integer >= 0
+    end_index: conint(ge=0)  # integer >= 0; no end >= start check is declared on this model
+
+
+class Object27(Enum):  # Single-member enum; no definition in the visible part of the file references it.
+    thread_run_step = "thread.run.step"
+
+
+class Type16(Enum):  # Two-member enum combining the values of Type17 and Type18; no definition in the visible part of the file references it.
+    message_creation = "message_creation"
+    tool_calls = "tool_calls"
+
+
+class Status3(Enum):  # Five status strings; given Object27 ("thread.run.step") above, presumably run-step statuses - confirm at the use site (not visible here).
+    in_progress = "in_progress"
+    cancelled = "cancelled"
+    failed = "failed"
+    completed = "completed"
+    expired = "expired"
+
+
+class Code1(Enum):  # Error codes accepted by LastError1.code; same members as Code.
+    server_error = "server_error"
+    rate_limit_exceeded = "rate_limit_exceeded"
+
+
+class LastError1(BaseModel):  # Error code plus human-readable message; same shape as LastError but typed with Code1.
+    code: Code1 = Field(  # must be one of the Code1 enum values
+        ..., description="One of `server_error` or `rate_limit_exceeded`."
+    )
+    message: str = Field(..., description="A human-readable description of the error.")
+
+
+class Type17(Enum):  # Single-member enum: the only value RunStepDetailsMessageCreationObject.type accepts.
+    message_creation = "message_creation"
+
+
+class MessageCreation(BaseModel):  # Holds the id of a created message; RunStepDetailsMessageCreationObject.message_creation is typed with it.
+    message_id: str = Field(  # required
+        ..., description="The ID of the message that was created by this run step."
+    )
+
+
+class RunStepDetailsMessageCreationObject(BaseModel):  # Run-step details of kind message_creation; both fields required.
+    type: Type17 = Field(..., description="Always `message_creation``.")  # NOTE(review): the description text ends with a doubled backtick, which looks like a typo in the emitted schema text
+    message_creation: MessageCreation
+
+
+class Type18(Enum):  # Single-member enum; no definition in the visible part of the file references it.
+    tool_calls = "tool_calls"
+
+
+class Type19(Enum):  # Single-member enum with the same value as Type7; no definition in the visible part of the file references it.
+    code_interpreter = "code_interpreter"
+
+
+class Type20(Enum):  # Single-member enum: the only value RunStepDetailsToolCallsCodeOutputLogsObject.type accepts.
+    logs = "logs"
+
+
+class RunStepDetailsToolCallsCodeOutputLogsObject(BaseModel):  # Code-interpreter output of kind logs; both fields required.
+    type: Type20 = Field(..., description="Always `logs`.")
+    logs: str = Field(
+        ..., description="The text output from the Code Interpreter tool call."
+    )
+
+
+class Type21(Enum):  # Single-member enum: the only value RunStepDetailsToolCallsCodeOutputImageObject.type accepts.
+    image = "image"
+
+
+class Image1(BaseModel):  # Holds an image's file id; RunStepDetailsToolCallsCodeOutputImageObject.image is typed with it.
+    file_id: str = Field(  # required
+        ..., description="The [file](/docs/api-reference/files) ID of the image."
+    )
+
+
+class RunStepDetailsToolCallsCodeOutputImageObject(BaseModel):  # Code-interpreter output of kind image; both fields required.
+    type: Type21 = Field(..., description="Always `image`.")
+    image: Image1
+
+
+class Type22(Enum):  # Single-member enum: the only value RunStepDetailsToolCallsRetrievalObject.type accepts.
+    retrieval = "retrieval"
+
+
+class RunStepDetailsToolCallsRetrievalObject(BaseModel):  # Tool-call record of kind retrieval; all three fields required.
+    id: str = Field(..., description="The ID of the tool call object.")
+    type: Type22 = Field(  # must be "retrieval"
+        ...,
+        description="The type of tool call. This is always going to be `retrieval` for this type of tool call.",
+    )
+    retrieval: Dict[str, Any] = Field(  # any dict is accepted; emptiness is documented but not enforced
+        ..., description="For now, this is always going to be an empty object."
+    )
+
+
+class Type23(Enum):  # Single-member enum: the only value RunStepDetailsToolCallsFunctionObject.type accepts.
+    function = "function"
+
+
+class Function4(BaseModel):  # Name, arguments and output of a called function; RunStepDetailsToolCallsFunctionObject.function is typed with it.
+    name: str = Field(..., description="The name of the function.")  # required
+    arguments: str = Field(..., description="The arguments passed to the function.")  # required
+    output: Optional[str] = Field(  # key is still required (Ellipsis default) but None is now accepted, matching the description's "`null` if the outputs have not been submitted yet"
+        ...,
+        description="The output of the function. This will be `null` if the outputs have not been [submitted](/docs/api-reference/runs/submitToolOutputs) yet.",
+    )
+
+
+class RunStepDetailsToolCallsFunctionObject(BaseModel):  # Tool-call record of kind function; all three fields required.
+    id: str = Field(..., description="The ID of the tool call object.")
+    type: Type23 = Field(  # must be "function"
+        ...,
+        description="The type of tool call. This is always going to be `function` for this type of tool call.",
+    )
+    function: Function4 = Field(  # name, arguments and output of the call
+        ..., description="The definition of the function that was called."
+    )
+
+
+class Object28(Enum):  # Single-member enum: the only value AssistantFileObject.object accepts.
+    assistant_file = "assistant.file"
+
+
+class AssistantFileObject(BaseModel):  # A file attached to an assistant; all four fields required.
+    id: str = Field(
+        ..., description="The identifier, which can be referenced in API endpoints."
+    )
+    object: Object28 = Field(  # must be "assistant.file"
+        ..., description="The object type, which is always `assistant.file`."
+    )
+    created_at: int = Field(  # Unix seconds, per the description
+        ...,
+        description="The Unix timestamp (in seconds) for when the assistant file was created.",
+    )
+    assistant_id: str = Field(
+        ..., description="The assistant ID that the file is attached to."
+    )
+
+
+class CreateAssistantFileRequest(BaseModel):  # Request body for attaching a file to an assistant; file_id is the only accepted key.
+    class Config:
+        extra = Extra.forbid  # payload keys not declared below fail validation
+
+    file_id: str = Field(  # required
+        ...,
+        description='A [File](/docs/api-reference/files) ID (with `purpose="assistants"`) that the assistant should use. Useful for tools like `retrieval` and `code_interpreter` that can access files.',
+    )
+
+
+class Object29(Enum):  # Single-member enum: the only value DeleteAssistantFileResponse.object accepts.
+    assistant_file_deleted = "assistant.file.deleted"
+
+
+class DeleteAssistantFileResponse(BaseModel):  # Response body shape for an assistant-file deletion; all fields required.
+    id: str
+    deleted: bool
+    object: Object29  # constrained to "assistant.file.deleted"
+
+
+class ListAssistantFilesResponse(BaseModel):  # One page of assistant files with first/last ids and a has_more flag; all fields required.
+    object: str = Field(..., example="list")  # plain str, not an enum; "list" is only an example value
+    data: List[AssistantFileObject]
+    first_id: str = Field(..., example="file-hLBK7PXBv5Lr2NQT7KLY0ag1")
+    last_id: str = Field(..., example="file-QLoItBbqwyAJEzlTy4y9kOMM")
+    has_more: bool = Field(..., example=False)
+
+
+class Object30(Enum):  # Single-member enum: the only value MessageFileObject.object accepts.
+    thread_message_file = "thread.message.file"
+
+
+class MessageFileObject(BaseModel):  # A file attached to a message; all four fields required.
+    id: str = Field(
+        ..., description="The identifier, which can be referenced in API endpoints."
+    )
+    object: Object30 = Field(  # must be "thread.message.file"
+        ..., description="The object type, which is always `thread.message.file`."
+    )
+    created_at: int = Field(  # Unix seconds, per the description
+        ...,
+        description="The Unix timestamp (in seconds) for when the message file was created.",
+    )
+    message_id: str = Field(
+        ...,
+        description="The ID of the [message](/docs/api-reference/messages) that the [File](/docs/api-reference/files) is attached to.",
+    )
+
+
+class ListMessageFilesResponse(BaseModel):  # One page of message files with first/last ids and a has_more flag; all fields required.
+    object: str = Field(..., example="list")  # plain str, not an enum; "list" is only an example value
+    data: List[MessageFileObject]
+    first_id: str = Field(..., example="file-hLBK7PXBv5Lr2NQT7KLY0ag1")
+    last_id: str = Field(..., example="file-QLoItBbqwyAJEzlTy4y9kOMM")
+    has_more: bool = Field(..., example=False)
+
+
+class Order(Enum):  # Sort direction ("asc"/"desc"); same members as Order1..Order11; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order1(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order2(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order3(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order4(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order5(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order6(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order7(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order8(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order9(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order10(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class Order11(Enum):  # Sort direction ("asc"/"desc"); same members as Order; not referenced in the visible part of the file.
+    asc = "asc"
+    desc = "desc"
+
+
+class ListModelsResponse(BaseModel):  # List wrapper around Model entries; both fields required.
+    object: Object  # Object is defined outside the visible part of the file
+    data: List[Model]  # Model is defined outside the visible part of the file
+
+
+class CreateCompletionResponse(BaseModel):  # Response shape of a text completion; system_fingerprint and usage are optional, the rest required.
+    id: str = Field(..., description="A unique identifier for the completion.")
+    choices: List[Choice] = Field(  # Choice is defined outside the visible part of the file
+        ...,
+        description="The list of completion choices the model generated for the input prompt.",
+    )
+    created: int = Field(  # Unix seconds, per the description
+        ...,
+        description="The Unix timestamp (in seconds) of when the completion was created.",
+    )
+    model: str = Field(..., description="The model used for completion.")
+    system_fingerprint: Optional[str] = Field(  # optional; None when omitted
+        None,
+        description="This fingerprint represents the backend configuration that the model runs with.\n\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n",
+    )
+    object: Object1 = Field(  # Object1 is defined outside the visible part of the file
+        ..., description='The object type, which is always "text_completion"'
+    )
+    usage: Optional[CompletionUsage] = None  # optional token accounting
+
+
+class ChatCompletionRequestMessageContentPart(BaseModel):  # Custom-root model: the value itself is one of the two part models, not an object with named fields.
+    __root__: Union[  # both member types are defined outside the visible part of the file
+        ChatCompletionRequestMessageContentPartText,
+        ChatCompletionRequestMessageContentPartImage,
+    ]
+
+
+class ChatCompletionRequestUserMessage(BaseModel):  # A user-authored chat message; both fields required.
+    content: Union[str, List[ChatCompletionRequestMessageContentPart]] = Field(  # either plain text or a list of content parts
+        ..., description="The contents of the user message.\n"
+    )
+    role: Role1 = Field(  # Role1 is defined outside the visible part of the file
+        ..., description="The role of the messages author, in this case `user`."
+    )
+
+
+class ChatCompletionTool(BaseModel):  # A tool offered to a chat completion; both fields required.
+    type: Type2 = Field(  # Type2 is defined outside the visible part of the file
+        ...,
+        description="The type of the tool. Currently, only `function` is supported.",
+    )
+    function: FunctionObject  # FunctionObject is defined outside the visible part of the file
+
+
+class ChatCompletionToolChoiceOption(BaseModel):  # Custom-root model: the value itself is either the enum option or a named tool choice.
+    __root__: Union[  # both member types are defined outside the visible part of the file
+        ChatCompletionToolChoiceOptionEnum, ChatCompletionNamedToolChoice
+    ] = Field(
+        ...,
+        description='Controls which (if any) function is called by the model.\n`none` means the model will not call a function and instead generates a message.\n`auto` means the model can pick between generating a message or calling a function.\nSpecifying a particular function via `{"type: "function", "function": {"name": "my_function"}}` forces the model to call that function.\n\n`none` is the default when no functions are present. `auto` is the default if functions are present.\n',
+    )
+
+
+class ChatCompletionMessageToolCalls(BaseModel):  # Custom-root model: the value itself is a list of tool calls; used by ChatCompletionResponseMessage.tool_calls.
+    __root__: List[ChatCompletionMessageToolCall] = Field(  # item type is defined outside the visible part of the file
+        ...,
+        description="The tool calls generated by the model, such as function calls.",
+    )
+
+
+class ChatCompletionResponseMessage(BaseModel):  # Message generated by the model; the `message` field of Choice1 and Choice2.
+    content: Optional[str] = Field(..., description="The contents of the message.")  # key is still required, but None is accepted: a reply that only carries tool_calls/function_call has null content in the OpenAI chat completion object
+    tool_calls: Optional[ChatCompletionMessageToolCalls] = None  # optional list of tool calls
+    role: Role5 = Field(..., description="The role of the author of this message.")  # Role5 is defined outside the visible part of the file
+    function_call: Optional[FunctionCall1] = Field(  # optional; per its description, deprecated in favour of tool_calls
+        None,
+        description="Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.",
+    )
+
+
+class Choice1(BaseModel):  # One choice of a chat completion; item type of CreateChatCompletionResponse.choices. All fields required.
+    finish_reason: FinishReason1 = Field(  # FinishReason1 is defined outside the visible part of the file
+        ...,
+        description="The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,\n`length` if the maximum number of tokens specified in the request was reached,\n`content_filter` if content was omitted due to a flag from our content filters,\n`tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.\n",
+    )
+    index: int = Field(
+        ..., description="The index of the choice in the list of choices."
+    )
+    message: ChatCompletionResponseMessage
+
+
+class CreateChatCompletionResponse(BaseModel):  # Response shape of a chat completion; system_fingerprint and usage are optional, the rest required.
+    id: str = Field(..., description="A unique identifier for the chat completion.")
+    choices: List[Choice1] = Field(
+        ...,
+        description="A list of chat completion choices. Can be more than one if `n` is greater than 1.",
+    )
+    created: int = Field(  # Unix seconds, per the description
+        ...,
+        description="The Unix timestamp (in seconds) of when the chat completion was created.",
+    )
+    model: str = Field(..., description="The model used for the chat completion.")
+    system_fingerprint: Optional[str] = Field(  # optional; None when omitted
+        None,
+        description="This fingerprint represents the backend configuration that the model runs with.\n\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n",
+    )
+    object: Object2 = Field(  # Object2 is defined outside the visible part of the file
+        ..., description="The object type, which is always `chat.completion`."
+    )
+    usage: Optional[CompletionUsage] = None  # optional token accounting
+
+
+class Choice2(BaseModel):  # One choice of a function-style chat completion; item type of CreateChatCompletionFunctionResponse.choices. All fields required.
+    finish_reason: FinishReason2 = Field(  # FinishReason2 is defined outside the visible part of the file
+        ...,
+        description="The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence, `length` if the maximum number of tokens specified in the request was reached, `content_filter` if content was omitted due to a flag from our content filters, or `function_call` if the model called a function.\n",
+    )
+    index: int = Field(
+        ..., description="The index of the choice in the list of choices."
+    )
+    message: ChatCompletionResponseMessage
+
+
+class CreateChatCompletionFunctionResponse(BaseModel):  # Same field layout as CreateChatCompletionResponse, but typed with Choice2 and Object3.
+    id: str = Field(..., description="A unique identifier for the chat completion.")
+    choices: List[Choice2] = Field(
+        ...,
+        description="A list of chat completion choices. Can be more than one if `n` is greater than 1.",
+    )
+    created: int = Field(  # Unix seconds, per the description
+        ...,
+        description="The Unix timestamp (in seconds) of when the chat completion was created.",
+    )
+    model: str = Field(..., description="The model used for the chat completion.")
+    system_fingerprint: Optional[str] = Field(  # optional; None when omitted
+        None,
+        description="This fingerprint represents the backend configuration that the model runs with.\n\nCan be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.\n",
+    )
+    object: Object3 = Field(  # Object3 is defined outside the visible part of the file
+        ..., description="The object type, which is always `chat.completion`."
+    )
+    usage: Optional[CompletionUsage] = None  # optional token accounting
+
+
+class ListPaginatedFineTuningJobsResponse(BaseModel):  # One page of fine-tuning jobs plus a has_more flag; all fields required.
+    data: List[FineTuningJob]
+    has_more: bool
+    object: Object4  # Object4 is defined outside the visible part of the file
+
+
+class CreateEditResponse(BaseModel):  # Response shape of an edit; all fields required, including usage.
+    choices: List[Choice4] = Field(  # Choice4 is defined outside the visible part of the file
+        ...,
+        description="A list of edit choices. Can be more than one if `n` is greater than 1.",
+    )
+    object: Object6 = Field(..., description="The object type, which is always `edit`.")  # Object6 is defined outside the visible part of the file
+    created: int = Field(  # Unix seconds, per the description
+        ..., description="The Unix timestamp (in seconds) of when the edit was created."
+    )
+    usage: CompletionUsage  # required here, unlike the Optional usage on the completion responses
+
+
+class ImagesResponse(BaseModel):  # Creation time plus a list of Image entries; both fields required.
+    created: int  # presumably a Unix timestamp like the other `created` fields in this file - TODO confirm
+    data: List[Image]  # Image is defined outside the visible part of the file
+
+
+class ListFilesResponse(BaseModel):  # List wrapper around OpenAIFile entries; both fields required.
+    data: List[OpenAIFile]  # OpenAIFile is defined outside the visible part of the file
+    object: Object7  # Object7 is defined outside the visible part of the file
+
+
+class ListFineTuningJobEventsResponse(BaseModel):  # List wrapper around FineTuningJobEvent entries; both fields required.
+    data: List[FineTuningJobEvent]
+    object: Object9  # Object9 is defined outside the visible part of the file
+
+
+class ListFineTuneEventsResponse(BaseModel):  # List wrapper around FineTuneEvent entries; both fields required.
+    data: List[FineTuneEvent]
+    object: Object11  # Object11 is defined outside the visible part of the file
+
+
+class CreateEmbeddingResponse(BaseModel):  # Response shape of an embedding request; all four fields required.
+    data: List[Embedding] = Field(  # Embedding is defined outside the visible part of the file
+        ..., description="The list of embeddings generated by the model."
+    )
+    model: str = Field(
+        ..., description="The name of the model used to generate the embedding."
+    )
+    object: Object12 = Field(  # Object12 is defined outside the visible part of the file
+        ..., description='The object type, which is always "list".'
+    )
+    usage: Usage = Field(..., description="The usage information for the request.")  # Usage is defined outside the visible part of the file
+
+
+class FineTune(BaseModel):  # Fine-tune job record (object type "fine-tune"); every field except `events` is required.
+    id: str = Field(
+        ...,
+        description="The object identifier, which can be referenced in the API endpoints.",
+    )
+    created_at: int = Field(  # Unix seconds, per the description
+        ...,
+        description="The Unix timestamp (in seconds) for when the fine-tuning job was created.",
+    )
+    events: Optional[List[FineTuneEvent]] = Field(  # optional; None when omitted
+        None,
+        description="The list of events that have been observed in the lifecycle of the FineTune job.",
+    )
+    fine_tuned_model: str = Field(  # required and non-nullable as declared
+        ..., description="The name of the fine-tuned model that is being created."
+    )
+    hyperparams: Hyperparams = Field(
+        ...,
+        description="The hyperparameters used for the fine-tuning job. See the [fine-tuning guide](/docs/guides/legacy-fine-tuning/hyperparameters) for more details.",
+    )
+    model: str = Field(..., description="The base model that is being fine-tuned.")
+    object: Object18 = Field(  # must be "fine-tune"
+        ..., description='The object type, which is always "fine-tune".'
+    )
+    organization_id: str = Field(
+        ..., description="The organization that owns the fine-tuning job."
+    )
+    result_files: List[OpenAIFile] = Field(  # OpenAIFile is defined outside the visible part of the file
+        ..., description="The compiled results files for the fine-tuning job."
+    )
+    status: str = Field(  # plain str; the values listed in the description are not enforced by an enum
+        ...,
+        description="The current status of the fine-tuning job, which can be either `created`, `running`, `succeeded`, `failed`, or `cancelled`.",
+    )
+    training_files: List[OpenAIFile] = Field(
+        ..., description="The list of files used for training."
+    )
+    updated_at: int = Field(  # Unix seconds, per the description
+        ...,
+        description="The Unix timestamp (in seconds) for when the fine-tuning job was last updated.",
+    )
+    validation_files: List[OpenAIFile] = Field(
+        ..., description="The list of files used for validation."
+    )
+
+
+# class AssistantObject(BaseModel):
+#     id: str = Field(
+#         ..., description="The identifier, which can be referenced in API endpoints."
+#     )
+#     object: Object20 = Field(
+#         ..., description="The object type, which is always `assistant`."
+#     )
+#     created_at: int = Field(
+#         ...,
+#         description="The Unix timestamp (in seconds) for when the assistant was created.",
+#     )
+#     name: constr(max_length=256) = Field(
+#         ...,
+#         description="The name of the assistant. The maximum length is 256 characters.\n",
+#     )
+#     description: constr(max_length=512) = Field(
+#         ...,
+#         description="The description of the assistant. The maximum length is 512 characters.\n",
+#     )
+#     model: str = Field(
+#         ...,
+#         description="ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n",
+#     )
+#     instructions: constr(max_length=32768) = Field(
+#         ...,
+#         description="The system instructions that the assistant uses. The maximum length is 32768 characters.\n",
+#     )
+#     tools: List[
+#         Union[AssistantToolsCode, AssistantToolsRetrieval, AssistantToolsFunction]
+#     ] = Field(
+#         ...,
+#         description="A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`.\n",
+#         max_items=128,
+#     )
+#     file_ids: List[str] = Field(
+#         ...,
+#         description="A list of [file](/docs/api-reference/files) IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. Files are ordered by their creation date in ascending order.\n",
+#         max_items=20,
+#     )
+#     metadata: Dict[str, Any] = Field(
+#         ...,
+#         description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+#     )
+
+
+class CreateAssistantRequest(BaseModel):  # Request body for creating an assistant; only `model` is required.
+    class Config:
+        extra = Extra.forbid  # payload keys not declared below fail validation
+
+    model: str = Field(  # required
+        ...,
+        description="ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n",
+    )
+    name: Optional[constr(max_length=256)] = Field(  # optional; at most 256 characters
+        None,
+        description="The name of the assistant. The maximum length is 256 characters.\n",
+    )
+    description: Optional[constr(max_length=512)] = Field(  # optional; at most 512 characters
+        None,
+        description="The description of the assistant. The maximum length is 512 characters.\n",
+    )
+    instructions: Optional[constr(max_length=32768)] = Field(  # optional; at most 32768 characters
+        None,
+        description="The system instructions that the assistant uses. The maximum length is 32768 characters.\n",
+    )
+    tools: Optional[  # NOTE(review): the description below lists three tool types, but this Union also admits AssistantToolsBrowser
+        List[
+            Union[
+                AssistantToolsCode,
+                AssistantToolsRetrieval,
+                AssistantToolsFunction,
+                AssistantToolsBrowser,
+            ]
+        ]
+    ] = Field(
+        [],
+        description="A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`.\n",
+        max_items=128,  # at most 128 tool entries
+    )
+    file_ids: Optional[List[str]] = Field(  # defaults to an empty list
+        [],
+        description="A list of [file](/docs/api-reference/files) IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. Files are ordered by their creation date in ascending order.\n",
+        max_items=20,  # at most 20 file ids
+    )
+    metadata: Optional[Dict[str, Any]] = Field(  # optional; the limits in the description are not enforced by this type annotation
+        None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+
+class ModifyAssistantRequest(BaseModel):
+    class Config:
+        extra = Extra.forbid
+
+    model: Optional[str] = Field(
+        None,
+        description="ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n",
+    )
+    name: Optional[constr(max_length=256)] = Field(
+        None,
+        description="The name of the assistant. The maximum length is 256 characters.\n",
+    )
+    description: Optional[constr(max_length=512)] = Field(
+        None,
+        description="The description of the assistant. The maximum length is 512 characters.\n",
+    )
+    instructions: Optional[constr(max_length=32768)] = Field(
+        None,
+        description="The system instructions that the assistant uses. The maximum length is 32768 characters.\n",
+    )
+    tools: Optional[
+        List[
+            Union[
+                AssistantToolsCode,
+                AssistantToolsRetrieval,
+                AssistantToolsFunction,
+                AssistantToolsBrowser,
+            ]
+        ]
+    ] = Field(
+        [],
+        description="A list of tool enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`.\n",
+        max_items=128,
+    )
+    file_ids: Optional[List[str]] = Field(
+        [],
+        description="A list of [File](/docs/api-reference/files) IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. Files are ordered by their creation date in ascending order. If a file was previosuly attached to the list but does not show up in the list, it will be deleted from the assistant.\n",
+        max_items=20,
+    )
+    metadata: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+
+class SubmitToolOutputs(BaseModel):
+    tool_calls: List[RunToolCallObject] = Field(
+        ..., description="A list of the relevant tool calls."
+    )
+
+
+class RequiredAction(BaseModel):
+    type: Type10 = Field(
+        ..., description="For now, this is always `submit_tool_outputs`."
+    )
+    submit_tool_outputs: SubmitToolOutputs = Field(
+        ..., description="Details on the tool outputs needed for this run to continue."
+    )
+
+
+# class RunObject(BaseModel):
+#     id: str = Field(
+#         ..., description="The identifier, which can be referenced in API endpoints."
+#     )
+#     object: Object22 = Field(
+#         ..., description="The object type, which is always `thread.run`."
+#     )
+#     created_at: int = Field(
+#         ..., description="The Unix timestamp (in seconds) for when the run was created."
+#     )
+#     thread_id: str = Field(
+#         ...,
+#         description="The ID of the [thread](/docs/api-reference/threads) that was executed on as a part of this run.",
+#     )
+#     assistant_id: str = Field(
+#         ...,
+#         description="The ID of the [assistant](/docs/api-reference/assistants) used for execution of this run.",
+#     )
+#     status: Status2 = Field(
+#         ...,
+#         description="The status of the run, which can be either `queued`, `in_progress`, `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, or `expired`.",
+#     )
+#     required_action: RequiredAction = Field(
+#         ...,
+#         description="Details on the action required to continue the run. Will be `null` if no action is required.",
+#     )
+#     last_error: LastError = Field(
+#         ...,
+#         description="The last error associated with this run. Will be `null` if there are no errors.",
+#     )
+#     expires_at: int = Field(
+#         ..., description="The Unix timestamp (in seconds) for when the run will expire."
+#     )
+#     started_at: int = Field(
+#         ..., description="The Unix timestamp (in seconds) for when the run was started."
+#     )
+#     cancelled_at: int = Field(
+#         ...,
+#         description="The Unix timestamp (in seconds) for when the run was cancelled.",
+#     )
+#     failed_at: int = Field(
+#         ..., description="The Unix timestamp (in seconds) for when the run failed."
+#     )
+#     completed_at: int = Field(
+#         ...,
+#         description="The Unix timestamp (in seconds) for when the run was completed.",
+#     )
+#     model: str = Field(
+#         ...,
+#         description="The model that the [assistant](/docs/api-reference/assistants) used for this run.",
+#     )
+#     instructions: str = Field(
+#         ...,
+#         description="The instructions that the [assistant](/docs/api-reference/assistants) used for this run.",
+#     )
+#     tools: List[
+#         Union[AssistantToolsCode, AssistantToolsRetrieval, AssistantToolsFunction]
+#     ] = Field(
+#         ...,
+#         description="The list of tools that the [assistant](/docs/api-reference/assistants) used for this run.",
+#         max_items=20,
+#     )
+#     file_ids: List[str] = Field(
+#         ...,
+#         description="The list of [File](/docs/api-reference/files) IDs the [assistant](/docs/api-reference/assistants) used for this run.",
+#     )
+#     metadata: Dict[str, Any] = Field(
+#         ...,
+#         description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+#     )
+
+
+# class ListRunsResponse(BaseModel):
+#     object: str = Field(..., example="list")
+#     data: List[RunObject]
+#     first_id: str = Field(..., example="run_hLBK7PXBv5Lr2NQT7KLY0ag1")
+#     last_id: str = Field(..., example="run_QLoItBbqwyAJEzlTy4y9kOMM")
+#     has_more: bool = Field(..., example=False)
+
+
+class CreateThreadRequest(BaseModel):
+    class Config:
+        extra = Extra.forbid
+
+    messages: Optional[List[CreateMessageRequest]] = Field(
+        None,
+        description="A list of [messages](/docs/api-reference/messages) to start the thread with.",
+    )
+    metadata: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+
+class Text(BaseModel):
+    value: str = Field(..., description="The data that makes up the text.")
+    annotations: List[
+        Union[
+            MessageContentTextAnnotationsFileCitationObject,
+            MessageContentTextAnnotationsFilePathObject,
+        ]
+    ]
+
+
+class MessageContentTextObject(BaseModel):
+    type: Type13 = Field(..., description="Always `text`.")
+    text: Text
+
+
+class CodeInterpreter(BaseModel):
+    input: str = Field(..., description="The input to the Code Interpreter tool call.")
+    outputs: List[
+        Union[
+            RunStepDetailsToolCallsCodeOutputLogsObject,
+            RunStepDetailsToolCallsCodeOutputImageObject,
+        ]
+    ] = Field(
+        ...,
+        description="The outputs from the Code Interpreter tool call. Code Interpreter can output one or more items, including text (`logs`) or images (`image`). Each of these are represented by a different object type.",
+    )
+
+
+class RunStepDetailsToolCallsCodeObject(BaseModel):
+    id: str = Field(..., description="The ID of the tool call.")
+    type: Type19 = Field(
+        ...,
+        description="The type of tool call. This is always going to be `code_interpreter` for this type of tool call.",
+    )
+    code_interpreter: CodeInterpreter = Field(
+        ..., description="The Code Interpreter tool call definition."
+    )
+
+
+class ChatCompletionRequestAssistantMessage(BaseModel):
+    content: str = Field(..., description="The contents of the assistant message.\n")
+    role: Role2 = Field(
+        ..., description="The role of the messages author, in this case `assistant`."
+    )
+    tool_calls: Optional[ChatCompletionMessageToolCalls] = None
+    function_call: Optional[FunctionCall] = Field(
+        None,
+        description="Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.",
+    )
+
+
+class ListFineTunesResponse(BaseModel):  # List wrapper around FineTune records; both fields are required.
+    data: List[FineTune]  # the fine-tune entries
+    object: Object10  # presumably the constant object-type tag — see Object10 for the allowed value(s)
+
+
+class CreateThreadAndRunRequest(BaseModel):
+    class Config:
+        extra = Extra.forbid
+
+    assistant_id: str = Field(
+        ...,
+        description="The ID of the [assistant](/docs/api-reference/assistants) to use to execute this run.",
+    )
+    thread: Optional[CreateThreadRequest] = Field(
+        None, description="If no thread is provided, an empty thread will be created."
+    )
+    model: Optional[str] = Field(
+        None,
+        description="The ID of the [Model](/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.",
+    )
+    instructions: Optional[str] = Field(
+        None,
+        description="Override the default system message of the assistant. This is useful for modifying the behavior on a per-run basis.",
+    )
+    tools: Optional[
+        List[
+            Union[
+                AssistantToolsCode,
+                AssistantToolsRetrieval,
+                AssistantToolsFunction,
+                AssistantToolsBrowser,
+            ]
+        ]
+    ] = Field(
+        None,
+        description="Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis.",
+        max_items=20,
+    )
+    metadata: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+    )
+
+
+# class MessageObject(BaseModel):
+#     id: str = Field(
+#         ..., description="The identifier, which can be referenced in API endpoints."
+#     )
+#     object: Object25 = Field(
+#         ..., description="The object type, which is always `thread.message`."
+#     )
+#     created_at: int = Field(
+#         ...,
+#         description="The Unix timestamp (in seconds) for when the message was created.",
+#     )
+#     thread_id: str = Field(
+#         ...,
+#         description="The [thread](/docs/api-reference/threads) ID that this message belongs to.",
+#     )
+#     role: Role7 = Field(
+#         ...,
+#         description="The entity that produced the message. One of `user` or `assistant`.",
+#     )
+#     content: List[
+#         Union[MessageContentImageFileObject, MessageContentTextObject]
+#     ] = Field(
+#         ..., description="The content of the message in array of text and/or images."
+#     )
+#     assistant_id: str = Field(
+#         ...,
+#         description="If applicable, the ID of the [assistant](/docs/api-reference/assistants) that authored this message.",
+#     )
+#     run_id: str = Field(
+#         ...,
+#         description="If applicable, the ID of the [run](/docs/api-reference/runs) associated with the authoring of this message.",
+#     )
+#     file_ids: List[str] = Field(
+#         ...,
+#         description="A list of [file](/docs/api-reference/files) IDs that the assistant should use. Useful for tools like retrieval and code_interpreter that can access files. A maximum of 10 files can be attached to a message.",
+#         max_items=10,
+#     )
+#     metadata: Dict[str, Any] = Field(
+#         ...,
+#         description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+#     )
+
+
+class RunStepDetailsToolCallsObject(BaseModel):
+    type: Type18 = Field(..., description="Always `tool_calls`.")
+    tool_calls: List[
+        Union[
+            RunStepDetailsToolCallsCodeObject,
+            RunStepDetailsToolCallsRetrievalObject,
+            RunStepDetailsToolCallsFunctionObject,
+        ]
+    ] = Field(
+        ...,
+        description="An array of tool calls the run step was involved in. These can be associated with one of three types of tools: `code_interpreter`, `retrieval`, or `function`.\n",
+    )
+
+
+class ChatCompletionRequestMessage(BaseModel):  # Custom-root model: a request message is exactly one of the role-specific models listed below.
+    __root__: Union[  # NOTE(review): pydantic v1 tries Union members in listed order unless smart_union is enabled — confirm this order is intentional
+        ChatCompletionRequestSystemMessage,
+        ChatCompletionRequestUserMessage,
+        ChatCompletionRequestAssistantMessage,
+        ChatCompletionRequestToolMessage,
+        ChatCompletionRequestFunctionMessage,
+    ]
+
+
+class CreateChatCompletionRequest(BaseModel):
+    messages: List[ChatCompletionRequestMessage] = Field(
+        ...,
+        description="A list of messages comprising the conversation so far. [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).",
+        min_items=1,
+    )
+    model: Union[str, ModelEnum1] = Field(
+        ...,
+        description="ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API.",
+        example="gpt-3.5-turbo",
+    )
+    frequency_penalty: Optional[confloat(ge=-2.0, le=2.0)] = Field(
+        0,
+        description="Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details)\n",
+    )
+    logit_bias: Optional[Dict[str, int]] = Field(
+        None,
+        description="Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.\n",
+    )
+    max_tokens: Optional[int] = Field(
+        "inf",
+        description="The maximum number of [tokens](/tokenizer) to generate in the chat completion.\n\nThe total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.\n",
+    )
+    n: Optional[conint(ge=1, le=128)] = Field(
+        1,
+        description="How many chat completion choices to generate for each input message.",
+        example=1,
+    )
+    presence_penalty: Optional[confloat(ge=-2.0, le=2.0)] = Field(
+        0,
+        description="Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details)\n",
+    )
+    response_format: Optional[ResponseFormat] = Field(
+        None,
+        description='An object specifying the format that the model must output. \n\nSetting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in increased latency and appearance of a "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.\n',
+    )
+    seed: Optional[conint(ge=-9223372036854775808, le=9223372036854775808)] = Field(
+        None,
+        description="This feature is in Beta. \nIf specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.\nDeterminism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.\n",
+    )
+    stop: Optional[Union[str, List[str]]] = Field(
+        None,
+        description="Up to 4 sequences where the API will stop generating further tokens.\n",
+    )
+    stream: Optional[bool] = Field(
+        False,
+        description="If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).\n",
+    )
+    temperature: Optional[confloat(ge=0.0, le=2.0)] = Field(
+        1,
+        description="What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.\n",
+        example=1,
+    )
+    top_p: Optional[confloat(ge=0.0, le=1.0)] = Field(
+        1,
+        description="An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both.\n",
+        example=1,
+    )
+    tools: Optional[List[ChatCompletionTool]] = Field(
+        None,
+        description="A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for.\n",
+    )
+    tool_choice: Optional[ChatCompletionToolChoiceOption] = None
+    user: Optional[str] = Field(
+        None,
+        description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n",
+        example="user-1234",
+    )
+    function_call: Optional[
+        Union[FunctionCallEnum, ChatCompletionFunctionCallOption]
+    ] = Field(
+        None,
+        description='Deprecated in favor of `tool_choice`.\n\nControls which (if any) function is called by the model.\n`none` means the model will not call a function and instead generates a message.\n`auto` means the model can pick between generating a message or calling a function.\nSpecifying a particular function via `{"name": "my_function"}` forces the model to call that function.\n\n`none` is the default when no functions are present. `auto`` is the default if functions are present.\n',
+    )
+    functions: Optional[List[ChatCompletionFunctions]] = Field(
+        None,
+        description="Deprecated in favor of `tools`.\n\nA list of functions the model may generate JSON inputs for.\n",
+        max_items=128,
+        min_items=1,
+    )
+
+
+# class RunStepObject(BaseModel):
+#     id: str = Field(
+#         ...,
+#         description="The identifier of the run step, which can be referenced in API endpoints.",
+#     )
+#     object: Object27 = Field(
+#         ..., description="The object type, which is always `thread.run.step`."
+#     )
+#     created_at: int = Field(
+#         ...,
+#         description="The Unix timestamp (in seconds) for when the run step was created.",
+#     )
+#     assistant_id: str = Field(
+#         ...,
+#         description="The ID of the [assistant](/docs/api-reference/assistants) associated with the run step.",
+#     )
+#     thread_id: str = Field(
+#         ...,
+#         description="The ID of the [thread](/docs/api-reference/threads) that was run.",
+#     )
+#     run_id: str = Field(
+#         ...,
+#         description="The ID of the [run](/docs/api-reference/runs) that this run step is a part of.",
+#     )
+#     type: Type16 = Field(
+#         ...,
+#         description="The type of run step, which can be either `message_creation` or `tool_calls`.",
+#     )
+#     status: Status3 = Field(
+#         ...,
+#         description="The status of the run step, which can be either `in_progress`, `cancelled`, `failed`, `completed`, or `expired`.",
+#     )
+#     step_details: Union[
+#         RunStepDetailsMessageCreationObject, RunStepDetailsToolCallsObject
+#     ] = Field(..., description="The details of the run step.")
+#     last_error: LastError1 = Field(
+#         ...,
+#         description="The last error associated with this run step. Will be `null` if there are no errors.",
+#     )
+#     expired_at: int = Field(
+#         ...,
+#         description="The Unix timestamp (in seconds) for when the run step expired. A step is considered expired if the parent run is expired.",
+#     )
+#     cancelled_at: int = Field(
+#         ...,
+#         description="The Unix timestamp (in seconds) for when the run step was cancelled.",
+#     )
+#     failed_at: int = Field(
+#         ..., description="The Unix timestamp (in seconds) for when the run step failed."
+#     )
+#     completed_at: int = Field(
+#         ...,
+#         description="The Unix timestamp (in seconds) for when the run step completed.",
+#     )
+#     metadata: Dict[str, Any] = Field(
+#         ...,
+#         description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.\n",
+#     )
+
+
+# class ListRunStepsResponse(BaseModel):
+#     object: str = Field(..., example="list")
+#     data: List[RunStepObject]
+#     first_id: str = Field(..., example="step_hLBK7PXBv5Lr2NQT7KLY0ag1")
+#     last_id: str = Field(..., example="step_QLoItBbqwyAJEzlTy4y9kOMM")
+#     has_more: bool = Field(..., example=False)
diff --git a/services/backend/api_server/requirements.txt b/services/backend/api_server/requirements.txt
new file mode 100644
index 0000000..109134c
--- /dev/null
+++ b/services/backend/api_server/requirements.txt
@@ -0,0 +1,15 @@
+aioredis==2.0.1
+beanie==1.23.6
+celery==5.3.6
+fastapi==0.105.0
+motor==3.3.2
+openai==1.6.1
+pymilvus==2.3.4
+pydantic==1.10.9
+python-multipart==0.0.6
+redis==5.0.1
+uvicorn==0.25.0
+websockets==12.0
+requests==2.31.0
+googlesearch-python==1.2.3
+langchain==0.0.351