Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gemini Support #469

Merged
merged 8 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 52 additions & 2 deletions refact_known_models/passthrough.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,5 +209,55 @@
"pp1000t_prompt": 150,
"pp1000t_generated": 600, # TODO: don't know the price
"filter_caps": ["chat", "completion"],
}
}
},

# gemini and gemma bear the same tokenizer
# according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677
# a downloadable tokenizer.json does not exist for gemini; the proposed alternative (the vertexai lib in python) relies on web requests
# for pricing consult: https://ai.google.dev/pricing
# the pricing below assumes a context of <= 128_000 tokens

"gemini-2.0-flash-exp": {
"backend": "litellm",
"provider": "gemini",
"tokenizer_path": "google/gemma-7b",
valaises marked this conversation as resolved.
Show resolved Hide resolved
"resolve_as": "gemini-2.0-flash-exp",
"T": 1_048_576,
"T_out": 8_192,
"pp1000t_prompt": 75, # $0.075 / 1M tokens
"pp1000t_generated": 300, # $0.30 / 1M tokens
"filter_caps": ["chat", "tools", "completion", "multimodal"],
},
"gemini-1.5-flash": {
"backend": "litellm",
"provider": "gemini",
"tokenizer_path": "google/gemma-7b",
"resolve_as": "gemini-1.5-flash",
"T": 1_048_576,
"T_out": 8_192,
"pp1000t_prompt": 75, # $0.075 / 1M tokens
"pp1000t_generated": 300, # $0.30 / 1M tokens
"filter_caps": ["chat", "tools", "completion", "multimodal"],
},
"gemini-1.5-flash-8b": {
"backend": "litellm",
"provider": "gemini",
"tokenizer_path": "google/gemma-7b",
"resolve_as": "gemini-1.5-flash-8b",
"T": 1_048_576,
"T_out": 8_192,
"pp1000t_prompt": 37.5, # $0.0375 / 1M tokens
"pp1000t_generated": 150, # $0.15 / 1M tokens
"filter_caps": ["chat", "tools", "completion", "multimodal"],
},
"gemini-1.5-pro": {
"backend": "litellm",
"provider": "gemini",
"tokenizer_path": "google/gemma-7b",
"resolve_as": "gemini-1.5-pro",
"T": 2_097_152,
"T_out": 8_192,
"pp1000t_prompt": 1250, # $1.25 / 1M tokens
"pp1000t_generated": 5000, # $5.00 / 1M tokens
"filter_caps": ["chat", "tools", "completion", "multimodal"],
}}
3 changes: 3 additions & 0 deletions refact_utils/finetune/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ def _add_results_for_passthrough_provider(provider: str) -> None:
if data.get('cerebras_api_enable'):
_add_results_for_passthrough_provider('cerebras')

if data.get('gemini_api_enable'):
_add_results_for_passthrough_provider('gemini')

if data.get('groq_api_enable'):
_add_results_for_passthrough_provider('groq')

Expand Down
7 changes: 6 additions & 1 deletion refact_webgui/webgui/selfhost_fastapi_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from fastapi import APIRouter, HTTPException, Query, Header
from fastapi.responses import Response, StreamingResponse

from refact_utils.huggingface.utils import huggingface_hub_token
valaises marked this conversation as resolved.
Show resolved Hide resolved
from refact_utils.scripts import env
from refact_utils.finetune.utils import running_models_and_loras
from refact_webgui.webgui.selfhost_model_resolve import resolve_model_context_size
Expand Down Expand Up @@ -233,6 +234,7 @@ def _integrations_env_setup(env_var_name: str, api_key_name: str, api_enable_nam
_integrations_env_setup("ANTHROPIC_API_KEY", "anthropic_api_key", "anthropic_api_enable")
_integrations_env_setup("GROQ_API_KEY", "groq_api_key", "groq_api_enable")
_integrations_env_setup("CEREBRAS_API_KEY", "cerebras_api_key", "cerebras_api_enable")
_integrations_env_setup("GEMINI_API_KEY", "gemini_api_key", "gemini_api_enable")

def _models_available_dict_rewrite(self, models_available: List[str]) -> Dict[str, Any]:
rewrite_dict = {}
Expand Down Expand Up @@ -337,7 +339,10 @@ async def _passthrough_tokenizer(self, model_path: str) -> str:
try:
async with aiohttp.ClientSession() as session:
tokenizer_url = f"https://huggingface.co/{model_path}/resolve/main/tokenizer.json"
async with session.get(tokenizer_url) as resp:
headers = {}
if hf_token := huggingface_hub_token():
headers["Authorization"] = f"Bearer {hf_token}"
async with session.get(tokenizer_url, headers=headers) as resp:
valaises marked this conversation as resolved.
Show resolved Hide resolved
return await resp.text()
except:
raise HTTPException(404, detail=f"can't load tokenizer.json for passthrough {model_path}")
Expand Down
2 changes: 2 additions & 0 deletions refact_webgui/webgui/selfhost_model_assigner.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def first_run(self):
"anthropic_api_enable": False,
"groq_api_enable": False,
"cerebras_api_enable": False,
"gemini_api_enable": False,
}
self.models_to_watchdog_configs(default_config)

Expand Down Expand Up @@ -259,6 +260,7 @@ def model_assignment(self):
j = json.load(open(env.CONFIG_INFERENCE, "r"))
j["groq_api_enable"] = j.get("groq_api_enable", False)
j["cerebras_api_enable"] = j.get("cerebras_api_enable", False)
j["gemini_api_enable"] = j.get("gemini_api_enable", False)
else:
j = {"model_assign": {}}

Expand Down
2 changes: 2 additions & 0 deletions refact_webgui/webgui/selfhost_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ def _add_models_for_passthrough_provider(provider):
_add_models_for_passthrough_provider('groq')
if j.get("cerebras_api_enable"):
_add_models_for_passthrough_provider('cerebras')
if j.get("gemini_api_enable"):
_add_models_for_passthrough_provider('gemini')

return self._models_available

Expand Down
1 change: 1 addition & 0 deletions refact_webgui/webgui/selfhost_static.py
valaises marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(self, *args, **kwargs):
]

async def _index(self):
print(self.static_folders)
for spath in self.static_folders:
fn = os.path.join(spath, "index.html")
if os.path.exists(fn):
Expand Down
5 changes: 5 additions & 0 deletions refact_webgui/webgui/static/tab-model-hosting.html
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ <h3>3rd Party APIs</h3>
<input class="form-check-input" type="checkbox" role="switch" id="enable_cerebras">
<label class="form-check-label" for="enable_cerebras">Enable Cerebras API</label>
</div>
<div class="form-check form-switch">
<input class="form-check-input" type="checkbox" role="switch" id="enable_gemini">
<label class="form-check-label" for="enable_gemini">Enable Gemini API</label>
</div>

<div class="chat-enabler-status">
To enable Chat GPT add your API key in the <span id="redirect2credentials" class="main-tab-button fake-link" data-tab="settings">API Keys tab</span>.
</div>
Expand Down
5 changes: 5 additions & 0 deletions refact_webgui/webgui/static/tab-model-hosting.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ function get_models()
integration_switch_init('enable_anthropic', models_data['anthropic_api_enable']);
integration_switch_init('enable_groq', models_data['groq_api_enable']);
integration_switch_init('enable_cerebras', models_data['cerebras_api_enable']);
integration_switch_init('enable_gemini', models_data['gemini_api_enable']);


const more_gpus_notification = document.querySelector('.model-hosting-error');
if(data.hasOwnProperty('more_models_than_gpus') && data.more_models_than_gpus) {
Expand All @@ -144,6 +146,8 @@ function save_model_assigned() {
const anthropic_enable = document.querySelector('#enable_anthropic');
const groq_enable = document.querySelector('#enable_groq');
const cerebras_enable = document.querySelector('#enable_cerebras');
const gemini_enable = document.querySelector('#enable_gemini');

const data = {
model_assign: {
...models_data.model_assign,
Expand All @@ -152,6 +156,7 @@ function save_model_assigned() {
anthropic_api_enable: anthropic_enable.checked,
groq_api_enable: groq_enable.checked,
cerebras_api_enable: cerebras_enable.checked,
gemini_api_enable: gemini_enable.checked,
};
console.log(data);
fetch("/tab-host-models-assign", {
Expand Down
3 changes: 3 additions & 0 deletions refact_webgui/webgui/static/tab-settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ <h2>API Integrations</h2>
<input type="text" name="groq_api_key" value="" class="form-control" id="groq_api_key">
<label for="cerebras_api_key" class="form-label mt-4">Cerebras API Key</label>
<input type="text" name="cerebras_api_key" value="" class="form-control" id="cerebras_api_key">
<label for="gemini_api_key" class="form-label mt-4">Gemini API Key</label>
<input type="text" name="gemini_api_key" value="" class="form-control" id="gemini_api_key">

<!-- <div class="d-flex flex-row-reverse mt-3"><button type="button" class="btn btn-primary" id="integrations-save">Save</button></div>-->
</div>
</div>
Expand Down
8 changes: 8 additions & 0 deletions refact_webgui/webgui/static/tab-settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ function save_integration_api_keys() {
const anthropic_api_key = document.getElementById('anthropic_api_key');
const groq_api_key = document.getElementById('groq_api_key');
const cerebras_api_key = document.getElementById('cerebras_api_key');
const gemini_api_key = document.getElementById("gemini_api_key");

const huggingface_api_key = document.getElementById('huggingface_api_key');
fetch("/tab-settings-integrations-save", {
method: "POST",
Expand All @@ -185,6 +187,8 @@ function save_integration_api_keys() {
anthropic_api_key: anthropic_api_key.getAttribute('data-value'),
groq_api_key: groq_api_key.getAttribute('data-value'),
cerebras_api_key: cerebras_api_key.getAttribute('data-value'),
gemini_api_key: gemini_api_key.getAttribute("data-value"),

huggingface_api_key: huggingface_api_key.getAttribute('data-value'),
})
})
Expand All @@ -195,6 +199,8 @@ function save_integration_api_keys() {
anthropic_api_key.setAttribute('data-saved-value', anthropic_api_key.getAttribute('data-value'))
groq_api_key.setAttribute('data-saved-value', groq_api_key.getAttribute('data-value'))
cerebras_api_key.setAttribute('data-saved-value', cerebras_api_key.getAttribute('data-value'))
gemini_api_key.setAttribute('data-saved-value', gemini_api_key.getAttribute('data-value'))

huggingface_api_key.setAttribute('data-saved-value', huggingface_api_key.getAttribute('data-value'))
});
}
Expand Down Expand Up @@ -230,6 +236,8 @@ export function tab_settings_integrations_get() {
integrations_input_init(document.getElementById('anthropic_api_key'), data['anthropic_api_key']);
integrations_input_init(document.getElementById('groq_api_key'), data['groq_api_key']);
integrations_input_init(document.getElementById('cerebras_api_key'), data['cerebras_api_key']);
integrations_input_init(document.getElementById('gemini_api_key'), data['gemini_api_key']);

integrations_input_init(document.getElementById('huggingface_api_key'), data['huggingface_api_key']);
});
}
Expand Down
1 change: 1 addition & 0 deletions refact_webgui/webgui/tab_models_host.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class TabHostModelsAssign(BaseModel):
anthropic_api_enable: bool = False
groq_api_enable: bool = False
cerebras_api_enable: bool = False
gemini_api_enable: bool = False

model_config = ConfigDict(protected_namespaces=()) # avoiding model_ namespace protection

Expand Down
2 changes: 2 additions & 0 deletions refact_webgui/webgui/tab_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class Integrations(BaseModel):
anthropic_api_key: Optional[str] = None
groq_api_key: Optional[str] = None
cerebras_api_key: Optional[str] = None
gemini_api_key: Optional[str] = None

huggingface_api_key: Optional[str] = None

def __init__(self, models_assigner: ModelAssigner, *args, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class PyPackage:
"refact_webgui": PyPackage(
requires=["aiohttp", "aiofiles", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic>=2",
"starlette==0.27.0", "uvicorn", "uvloop", "termcolor", "python-multipart", "more_itertools",
"scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.49.5"],
"scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.55.3"],
requires_packages=["refact_known_models", "refact_utils"],
data=["webgui/static/*", "webgui/static/components/modals/*",
"webgui/static/dashboards/*", "webgui/static/assets/*", "webgui/static/utils/*",]),
Expand All @@ -45,7 +45,7 @@ class PyPackage:
"bitsandbytes", "safetensors", "peft", "triton",
"torchinfo", "mpi4py", "deepspeed>=0.15.3",
"sentence-transformers", "huggingface-hub>=0.26.2",
"aiohttp", "setproctitle"],
"aiohttp", "setproctitle", "google-auth>=2.37.0"],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

google-auth? for what purpose?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

litellm requires it to refer to google api

optional=["ninja", "flash-attn"],
requires_packages=["refact_known_models", "refact_data_pipeline",
"refact_webgui", "refact_utils"],
Expand Down
Loading