Skip to content

Commit

Permalink
1.7.0 release updates (#454)
Browse files Browse the repository at this point in the history
* remove deprecated models

* add new openai models

* up passthrough limit

* up litellm version

* up refact version to 1.7.0

* upgrade cython
  • Loading branch information
mitya52 authored Sep 12, 2024
1 parent c5162ff commit 5411936
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 137 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ RUN cd /tmp/refact-lsp \
COPY . /tmp/app
RUN echo "refact $(git -C /tmp/app rev-parse HEAD)" >> /refact-build-info.txt
RUN pip install ninja
RUN pip install -U cython
RUN pip install /tmp/app -v --no-build-isolation && rm -rf /tmp/app

ENV REFACT_PERM_DIR "/perm_storage"
Expand Down
13 changes: 0 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,22 +106,9 @@ Extensions > Refact.ai Assistant > Settings > Infurl
| Model | Completion | Chat | Fine-tuning | [Deprecated](## "Will be removed in next versions") |
|---------------------------------------------------------------------------------------------------|------------|------|-------------|-----------------------------------------------------|
| [Refact/1.6B](https://huggingface.co/smallcloudai/Refact-1_6B-fim) | + | | + | |
| [starcoder/1b/base](https://huggingface.co/smallcloudai/starcoderbase-1b) | + | | + | + |
| [starcoder/3b/base](https://huggingface.co/smallcloudai/starcoderbase-3b) | + | | + | + |
| [starcoder/7b/base](https://huggingface.co/smallcloudai/starcoderbase-7b) | + | | + | + |
| [starcoder/15b/base](https://huggingface.co/TheBloke/starcoder-GPTQ) | + | | | + |
| [starcoder/15b/plus](https://huggingface.co/TheBloke/starcoderplus-GPTQ) | + | | | + |
| [starcoder2/3b/base](https://huggingface.co/bigcode/starcoder2-3b) | + | | + | |
| [starcoder2/7b/base](https://huggingface.co/bigcode/starcoder2-7b) | + | | + | |
| [starcoder2/15b/base](https://huggingface.co/bigcode/starcoder2-15b) | + | | + | |
| [wizardcoder/15b](https://huggingface.co/TheBloke/WizardCoder-15B-1.0-GPTQ) | + | | | + |
| [codellama/7b](https://huggingface.co/TheBloke/CodeLlama-7B-fp16) | + | | + | + |
| [starchat/15b/beta](https://huggingface.co/TheBloke/starchat-beta-GPTQ) | | + | | + |
| [wizardlm/7b](https://huggingface.co/TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ) | | + | | + |
| [wizardlm/13b](https://huggingface.co/TheBloke/WizardLM-13B-V1.1-GPTQ) | | + | | + |
| [wizardlm/30b](https://huggingface.co/TheBloke/WizardLM-30B-fp16) | | + | | + |
| [llama2/7b](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GPTQ) | | + | | + |
| [llama2/13b](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ) | | + | | + |
| [deepseek-coder/1.3b/base](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base) | + | | + | |
| [deepseek-coder/5.7b/mqa-base](https://huggingface.co/deepseek-ai/deepseek-coder-5.7bmqa-base) | + | | + | |
| [magicoder/6.7b](https://huggingface.co/TheBloke/Magicoder-S-DS-6.7B-GPTQ) | | + | | |
Expand Down
118 changes: 0 additions & 118 deletions refact_known_models/huggingface.py
Original file line number Diff line number Diff line change
@@ -1,122 +1,4 @@
huggingface_mini_db = {
"starcoder/15b/base": {
"backend": "autogptq",
"model_path": "TheBloke/starcoder-GPTQ",
"model_class_kwargs": {},
"required_memory_mb": 18000,
"T": 4096,
"filter_caps": ["completion"],
"deprecated": True,
},
"starcoder/15b/plus": {
"backend": "autogptq",
"model_path": "TheBloke/starcoderplus-GPTQ",
"model_class_kwargs": {},
"required_memory_mb": 18000,
"T": 4096,
"filter_caps": ["completion"],
"deprecated": True,
},
"starchat/15b/beta": {
"backend": "autogptq",
"model_path": "TheBloke/starchat-beta-GPTQ",
"model_class_kwargs": {},
"required_memory_mb": 18000,
"T": 4096,
"filter_caps": ["chat"],
"deprecated": True,
},
"starcoder/1b/base": {
"backend": "transformers",
"model_path": "smallcloudai/starcoderbase-1b",
"model_class_kwargs": {},
"required_memory_mb": 8000,
"T": 8192,
"filter_caps": ["completion", "finetune"],
"deprecated": True,
},
"starcoder/3b/base": {
"backend": "transformers",
"model_path": "smallcloudai/starcoderbase-3b",
"model_class_kwargs": {},
"required_memory_mb": 12000,
"T": 4096,
"filter_caps": ["completion", "finetune"],
"deprecated": True,
},
"starcoder/7b/base": {
"backend": "transformers",
"model_path": "smallcloudai/starcoderbase-7b",
"model_class_kwargs": {},
"required_memory_mb": 20000,
"T": 4096,
"filter_caps": ["completion", "finetune"],
"deprecated": True,
},
"wizardcoder/15b": {
"backend": "autogptq",
"model_path": "TheBloke/WizardCoder-15B-1.0-GPTQ",
"model_class_kwargs": {},
"required_memory_mb": 18000,
"T": 4096,
"filter_caps": ["completion"],
"deprecated": True,
},
"wizardlm/7b": {
"backend": "autogptq",
"model_path": "TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ",
"model_class_kwargs": {},
"required_memory_mb": 8000,
"T": 2048,
"filter_caps": ["chat"],
"deprecated": True,
},
"wizardlm/13b": {
"backend": "autogptq",
"model_path": "TheBloke/WizardLM-13B-V1.1-GPTQ",
"model_class_kwargs": {},
"required_memory_mb": 14000,
"T": 2048,
"filter_caps": ["chat"],
"deprecated": True,
},
"llama2/7b": {
"backend": "autogptq",
"model_path": "TheBloke/Llama-2-7b-Chat-GPTQ",
"model_class_kwargs": {},
"required_memory_mb": 8000,
"T": 2048,
"filter_caps": ["chat"],
"deprecated": True,
},
"llama2/13b": {
"backend": "autogptq",
"model_path": "TheBloke/Llama-2-13B-chat-GPTQ",
"model_class_kwargs": {},
"required_memory_mb": 14000,
"T": 2048,
"filter_caps": ["chat"],
"deprecated": True,
},
"codellama/7b": {
"backend": "transformers",
"model_path": "TheBloke/CodeLlama-7B-fp16",
"model_class_kwargs": {},
"required_memory_mb": 14000,
"T": 2048,
"filter_caps": ["completion"],
"deprecated": True,
},
"wizardlm/30b": {
"backend": "transformers",
"model_path": "TheBloke/WizardLM-30B-fp16",
"model_class_kwargs": {
"load_in_4bit": True,
},
"T": 2048,
"filter_caps": ["chat"],
"deprecated": True,
},
"deepseek-coder/1.3b/base": {
"backend": "transformers",
"model_path": "deepseek-ai/deepseek-coder-1.3b-base",
Expand Down
33 changes: 33 additions & 0 deletions refact_known_models/passthrough.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,37 @@
"pp1000t_generated": 15_000,
"filter_caps": ["chat", "tools"],
},
"gpt-4o-2024-05-13": {
"backend": "litellm",
"provider": "openai",
"tokenizer_path": "Xenova/gpt-4o",
"resolve_as": "gpt-4o-2024-05-13",
"T": 128_000,
"T_out": 4096,
"pp1000t_prompt": 5_000,
"pp1000t_generated": 15_000, # $15.00 / 1M tokens
"filter_caps": ["chat", "tools"],
},
"gpt-4o-2024-08-06": {
"backend": "litellm",
"provider": "openai",
"tokenizer_path": "Xenova/gpt-4o",
"resolve_as": "gpt-4o-2024-08-06",
"T": 128_000,
"T_out": 4096,
"pp1000t_prompt": 2_500,
        "pp1000t_generated": 10_000,  # $10.00 / 1M tokens
"filter_caps": ["chat", "tools"]
},
"gpt-4o-mini": {
"backend": "litellm",
"provider": "openai",
"tokenizer_path": "Xenova/gpt-4o",
"resolve_as": "gpt-4o-mini-2024-07-18",
"T": 128_000,
"T_out": 4096,
"pp1000t_prompt": 150,
"pp1000t_generated": 600, # $0.60 / 1M tokens
"filter_caps": ["chat", "tools"],
},
}
2 changes: 1 addition & 1 deletion refact_webgui/webgui/selfhost_model_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def resolve_model_context_size(model_name: str, model_assigner: ModelAssigner) -
if model_name in model_assigner.models_db:
return model_assigner.model_assignment["model_assign"][model_name]["n_ctx"]

PASSTHROUGH_MAX_TOKENS_LIMIT = 16_000
PASSTHROUGH_MAX_TOKENS_LIMIT = 64_000

if model_name in model_assigner.passthrough_mini_db:
if max_tokens := model_assigner.passthrough_mini_db[model_name].get('T'):
Expand Down
7 changes: 2 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ class PyPackage:
"refact_webgui": PyPackage(
requires=["aiohttp", "aiofiles", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic>=2",
"starlette==0.27.0", "uvicorn", "uvloop", "termcolor", "python-multipart", "more_itertools",
"scyllapy==1.3.0", "pandas>=2.0.3",
# NOTE: litellm has bug with anthropic streaming, so we're staying on this version for now
"litellm==1.42.0",
],
"scyllapy==1.3.0", "pandas>=2.0.3", "litellm>=1.44.24"],
requires_packages=["refact_known_models", "refact_utils"],
data=["webgui/static/*", "webgui/static/components/modals/*",
"webgui/static/dashboards/*", "webgui/static/assets/*", "webgui/static/utils/*",]),
Expand Down Expand Up @@ -94,7 +91,7 @@ def get_install_requires(packages):

setup(
name="refact-self-hosting",
version="1.6.4",
version="1.7.0",
py_modules=list(setup_packages.keys()),
package_data={
name: py_package.data
Expand Down

0 comments on commit 5411936

Please sign in to comment.