Skip to content

Commit

Permalink
更新infinity==0.0.73 vllm==0.6.5
Browse files Browse the repository at this point in the history
  • Loading branch information
shell-nlp committed Dec 20, 2024
1 parent 1dba906 commit 4a40bf6
Show file tree
Hide file tree
Showing 5 changed files with 409 additions and 154 deletions.
8 changes: 5 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@ services:
context: .
dockerfile: Dockerfile.copy
image: gpt_server:v0.4.0
shm_size: '4g' # 设置共享内存为4GB
shm_size: '4g' # 设置共享内存为4GB
container_name: gpt_server
restart: always
# network_mode: host
ports:
- "8082:8082"
- 8082:8082
environment:
# List-form entries must be KEY=VALUE. An entry without "=" is treated as a
# bare variable name to pass through from the host, so TZ would never be set.
- TZ=Asia/Shanghai # Use the China time zone inside the container
volumes:
- "/home/dev/model/:/home/dev/model/"
- /home/dev/model/:/home/dev/model/
deploy:
resources:
reservations:
Expand Down
17 changes: 14 additions & 3 deletions gpt_server/model_worker/embedding_infinity.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __init__(

# Asynchronously start the worker's engine by awaiting the engine's own
# astart() coroutine; nothing is returned.
async def astart(self):
await self.engine.astart()

# Intentionally empty: accepts `params` and does nothing.
# NOTE(review): presumably a stub required by a base-class interface that
# this embedding worker does not use; confirm against the parent class.
def generate_stream_gate(self, params):
pass

Expand All @@ -76,10 +76,21 @@ async def get_embeddings(self, params):
embedding = [embedding.tolist() for embedding in embeddings]
elif self.mode == "rerank":
query = params.get("query", None)
scores, usage = await self.engine.rerank(
ranking, usage = await self.engine.rerank(
query=query, docs=texts, raw_scores=False
)
embedding = [[float(score)] for score in scores]
ranking = [
{
"index": i.index,
"relevance_score": i.relevance_score,
"document": i.document,
}
for i in ranking
]
ranking.sort(key=lambda x: x["index"])
embedding = [
[round(float(score["relevance_score"]), 6)] for score in ranking
]
ret["embedding"] = embedding
ret["token_num"] = usage
return ret
Expand Down
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "gpt_server"
version = "0.3.2"
version = "0.3.5"
description = "gpt_server是一个用于生产级部署LLMs或Embedding的开源框架。"
readme = "README.md"
license = { text = "Apache 2.0" }
Expand All @@ -12,7 +12,7 @@ dependencies = [
"ffmpy",
"fschat==0.2.36",
"gradio==4.26.0",
"infinity-emb[all]==0.0.53",
"infinity-emb[all]==0.0.73",
"lmdeploy==0.6.2",
"loguru>=0.7.2",
"openai==1.55.3",
Expand All @@ -21,7 +21,7 @@ dependencies = [
"torch==2.5.1",
"torchvision==0.20.1",
"transformers==4.45.2",
"vllm==0.6.4.post1",
"vllm==0.6.5",
"qwen_vl_utils",
"evalscope[perf]==0.7.0",
"modelscope==1.20.1",
Expand All @@ -33,6 +33,7 @@ override-dependencies = [
"torchvision==0.20.1",
"torch==2.5.1",
"triton",
"outlines==0.1.11",

]

Expand Down
Loading

0 comments on commit 4a40bf6

Please sign in to comment.