Merge pull request #190 from mobiusml/vllm_version
Support for vLLM 0.6.3
HRashidi authored Oct 30, 2024
2 parents 580090c + 57565d4 commit 91de5b5
Showing 6 changed files with 1,010 additions and 954 deletions.
.devcontainer/Dockerfile (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
+FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
 RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 ffmpeg locales
 
 # Set the locale
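The base image moves from CUDA 12.1.0 with cuDNN 8 to CUDA 12.3.2 with cuDNN 9. A quick way to confirm the rebuilt container exposes the expected stack is to query PyTorch; this is a hypothetical check, not part of the PR, and assumes torch is installed in the image:

# Hypothetical sanity check (not part of this PR): report the CUDA/cuDNN
# versions that the container's PyTorch build was compiled against.
import torch

print(torch.version.cuda)              # CUDA version torch was built with
print(torch.backends.cudnn.version())  # e.g. 9xxxx for a cuDNN 9 build
print(torch.cuda.is_available())       # True if a GPU is visible in the container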
aana/deployments/hqq_text_generation_deployment.py (2 changes: 1 addition & 1 deletion)
@@ -118,7 +118,7 @@ async def apply_config(self, config: dict[str, Any]):
             self.dtype = Dtype.BFLOAT16
 
         if config_obj.quantize_on_fly:
-            self.model_kwargs["device_map"] = "cpu"
+            self.model_kwargs.pop("device_map", None)
             self.model = AutoModelForCausalLM.from_pretrained(
                 self.model_id, torch_dtype=self.dtype.to_torch(), **self.model_kwargs
             )
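With this change, the quantize-on-the-fly path no longer forces the model onto the CPU via device_map="cpu"; instead it drops any device_map entry so from_pretrained loads the weights without placing or sharding them first. A minimal sketch of the new behavior, assuming transformers is installed; the model id and kwargs are illustrative placeholders, not the deployment's actual config:

# Minimal sketch of the updated load path for on-the-fly quantization.
import torch
from transformers import AutoModelForCausalLM

model_id = "facebook/opt-125m"         # hypothetical small model
model_kwargs = {"device_map": "auto"}  # may arrive from user-provided config

# Previously overwritten with "cpu"; now removed entirely so from_pretrained
# does not place the weights before the quantization step takes over.
model_kwargs.pop("device_map", None)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, **model_kwargs
)
print(model.device)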
aana/deployments/vllm_deployment.py (2 changes: 1 addition & 1 deletion)
@@ -269,9 +269,9 @@ async def generate_stream(  # noqa: C901
             else:
                 inputs = TokensPrompt(prompt_token_ids=prompt_token_ids)
             results_generator = self.engine.generate(
-                inputs,
                 sampling_params=sampling_params_vllm,
                 request_id=request_id,
+                inputs=inputs,
             )
 
             num_returned = 0
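The change here is that the prompt is now passed to engine.generate by keyword rather than as the first positional argument, tracking the signature changes in vLLM 0.6.3. Below is a standalone sketch of the same call pattern; vLLM 0.6.3, a visible GPU, and the small model are all assumptions, and the deployment itself wires these objects up differently:

# Standalone sketch of the keyword-argument call from the diff above.
import asyncio

from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams
from vllm.inputs import TokensPrompt

async def main() -> None:
    engine = AsyncLLMEngine.from_engine_args(
        AsyncEngineArgs(model="facebook/opt-125m")  # hypothetical model
    )
    inputs = TokensPrompt(prompt_token_ids=[1, 2, 3])  # illustrative token ids
    results_generator = engine.generate(
        sampling_params=SamplingParams(max_tokens=16),
        request_id="req-0",  # illustrative request id
        inputs=inputs,       # passed by keyword, as in the diff above
    )
    async for request_output in results_generator:
        print(request_output.outputs[0].text)

asyncio.run(main())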
[Diffs for the remaining 3 changed files are not shown.]
