-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4f26517
commit 29f95cb
Showing
6 changed files
with
251 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# File name: summarizer.py | ||
# This file deploys a summarizer app that summarizes and translates text. | ||
# The Translator and Summarizer classes are defined as Ray Serve deployments. | ||
# Both deployments use a pipeline of model T5-small from the transformers library. | ||
|
||
from starlette.requests import Request | ||
|
||
from ray import serve | ||
from ray.serve.handle import RayServeHandle | ||
|
||
from transformers import pipeline | ||
|
||
# Ray Serve deployment wrapping an English-to-French translation model.
@serve.deployment
class Translator:
    def __init__(self):
        # Load the T5-small translation pipeline once per replica.
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        """Translate English *text* to French and return only the translated string."""
        # Run inference and keep just the translation field of the first result.
        return self.model(text)[0]["translation_text"]
||
# Ray Serve deployment that summarizes English text, then forwards the
# summary to the Translator deployment for English-to-French translation.
@serve.deployment
class Summarizer:
    def __init__(self, translator: RayServeHandle):
        # Load the T5-small summarization pipeline once per replica.
        self.model = pipeline("summarization", model="t5-small")
        # Handle used to call the downstream Translator deployment.
        self.translator = translator

    def summarize(self, text: str) -> str:
        """Summarize *text* and return only the summary string."""
        output = self.model(text, min_length=5, max_length=15)
        return output[0]["summary_text"]

    async def __call__(self, http_request: Request) -> str:
        # The JSON request body is the English text to process.
        english_text: str = await http_request.json()
        summary = self.summarize(english_text)

        # Awaiting .remote() yields an object ref; awaiting the ref yields the result.
        ref = await self.translator.translate.remote(summary)
        return await ref
# Build the application graph: the Summarizer holds a handle to the Translator.
translator_node = Translator.bind()
summarizer = Summarizer.bind(translator_node)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# File name: translator_autoscale.py | ||
# This file deploys a translator application with Ray Serve autoscaling. | ||
# The translator application uses a pre-trained model from the transformers library. | ||
|
||
from starlette.requests import Request | ||
|
||
from ray import serve | ||
|
||
from transformers import pipeline | ||
|
||
# Ray Serve deployment for an English-to-French translator with autoscaling.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for details.
@serve.deployment(
    autoscaling_config={
        "min_replicas": 1,
        "initial_replicas": 2,
        "max_replicas": 10,
        "target_num_ongoing_requests_per_replica": 1,
        "upscale_delay_s": 5,
        "smoothing_factor": 1.5,
    }
)
class Translator:
    def __init__(self):
        # Load the T5-small translation pipeline once per replica.
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        """Translate English *text* to French and return only the translated string."""
        return self.model(text)[0]["translation_text"]

    # HTTP entry point: decodes the request body and delegates to translate().
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        return self.translate(english_text)
|
||
# Application entry point served by Ray Serve.
translator_app = Translator.bind()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# File name: translator.py | ||
# This file deploys a translator application. | ||
# The translator application uses a pre-trained model from the transformers library. | ||
|
||
from starlette.requests import Request | ||
|
||
from ray import serve | ||
|
||
from transformers import pipeline | ||
|
||
# Ray Serve deployment for an English-to-French translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for details.
@serve.deployment()
class Translator:
    def __init__(self):
        # Load the T5-small translation pipeline once per replica.
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        """Translate English *text* to French and return only the translated string."""
        # Keep just the translation field of the first pipeline result.
        return self.model(text)[0]["translation_text"]

    # HTTP entry point: decodes the JSON request body and delegates to translate().
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        return self.translate(english_text)
|
||
# Application entry point served by Ray Serve.
translator_app = Translator.bind()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# File name: translator_gpu.py | ||
# This file deploys a translator application on gpu machines. | ||
# The translator application uses a pre-trained model from the transformers library. | ||
|
||
from starlette.requests import Request | ||
|
||
from ray import serve | ||
|
||
from transformers import pipeline | ||
|
||
# Ray Serve deployment for an English-to-French translator running on GPUs.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for details.
@serve.deployment()
class Translator:
    def __init__(self):
        # device_map="auto" lets transformers place the model on available GPUs.
        self.model = pipeline(
            "translation_en_to_fr", model="t5-small", device_map="auto"
        )

    def translate(self, text: str) -> str:
        """Translate English *text* to French and return only the translated string."""
        return self.model(text)[0]["translation_text"]

    # HTTP entry point: decodes the JSON request body and delegates to translate().
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        return self.translate(english_text)
|
||
# Application entry point served by Ray Serve.
translator_app = Translator.bind()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# File name: translator_gpu_autoscale.py (NOTE: original header said translator_autoscale.py, which duplicates the CPU autoscale example's file name)
# This file deploys a translator application on gpu machines with autoscaling. | ||
# The translator application uses a pre-trained model from the transformers library. | ||
|
||
from starlette.requests import Request | ||
|
||
from ray import serve | ||
|
||
from transformers import pipeline | ||
|
||
# Ray Serve deployment for an English-to-French translator on GPUs with autoscaling.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for details.
@serve.deployment(
    autoscaling_config={
        "min_replicas": 2,
        "initial_replicas": 2,
        "max_replicas": 8,
        "upscale_delay_s": 2,
        "downscale_delay_s": 60,
    }
)
class Translator:
    def __init__(self):
        # device_map="auto" lets transformers place the model on available GPUs.
        self.model = pipeline(
            "translation_en_to_fr", model="t5-small", device_map="auto"
        )

    def translate(self, text: str) -> str:
        """Translate English *text* to French and return only the translated string."""
        return self.model(text)[0]["translation_text"]

    # HTTP entry point: decodes the JSON request body and delegates to translate().
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        return self.translate(english_text)
|
||
# Application entry point served by Ray Serve.
translator_app = Translator.bind()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# File name: translator.py | ||
# This file deploys a translator application. | ||
# The translator application uses a pre-trained model from the transformers library. | ||
|
||
from starlette.requests import Request | ||
|
||
from ray import serve | ||
|
||
from transformers import pipeline | ||
|
||
# Ray Serve deployment for an English-to-French translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for details.
@serve.deployment()
class Translator:
    def __init__(self):
        # Load the T5-small translation pipeline once per replica.
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        """Translate English *text* to French and return only the translated string."""
        # Keep just the translation field of the first pipeline result.
        return self.model(text)[0]["translation_text"]

    # HTTP entry point: decodes the JSON request body and delegates to translate().
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        return self.translate(english_text)
|
||
# Application entry point served by Ray Serve.
translator_app = Translator.bind()