Commit

Add files via upload

irisdingbj authored Feb 22, 2024
1 parent 4f26517 commit 29f95cb

Showing 6 changed files with 251 additions and 0 deletions.
56 changes: 56 additions & 0 deletions summarizer.py
@@ -0,0 +1,56 @@
# File name: summarizer.py
# This file deploys a summarizer app that summarizes and translates text.
# The Translator and Summarizer classes are defined as Ray Serve deployments.
# Both deployments use a T5-small pipeline from the transformers library.

from starlette.requests import Request

from ray import serve
from ray.serve.handle import RayServeHandle

from transformers import pipeline


# Creates a Ray Serve deployment for the translator.
@serve.deployment
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation


# Creates a Ray Serve deployment for the summarizer.
@serve.deployment
class Summarizer:
    def __init__(self, translator: RayServeHandle):
        # Load model
        self.model = pipeline("summarization", model="t5-small")
        self.translator = translator

    def summarize(self, text: str) -> str:
        # Run inference
        model_output = self.model(text, min_length=5, max_length=15)

        # Post-process output to return only the summary text
        summary = model_output[0]["summary_text"]

        return summary

    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        summary = self.summarize(english_text)

        translation_ref = await self.translator.translate.remote(summary)
        translation = await translation_ref

        return translation


# Bind the Summarizer to the Translator, passing the Translator's handle
# to the Summarizer constructor.
summarizer = Summarizer.bind(Translator.bind())
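
A minimal client sketch for querying this app, assuming it has been started with `serve run summarizer:summarizer` and Serve is listening on the default `http://127.0.0.1:8000`; the file name and sample text are illustrative, not part of the commit:

# client.py -- hypothetical smoke test for the summarizer app.
# Assumes the app above is already running locally on Serve's default port.
import requests

english_text = (
    "It was the best of times, it was the worst of times, it was the age "
    "of wisdom, it was the age of foolishness."
)

# The deployment's __call__ reads the request body as JSON, so the text is
# sent as a JSON-encoded string.
response = requests.post("http://127.0.0.1:8000/", json=english_text)
print(response.text)  # French translation of the English summary
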
44 changes: 44 additions & 0 deletions translator_autoscale.py
@@ -0,0 +1,44 @@
# File name: translator_autoscale.py
# This file deploys a translator application with Ray Serve autoscaling.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application with an
# autoscaling configuration.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
@serve.deployment(
    autoscaling_config={
        "min_replicas": 1,
        "initial_replicas": 2,
        "max_replicas": 10,
        "target_num_ongoing_requests_per_replica": 1,
        "upscale_delay_s": 5,
        "smoothing_factor": 1.5,
    }
)
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the deployment into a deployable application.
translator_app = Translator.bind()
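
A hedged load-generation sketch for exercising the autoscaler, assuming the app is running locally (for example via `serve run translator_autoscale:translator_app`); the request count and pool size are illustrative, not tuned values:

# load_test.py -- hypothetical load generator; names and numbers are
# illustrative and not part of the original commit.
from concurrent.futures import ThreadPoolExecutor

import requests

def send_request(_: int) -> str:
    return requests.post("http://127.0.0.1:8000/", json="Hello world!").text

# Many concurrent requests push ongoing requests per replica above the
# target of 1, which should prompt the autoscaler to add replicas after
# the 5-second upscale delay.
with ThreadPoolExecutor(max_workers=16) as pool:
    results = list(pool.map(send_request, range(100)))

print(results[0])
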
35 changes: 35 additions & 0 deletions translator_cpu.py
@@ -0,0 +1,35 @@
# File name: translator_cpu.py
# This file deploys a translator application.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
@serve.deployment()
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the Translator deployment into a deployable application.
translator_app = Translator.bind()
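
A minimal in-process smoke test, assuming Ray Serve is installed and the file above is importable as `translator_cpu`; `serve.run` and the default port 8000 follow standard Ray Serve behavior, and the script name is hypothetical:

# smoke_test.py -- hypothetical local test; not part of the original commit.
import requests
from ray import serve

from translator_cpu import translator_app

# Start Serve locally and deploy the application in-process.
serve.run(translator_app)

response = requests.post("http://127.0.0.1:8000/", json="Hello world!")
print(response.text)  # Expected: a French translation such as "Bonjour monde!"

serve.shutdown()
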
36 changes: 36 additions & 0 deletions translator_gpu.py
@@ -0,0 +1,36 @@
# File name: translator_gpu.py
# This file deploys a translator application on GPU machines.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
# Requesting one GPU per replica ensures Ray schedules each replica on a GPU node.
@serve.deployment(ray_actor_options={"num_gpus": 1})
class Translator:
    def __init__(self):
        # Load model
        # device_map="auto" places the model on the available GPUs automatically.
        self.model = pipeline("translation_en_to_fr", model="t5-small", device_map="auto")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the Translator deployment into a deployable application.
translator_app = Translator.bind()
45 changes: 45 additions & 0 deletions translator_gpu_autoscale.py
@@ -0,0 +1,45 @@
# File name: translator_gpu_autoscale.py
# This file deploys a translator application on GPU machines with autoscaling.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
# Requesting one GPU per replica ensures Ray schedules each replica on a GPU node.
@serve.deployment(
    ray_actor_options={"num_gpus": 1},
    autoscaling_config={
        "min_replicas": 2,
        "initial_replicas": 2,
        "max_replicas": 8,
        "upscale_delay_s": 2,
        "downscale_delay_s": 60,
    },
)
class Translator:
    def __init__(self):
        # Load model
        # device_map="auto" places the model on the available GPUs automatically.
        self.model = pipeline("translation_en_to_fr", model="t5-small", device_map="auto")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the Translator deployment into a deployable application.
translator_app = Translator.bind()
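
A short sketch for watching replica counts while the autoscaler reacts, assuming a Ray 2.x release where `serve.status()` is available; the printed fields may differ across versions, and the script name and polling interval are illustrative:

# status_check.py -- hypothetical inspection script; not part of the commit.
import time

from ray import serve

# Poll Serve status a few times while load is being applied; the deployment's
# replica count should move between min_replicas (2) and max_replicas (8)
# as traffic rises and falls.
for _ in range(5):
    print(serve.status())
    time.sleep(10)
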
35 changes: 35 additions & 0 deletions translator_model.py
@@ -0,0 +1,35 @@
# File name: translator_model.py
# This file deploys a translator application.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
@serve.deployment()
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the Translator deployment into a deployable application.
translator_app = Translator.bind()
