diff --git a/summarizer.py b/summarizer.py
new file mode 100644
index 0000000..845a8d3
--- /dev/null
+++ b/summarizer.py
@@ -0,0 +1,56 @@
+# File name: summarizer.py
+# This file deploys a summarizer app that summarizes and translates text.
+# The Translator and Summarizer classes are defined as Ray Serve deployments.
+# Both deployments load the t5-small model through the transformers pipeline API.
+
+from starlette.requests import Request
+
+from ray import serve
+from ray.serve.handle import RayServeHandle
+
+from transformers import pipeline
+
+# Creates a Ray Serve deployment for the translator.
+@serve.deployment
+class Translator:
+    def __init__(self):
+        # Load model
+        self.model = pipeline("translation_en_to_fr", model="t5-small")
+
+    def translate(self, text: str) -> str:
+        # Run inference
+        model_output = self.model(text)
+
+        # Post-process output to return only the translation text
+        translation = model_output[0]["translation_text"]
+
+        return translation
+
+# Creates a Ray Serve deployment for the summarizer.
+@serve.deployment
+class Summarizer:
+    def __init__(self, translator: RayServeHandle):
+        # Load model
+        self.model = pipeline("summarization", model="t5-small")
+        self.translator = translator
+
+    def summarize(self, text: str) -> str:
+        # Run inference
+        model_output = self.model(text, min_length=5, max_length=15)
+
+        # Post-process output to return only the summary text
+        summary = model_output[0]["summary_text"]
+
+        return summary
+
+    async def __call__(self, http_request: Request) -> str:
+        english_text: str = await http_request.json()
+        summary = self.summarize(english_text)
+
+        translation_ref = await self.translator.translate.remote(summary)
+        translation = await translation_ref
+
+        return translation
+
+# Composes the two deployments into one application: the Summarizer receives a handle to the Translator.
+summarizer = Summarizer.bind(Translator.bind())
\ No newline at end of file
diff --git a/translator_autoscale.py b/translator_autoscale.py
new file mode 100644
index 0000000..139e733
--- /dev/null
+++ b/translator_autoscale.py
@@ -0,0 +1,44 @@
+# File name: translator_autoscale.py
+# This file deploys a translator application with Ray Serve autoscaling.
+# The translator application uses a pre-trained model from the transformers library.
+
+from starlette.requests import Request
+
+from ray import serve
+
+from transformers import pipeline
+
+# Creates a Ray Serve deployment for a translator application with an autoscaling configuration.
+# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html for more information.
+@serve.deployment(
+    autoscaling_config={
+        "min_replicas": 1,
+        "initial_replicas": 2,
+        "max_replicas": 10,
+        "target_num_ongoing_requests_per_replica": 1,
+        "upscale_delay_s": 5,
+        "smoothing_factor": 1.5,
+    }
+)
+class Translator:
+    def __init__(self):
+        # Load model
+        self.model = pipeline("translation_en_to_fr", model="t5-small")
+
+    def translate(self, text: str) -> str:
+        # Run inference
+        model_output = self.model(text)
+
+        # Post-process output to return only the translation text
+        translation = model_output[0]["translation_text"]
+
+        return translation
+
+    # Asynchronously handles HTTP requests by calling the translate method.
+    async def __call__(self, http_request: Request) -> str:
+        english_text: str = await http_request.json()
+        translation = self.translate(english_text)
+        return translation
+
+# Binds the deployment into a servable application.
+translator_app = Translator.bind()
\ No newline at end of file
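A quick smoke test for the summarizer app, as a sketch rather than part of the diff: it assumes Ray Serve is installed, the app was started with "serve run summarizer:summarizer", and Serve is listening on its default port 8000; the file name and sample text are illustrative.

# File name: query_summarizer.py (hypothetical helper, not part of this diff)
import requests

english_text = (
    "It was the best of times, it was the worst of times, it was the age "
    "of wisdom, it was the age of foolishness."
)
# POST the raw string as JSON; Summarizer.__call__ decodes it with http_request.json().
response = requests.post("http://127.0.0.1:8000/", json=english_text)
print(response.text)  # Expected: a short French translation of the summary.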
diff --git a/translator_cpu.py b/translator_cpu.py
new file mode 100644
index 0000000..aeea311
--- /dev/null
+++ b/translator_cpu.py
@@ -0,0 +1,35 @@
+# File name: translator_cpu.py
+# This file deploys a translator application.
+# The translator application uses a pre-trained model from the transformers library.
+
+from starlette.requests import Request
+
+from ray import serve
+
+from transformers import pipeline
+
+# Creates a Ray Serve deployment for a translator application.
+# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html for more information.
+@serve.deployment()
+class Translator:
+    def __init__(self):
+        # Load model
+        self.model = pipeline("translation_en_to_fr", model="t5-small")
+
+    def translate(self, text: str) -> str:
+        # Run inference
+        model_output = self.model(text)
+
+        # Post-process output to return only the translation text
+        translation = model_output[0]["translation_text"]
+
+        return translation
+
+    # Asynchronously handles HTTP requests by calling the translate method.
+    async def __call__(self, http_request: Request) -> str:
+        english_text: str = await http_request.json()
+        translation = self.translate(english_text)
+        return translation
+
+# Binds the Translator deployment into a servable application.
+translator_app = Translator.bind()
\ No newline at end of file
diff --git a/translator_gpu.py b/translator_gpu.py
new file mode 100644
index 0000000..117ae2b
--- /dev/null
+++ b/translator_gpu.py
@@ -0,0 +1,36 @@
+# File name: translator_gpu.py
+# This file deploys a translator application on GPU machines.
+# The translator application uses a pre-trained model from the transformers library.
+
+from starlette.requests import Request
+
+from ray import serve
+
+from transformers import pipeline
+
+# Creates a Ray Serve deployment for a translator application; each replica requests one GPU.
+# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html for more information.
+@serve.deployment(ray_actor_options={"num_gpus": 1})
+class Translator:
+    def __init__(self):
+        # Load model
+        # Device map allows for automatic placement of the model on the available GPUs
+        self.model = pipeline("translation_en_to_fr", model="t5-small", device_map="auto")
+
+    def translate(self, text: str) -> str:
+        # Run inference
+        model_output = self.model(text)
+
+        # Post-process output to return only the translation text
+        translation = model_output[0]["translation_text"]
+
+        return translation
+
+    # Asynchronously handles HTTP requests by calling the translate method.
+    async def __call__(self, http_request: Request) -> str:
+        english_text: str = await http_request.json()
+        translation = self.translate(english_text)
+        return translation
+
+# Binds the Translator deployment into a servable application.
+translator_app = Translator.bind()
\ No newline at end of file
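Since num_gpus in Ray accepts fractional values, several replicas can share one device when the model is small. A minimal sketch, not part of the diff, assuming t5-small fits twice in GPU memory; the class and variable names are hypothetical:

# Hypothetical variant of translator_gpu.py: two replicas packed onto one GPU.
from ray import serve
from transformers import pipeline

@serve.deployment(num_replicas=2, ray_actor_options={"num_gpus": 0.5})
class SharedGpuTranslator:
    def __init__(self):
        # Each replica is granted half a GPU; device_map="auto" places the model on it.
        self.model = pipeline("translation_en_to_fr", model="t5-small", device_map="auto")

    def translate(self, text: str) -> str:
        return self.model(text)[0]["translation_text"]

shared_gpu_app = SharedGpuTranslator.bind()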
diff --git a/translator_gpu_autoscale.py b/translator_gpu_autoscale.py
new file mode 100644
index 0000000..29df8fb
--- /dev/null
+++ b/translator_gpu_autoscale.py
@@ -0,0 +1,45 @@
+# File name: translator_gpu_autoscale.py
+# This file deploys a translator application on GPU machines with autoscaling.
+# The translator application uses a pre-trained model from the transformers library.
+
+from starlette.requests import Request
+
+from ray import serve
+
+from transformers import pipeline
+
+# Creates a Ray Serve deployment for a translator application; each replica requests one GPU.
+# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html for more information.
+@serve.deployment(
+    ray_actor_options={"num_gpus": 1},
+    autoscaling_config={
+        "min_replicas": 2,
+        "initial_replicas": 2,
+        "max_replicas": 8,
+        "upscale_delay_s": 2,
+        "downscale_delay_s": 60,
+    }
+)
+class Translator:
+    def __init__(self):
+        # Load model
+        # Device map allows for automatic placement of the model on the available GPUs
+        self.model = pipeline("translation_en_to_fr", model="t5-small", device_map="auto")
+
+    def translate(self, text: str) -> str:
+        # Run inference
+        model_output = self.model(text)
+
+        # Post-process output to return only the translation text
+        translation = model_output[0]["translation_text"]
+
+        return translation
+
+    # Asynchronously handles HTTP requests by calling the translate method.
+    async def __call__(self, http_request: Request) -> str:
+        english_text: str = await http_request.json()
+        translation = self.translate(english_text)
+        return translation
+
+# Binds the Translator deployment into a servable application.
+translator_app = Translator.bind()
\ No newline at end of file
diff --git a/translator_model.py b/translator_model.py
new file mode 100644
index 0000000..aeea311
--- /dev/null
+++ b/translator_model.py
@@ -0,0 +1,35 @@
+# File name: translator_model.py
+# This file deploys a translator application.
+# The translator application uses a pre-trained model from the transformers library.
+
+from starlette.requests import Request
+
+from ray import serve
+
+from transformers import pipeline
+
+# Creates a Ray Serve deployment for a translator application.
+# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html for more information.
+@serve.deployment()
+class Translator:
+    def __init__(self):
+        # Load model
+        self.model = pipeline("translation_en_to_fr", model="t5-small")
+
+    def translate(self, text: str) -> str:
+        # Run inference
+        model_output = self.model(text)
+
+        # Post-process output to return only the translation text
+        translation = model_output[0]["translation_text"]
+
+        return translation
+
+    # Asynchronously handles HTTP requests by calling the translate method.
+    async def __call__(self, http_request: Request) -> str:
+        english_text: str = await http_request.json()
+        translation = self.translate(english_text)
+        return translation
+
+# Binds the Translator deployment into a servable application.
+translator_app = Translator.bind()
\ No newline at end of file
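All translator variants expose the same HTTP interface, so once any of them is running (for example via "serve run translator_cpu:translator_app") it can be queried the same way. A minimal client sketch; the file name and sample sentence are illustrative:

# File name: query_translator.py (hypothetical helper, not part of this diff)
import requests

# Serve listens on port 8000 by default; Translator.__call__ decodes the JSON body.
response = requests.post("http://127.0.0.1:8000/", json="Hello, how are you?")
print(response.text)  # Expected: a French rendering produced by t5-small.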