Commit

Add files via upload

irisdingbj authored Feb 22, 2024
1 parent 4f26517 commit 29f95cb

Showing 6 changed files with 251 additions and 0 deletions.
56 changes: 56 additions & 0 deletions summarizer.py
@@ -0,0 +1,56 @@
# File name: summarizer.py
# This file deploys a summarizer app that summarizes and translates text.
# The Translator and Summarizer classes are defined as Ray Serve deployments.
# Both deployments use a T5-small pipeline from the transformers library.

from starlette.requests import Request

from ray import serve
from ray.serve.handle import RayServeHandle

from transformers import pipeline


# Creates a Ray Serve deployment for the translator.
@serve.deployment
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation


# Creates a Ray Serve deployment for the summarizer.
@serve.deployment
class Summarizer:
    def __init__(self, translator: RayServeHandle):
        # Load model
        self.model = pipeline("summarization", model="t5-small")
        self.translator = translator

    def summarize(self, text: str) -> str:
        # Run inference
        model_output = self.model(text, min_length=5, max_length=15)

        # Post-process output to return only the summary text
        summary = model_output[0]["summary_text"]

        return summary

    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        summary = self.summarize(english_text)

        translation_ref = await self.translator.translate.remote(summary)
        translation = await translation_ref

        return translation


# Bind the Summarizer to the Translator, passing the Translator's handle
# to the Summarizer constructor.
summarizer = Summarizer.bind(Translator.bind())
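
A minimal client sketch for querying this app, assuming it has been started with `serve run summarizer:summarizer` and Serve is listening on the default `http://127.0.0.1:8000`; the file name and sample text are illustrative, not part of the commit:

# client.py -- hypothetical smoke test for the summarizer app.
# Assumes the app above is already running locally on Serve's default port.
import requests

english_text = (
    "It was the best of times, it was the worst of times, it was the age "
    "of wisdom, it was the age of foolishness."
)

# The deployment's __call__ reads the request body as JSON, so the text is
# sent as a JSON-encoded string.
response = requests.post("http://127.0.0.1:8000/", json=english_text)
print(response.text)  # French translation of the English summary
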
44 changes: 44 additions & 0 deletions translator_autoscale.py
@@ -0,0 +1,44 @@
# File name: translator_autoscale.py
# This file deploys a translator application with Ray Serve autoscaling.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application with an
# autoscaling configuration.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
@serve.deployment(
    autoscaling_config={
        "min_replicas": 1,
        "initial_replicas": 2,
        "max_replicas": 10,
        "target_num_ongoing_requests_per_replica": 1,
        "upscale_delay_s": 5,
        "smoothing_factor": 1.5,
    }
)
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the deployment into a deployable application.
translator_app = Translator.bind()
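
A hedged load-generation sketch for exercising the autoscaler, assuming the app is running locally (for example via `serve run translator_autoscale:translator_app`); the request count and pool size are illustrative, not tuned values:

# load_test.py -- hypothetical load generator; names and numbers are
# illustrative and not part of the original commit.
from concurrent.futures import ThreadPoolExecutor

import requests

def send_request(_: int) -> str:
    return requests.post("http://127.0.0.1:8000/", json="Hello world!").text

# Many concurrent requests push ongoing requests per replica above the
# target of 1, which should prompt the autoscaler to add replicas after
# the 5-second upscale delay.
with ThreadPoolExecutor(max_workers=16) as pool:
    results = list(pool.map(send_request, range(100)))

print(results[0])
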
35 changes: 35 additions & 0 deletions translator_cpu.py
@@ -0,0 +1,35 @@
# File name: translator_cpu.py
# This file deploys a translator application.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
@serve.deployment()
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the Translator deployment into a deployable application.
translator_app = Translator.bind()
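
A minimal in-process smoke test, assuming Ray Serve is installed and the file above is importable as `translator_cpu`; `serve.run` and the default port 8000 follow standard Ray Serve behavior, and the script name is hypothetical:

# smoke_test.py -- hypothetical local test; not part of the original commit.
import requests
from ray import serve

from translator_cpu import translator_app

# Start Serve locally and deploy the application in-process.
serve.run(translator_app)

response = requests.post("http://127.0.0.1:8000/", json="Hello world!")
print(response.text)  # Expected: a French translation such as "Bonjour monde!"

serve.shutdown()
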
36 changes: 36 additions & 0 deletions translator_gpu.py
@@ -0,0 +1,36 @@
# File name: translator_gpu.py
# This file deploys a translator application on GPU machines.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
# Requesting one GPU per replica ensures Ray schedules each replica on a GPU node.
@serve.deployment(ray_actor_options={"num_gpus": 1})
class Translator:
    def __init__(self):
        # Load model
        # device_map="auto" places the model on the available GPUs automatically.
        self.model = pipeline("translation_en_to_fr", model="t5-small", device_map="auto")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the Translator deployment into a deployable application.
translator_app = Translator.bind()
45 changes: 45 additions & 0 deletions translator_gpu_autoscale.py
@@ -0,0 +1,45 @@
# File name: translator_gpu_autoscale.py
# This file deploys a translator application on GPU machines with autoscaling.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
# Requesting one GPU per replica ensures Ray schedules each replica on a GPU node.
@serve.deployment(
    ray_actor_options={"num_gpus": 1},
    autoscaling_config={
        "min_replicas": 2,
        "initial_replicas": 2,
        "max_replicas": 8,
        "upscale_delay_s": 2,
        "downscale_delay_s": 60,
    },
)
class Translator:
    def __init__(self):
        # Load model
        # device_map="auto" places the model on the available GPUs automatically.
        self.model = pipeline("translation_en_to_fr", model="t5-small", device_map="auto")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the Translator deployment into a deployable application.
translator_app = Translator.bind()
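
A short sketch for watching replica counts while the autoscaler reacts, assuming a Ray 2.x release where `serve.status()` is available; the printed fields may differ across versions, and the script name and polling interval are illustrative:

# status_check.py -- hypothetical inspection script; not part of the commit.
import time

from ray import serve

# Poll Serve status a few times while load is being applied; the deployment's
# replica count should move between min_replicas (2) and max_replicas (8)
# as traffic rises and falls.
for _ in range(5):
    print(serve.status())
    time.sleep(10)
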
35 changes: 35 additions & 0 deletions translator_model.py
@@ -0,0 +1,35 @@
# File name: translator_model.py
# This file deploys a translator application.
# The translator application uses a pre-trained model from the transformers library.

from starlette.requests import Request

from ray import serve

from transformers import pipeline


# Creates a Ray Serve deployment for a translator application.
# Refer to https://docs.ray.io/en/latest/serve/scaling-and-resource-allocation.html# for more information.
@serve.deployment()
class Translator:
    def __init__(self):
        # Load model
        self.model = pipeline("translation_en_to_fr", model="t5-small")

    def translate(self, text: str) -> str:
        # Run inference
        model_output = self.model(text)

        # Post-process output to return only the translation text
        translation = model_output[0]["translation_text"]

        return translation

    # Handles HTTP requests by translating the JSON-encoded request body.
    async def __call__(self, http_request: Request) -> str:
        english_text: str = await http_request.json()
        translation = self.translate(english_text)
        return translation


# Bind the Translator deployment into a deployable application.
translator_app = Translator.bind()
