Merge branch 'main' into basic_video_processing
movchan74 committed Nov 10, 2023
2 parents 9776848 + 7d7ad12 commit 3337777
Showing 22 changed files with 316 additions and 65 deletions.
29 changes: 29 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,29 @@
---
name: Bug report
about: Report a malfunction, glitch, or error in the application. This includes any performance-related issues that may arise.
title: "[BUG]"
labels: bug
assignees: ''

---

### Bug Description
- Brief summary of the issue

### Steps to Reproduce
1.
2.
3.

### Expected Behavior
- What should have happened?

### Actual Behavior
- What actually happened?

### Performance Details (if applicable)
- Specifics of the performance issue encountered

### Environment
- Version (commit hash, tag, branch)
21 changes: 21 additions & 0 deletions .github/ISSUE_TEMPLATE/documentation.md
@@ -0,0 +1,21 @@
---
name: Documentation
about: Propose updates or corrections to both internal development documentation and client-facing documentation.
title: "[DOCS]"
labels: documentation
assignees: ''

---

### Documentation Area (Development/Client)
- Specify the area of documentation

### Current Content
- Quote the current content or describe the issue

### Proposed Changes
- Detail the proposed changes

### Reasons for Changes
- Why these changes will improve the documentation
17 changes: 17 additions & 0 deletions .github/ISSUE_TEMPLATE/enhancement.md
@@ -0,0 +1,17 @@
---
name: Enhancement
about: Recommend improvements to existing features or suggest code quality improvements.
title: "[ENHANCEMENT]"
labels: enhancement
assignees: ''

---

### Enhancement Description
- Overview of the enhancement

### Advantages
- Benefits of implementing this enhancement

### Possible Implementation
- Suggested methods for implementing the enhancement
17 changes: 17 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,17 @@
---
name: Feature request
about: Suggest new functionalities or modifications to enhance the application's capabilities.
title: "[FEATURE REQUEST]"
labels: feature request
assignees: ''

---

### Feature Summary
- Concise description of the feature

### Justification/Rationale
- Why is the feature beneficial?

### Proposed Implementation (if any)
- How do you envision this feature's implementation?
18 changes: 18 additions & 0 deletions .github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,18 @@
---
name: Question
about: Ask a question to clarify project-related topics, request further information, or better understand functionality.
title: "[QUESTION]"
labels: question
assignees: ''

---

### Context
- Background or context of the question

### Question
- Specific question being asked

### What You've Tried
- List any solutions or research already conducted
18 changes: 18 additions & 0 deletions .github/ISSUE_TEMPLATE/testing.md
@@ -0,0 +1,18 @@
---
name: Testing
about: Request new tests, propose enhancements to existing tests, or report test failures.
title: "[TESTS]"
labels: tests
assignees: ''

---

### Testing Requirement
- Describe the testing need or issue

### Test Scenarios
- Detail specific test scenarios to be addressed

### Acceptance Criteria
- What are the criteria for the test to be considered successful?
4 changes: 2 additions & 2 deletions aana/configs/pipeline.py
@@ -172,7 +172,7 @@
"name": "hf_blip2_opt_2_7b",
"type": "ray_deployment",
"deployment_name": "hf_blip2_deployment_opt_2_7b",
"method": "generate_captions",
"method": "generate_batch",
"inputs": [
{
"name": "images",
@@ -248,7 +248,7 @@
"name": "hf_blip2_opt_2_7b_video",
"type": "ray_deployment",
"deployment_name": "hf_blip2_deployment_opt_2_7b",
"method": "generate_captions",
"method": "generate_batch",
"flatten_by": "video_batch.videos.[*].frames.[*]",
"inputs": [
{
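The `generate_captions` method was renamed to `generate_batch`, and the video node applies it across every frame of every video through the `flatten_by` path. A minimal sketch of what flattening by `video_batch.videos.[*].frames.[*]` means (illustrative only; the project's actual path resolver is not shown in this diff):

```python
from typing import Any, Dict, List


def flatten_frames(video_batch: Dict[str, Any]) -> List[Any]:
    """Flatten the nested videos -> frames structure into one flat list,
    mirroring the path "video_batch.videos.[*].frames.[*]": each "[*]"
    level is expanded so a single generate_batch call sees all frames."""
    frames: List[Any] = []
    for video in video_batch["videos"]:
        frames.extend(video["frames"])
    return frames


# Two videos with two frames and one frame -> one batch of three frames.
video_batch = {"videos": [{"frames": ["f0", "f1"]}, {"frames": ["f2"]}]}
assert flatten_frames(video_batch) == ["f0", "f1", "f2"]
```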
2 changes: 1 addition & 1 deletion aana/configs/settings.py
@@ -8,7 +8,7 @@ class Settings(BaseSettings):
"""

tmp_data_dir: Path = Path("/tmp/aana/data")
tmp_data_dir: Path = Path("/tmp/aana_data")


settings = Settings()
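Because `Settings` extends pydantic's `BaseSettings`, the new `/tmp/aana_data` default should stay overridable from the environment without code changes; a hedged sketch, assuming pydantic v1's default case-insensitive field-to-variable mapping:

```python
import os
from pathlib import Path

from pydantic import BaseSettings  # pydantic v1 style, as used in this repo

# Hypothetical override path, set before Settings is instantiated.
os.environ["TMP_DATA_DIR"] = "/mnt/scratch/aana"


class Settings(BaseSettings):
    tmp_data_dir: Path = Path("/tmp/aana_data")


# The environment variable wins over the coded default.
assert Settings().tmp_data_dir == Path("/mnt/scratch/aana")
```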
63 changes: 52 additions & 11 deletions aana/deployments/hf_blip2_deployment.py
@@ -1,4 +1,4 @@
from typing import Any, Dict, List
from typing import Any, Dict, List, TypedDict
from pydantic import BaseModel, Field, validator
from ray import serve
import torch
@@ -48,6 +48,28 @@ def validate_dtype(cls, value: Dtype) -> Dtype:
return value


class CaptioningOutput(TypedDict):
"""
The output of the captioning model.
Attributes:
caption (str): the caption
"""

caption: str


class CaptioningBatchOutput(TypedDict):
"""
The output of the captioning model for a batch of images.
Attributes:
captions (List[str]): the list of captions
"""

captions: List[str]


@serve.deployment
class HFBlip2Deployment(BaseDeployment):
"""
@@ -71,19 +93,19 @@ async def apply_config(self, config: Dict[str, Any]):
# and process them in parallel
self.batch_size = config_obj.batch_size
self.num_processing_threads = config_obj.num_processing_threads
# The actual inference is done in _generate_captions()
# The actual inference is done in _generate()
# We use lambda because BatchProcessor expects dict as input
# and we use **kwargs to unpack the dict into named arguments for _generate_captions()
# and we use **kwargs to unpack the dict into named arguments for _generate()
self.batch_processor = BatchProcessor(
process_batch=lambda request: self._generate_captions(**request),
process_batch=lambda request: self._generate(**request),
batch_size=self.batch_size,
num_threads=self.num_processing_threads,
)

# Load the model and processor for BLIP2 from HuggingFace
self.model_id = config_obj.model
self.dtype = config_obj.dtype
self.torch_dtype = Dtype.to_torch(self.dtype)
self.torch_dtype = self.dtype.to_torch()
self.model = Blip2ForConditionalGeneration.from_pretrained(
self.model_id, torch_dtype=self.torch_dtype
)
@@ -92,25 +114,44 @@ async def apply_config(self, config: Dict[str, Any]):
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.model.to(self.device)

async def generate_captions(self, **kwargs) -> Dict[str, Any]:
async def generate(self, image: Image) -> CaptioningOutput:
"""
Generate a caption for the given image.
Args:
image (Image): the image
Returns:
CaptioningOutput: the dictionary with one key "caption"
and the caption for the image as value
Raises:
InferenceException: if the inference fails
"""
captions: CaptioningBatchOutput = await self.batch_processor.process(
{"images": [image]}
)
return CaptioningOutput(caption=captions["captions"][0])

async def generate_batch(self, **kwargs) -> CaptioningBatchOutput:
"""
Generate captions for the given images.
Args:
images (List[Image]): the images
Returns:
Dict[str, Any]: the dictionary with one key "captions"
CaptioningBatchOutput: the dictionary with one key "captions"
and the list of captions for the images as value
Raises:
InferenceException: if the inference fails
"""
# Call the batch processor to process the requests
# The actual inference is done in _generate_captions()
# The actual inference is done in _generate()
return await self.batch_processor.process(kwargs)

def _generate_captions(self, images: List[Image]) -> Dict[str, Any]:
def _generate(self, images: List[Image]) -> CaptioningBatchOutput:
"""
Generate captions for the given images.
@@ -120,7 +161,7 @@ def _generate_captions(self, images: List[Image]) -> Dict[str, Any]:
images (List[Image]): the images
Returns:
Dict[str, Any]: the dictionary with one key "captions"
CaptioningBatchOutput: the dictionary with one key "captions"
and the list of captions for the images as value
Raises:
@@ -141,6 +182,6 @@ def _generate_captions(self, images: List[Image]) -> Dict[str, Any]:
generated_texts = [
generated_text.strip() for generated_text in generated_texts
]
return {"captions": generated_texts}
return CaptioningBatchOutput(captions=generated_texts)
except Exception as e:
raise InferenceException(self.model_id) from e
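The deployment's public API is now split into `generate` for a single image and `generate_batch` for many; both funnel through the same `BatchProcessor`, so single-image calls still participate in dynamic batching. A hedged usage sketch: the handle calls follow the 2023-era Ray Serve API and may differ in newer releases, the deployment name is taken from `aana/configs/pipeline.py`, and `Image` construction is omitted.

```python
import ray
from ray import serve


def caption(images):
    """images: a list of aana Image objects (construction not shown here)."""
    # Assumption: the deployment is already running under this name.
    handle = serve.get_deployment("hf_blip2_deployment_opt_2_7b").get_handle(sync=True)

    # One image -> CaptioningOutput, e.g. {"caption": "a cat on a sofa"}
    single = ray.get(handle.generate.remote(image=images[0]))

    # A batch -> CaptioningBatchOutput, e.g. {"captions": ["...", "..."]}
    batch = ray.get(handle.generate_batch.remote(images=images))
    return single["caption"], batch["captions"]
```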
47 changes: 37 additions & 10 deletions aana/deployments/vllm_deployment.py
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional
from typing import Any, AsyncGenerator, Dict, List, Optional, TypedDict
from pydantic import BaseModel, Field
from ray import serve
from vllm.engine.arg_utils import AsyncEngineArgs
@@ -34,6 +34,28 @@ class VLLMConfig(BaseModel):
max_model_len: Optional[int] = Field(default=None)


class LLMOutput(TypedDict):
"""
The output of the LLM.
Attributes:
text (str): the generated text
"""

text: str


class LLMBatchOutput(TypedDict):
"""
The output of the LLM for a batch of inputs.
Attributes:
texts (List[str]): the list of generated texts
"""

texts: List[str]


@serve.deployment
class VLLMDeployment(BaseDeployment):
"""
@@ -78,7 +100,9 @@ async def apply_config(self, config: Dict[str, Any]):
# create the engine
self.engine = AsyncLLMEngine.from_engine_args(args)

async def generate_stream(self, prompt: str, sampling_params: SamplingParams):
async def generate_stream(
self, prompt: str, sampling_params: SamplingParams
) -> AsyncGenerator[LLMOutput, None]:
"""
Generate completion for the given prompt and stream the results.
@@ -87,7 +111,7 @@ async def generate_stream(self, prompt: str, sampling_params: SamplingParams):
sampling_params (SamplingParams): the sampling parameters
Yields:
dict: the generated text
LLMOutput: the dictionary with the key "text" and the generated text as the value
"""
prompt = str(prompt)
sampling_params = merged_options(self.default_sampling_params, sampling_params)
@@ -108,7 +132,7 @@ async def generate_stream(self, prompt: str, sampling_params: SamplingParams):
num_returned = 0
async for request_output in results_generator:
text_output = request_output.outputs[0].text[num_returned:]
yield {"text": text_output}
yield LLMOutput(text=text_output)
num_returned += len(text_output)
except GeneratorExit as e:
# If the generator is cancelled, we need to cancel the request
@@ -118,7 +142,7 @@ async def generate_stream(self, prompt: str, sampling_params: SamplingParams):
except Exception as e:
raise InferenceException(model_name=self.model) from e

async def generate(self, prompt: str, sampling_params: SamplingParams):
async def generate(self, prompt: str, sampling_params: SamplingParams) -> LLMOutput:
"""
Generate completion for the given prompt.
@@ -127,14 +151,16 @@ async def generate(self, prompt: str, sampling_params: SamplingParams):
sampling_params (SamplingParams): the sampling parameters
Returns:
dict: the generated text
LLMOutput: the dictionary with the key "text" and the generated text as the value
"""
generated_text = ""
async for chunk in self.generate_stream(prompt, sampling_params):
generated_text += chunk["text"]
return {"text": generated_text}
return LLMOutput(text=generated_text)

async def generate_batch(self, prompts: List[str], sampling_params: SamplingParams):
async def generate_batch(
self, prompts: List[str], sampling_params: SamplingParams
) -> LLMBatchOutput:
"""
Generate completion for the batch of prompts.
@@ -143,11 +169,12 @@ async def generate_batch(self, prompts: List[str], sampling_params: SamplingParams):
sampling_params (SamplingParams): the sampling parameters
Returns:
dict: the generated texts
LLMBatchOutput: the dictionary with the key "texts"
and the list of generated texts as the value
"""
texts = []
for prompt in prompts:
text = await self.generate(prompt, sampling_params)
texts.append(text["text"])

return {"texts": texts}
return LLMBatchOutput(texts=texts)
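Typing the return values as `TypedDict`s (`LLMOutput`, `LLMBatchOutput`) keeps the payloads plain dicts at runtime while documenting the key names for type checkers. Since each streamed `LLMOutput` carries only the newly generated slice of text, a consumer can rebuild the full completion by concatenation; a minimal sketch, with the call style illustrative rather than the project's exact invocation:

```python
from typing import List


async def stream_completion(deployment, prompt: str, sampling_params) -> str:
    """Collect a full completion from VLLMDeployment.generate_stream."""
    pieces: List[str] = []
    async for chunk in deployment.generate_stream(prompt, sampling_params):
        # Each chunk is an LLMOutput: {"text": "<newly generated piece>"}
        pieces.append(chunk["text"])
    # Concatenation reproduces what generate() returns for the same inputs.
    return "".join(pieces)
```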