Merge branch 'main' into basic_video_processing
movchan74 committed Nov 10, 2023
2 parents 9776848 + 7d7ad12 commit 3337777
Showing 22 changed files with 316 additions and 65 deletions.
29 changes: 29 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,29 @@
---
name: Bug report
about: Report a malfunction, glitch, or error in the application. This includes any performance-related issues that may arise.
title: "[BUG]"
labels: bug
assignees: ''

---

### Bug Description
- Brief summary of the issue

### Steps to Reproduce
1.
2.
3.

### Expected Behavior
- What should have happened?

### Actual Behavior
- What actually happened?

### Performance Details (if applicable)
- Specifics of the performance issue encountered

### Environment
- Version (commit hash, tag, branch)
21 changes: 21 additions & 0 deletions .github/ISSUE_TEMPLATE/documentation.md
@@ -0,0 +1,21 @@
---
name: Documentation
about: Propose updates or corrections to both internal development documentation and client-facing documentation.
title: "[DOCS]"
labels: documentation
assignees: ''

---

### Documentation Area (Development/Client)
- Specify the area of documentation

### Current Content
- Quote the current content or describe the issue

### Proposed Changes
- Detail the proposed changes

### Reasons for Changes
- Why these changes will improve the documentation
17 changes: 17 additions & 0 deletions .github/ISSUE_TEMPLATE/enhancement.md
@@ -0,0 +1,17 @@
---
name: Enhancement
about: Recommend improvements to existing features or suggest code quality improvements.
title: "[ENHANCEMENT]"
labels: enhancement
assignees: ''

---

### Enhancement Description
- Overview of the enhancement

### Advantages
- Benefits of implementing this enhancement

### Possible Implementation
- Suggested methods for implementing the enhancement
17 changes: 17 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,17 @@
---
name: Feature request
about: Suggest new functionalities or modifications to enhance the application's capabilities.
title: "[FEATURE REQUEST]"
labels: feature request
assignees: ''

---

### Feature Summary
- Concise description of the feature

### Justification/Rationale
- Why is the feature beneficial?

### Proposed Implementation (if any)
- How do you envision this feature's implementation?
18 changes: 18 additions & 0 deletions .github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,18 @@
---
name: Question
about: Ask a question to clarify project-related topics, request further information, or better understand functionality.
title: "[QUESTION]"
labels: question
assignees: ''

---

### Context
- Background or context of the question

### Question
- Specific question being asked

### What You've Tried
- List any solutions or research already conducted
18 changes: 18 additions & 0 deletions .github/ISSUE_TEMPLATE/testing.md
@@ -0,0 +1,18 @@
---
name: Testing
about: Request new tests, propose enhancements to existing tests, or report test failures.
title: "[TESTS]"
labels: tests
assignees: ''

---

### Testing Requirement
- Describe the testing need or issue

### Test Scenarios
- Detail specific test scenarios to be addressed

### Acceptance Criteria
- What are the criteria for the test to be considered successful?
4 changes: 2 additions & 2 deletions aana/configs/pipeline.py
@@ -172,7 +172,7 @@
"name": "hf_blip2_opt_2_7b",
"type": "ray_deployment",
"deployment_name": "hf_blip2_deployment_opt_2_7b",
"method": "generate_captions",
"method": "generate_batch",
"inputs": [
{
"name": "images",
@@ -248,7 +248,7 @@
"name": "hf_blip2_opt_2_7b_video",
"type": "ray_deployment",
"deployment_name": "hf_blip2_deployment_opt_2_7b",
"method": "generate_captions",
"method": "generate_batch",
"flatten_by": "video_batch.videos.[*].frames.[*]",
"inputs": [
{
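The `generate_captions` method was renamed to `generate_batch`, and the video node applies it across every frame of every video through the `flatten_by` path. A minimal sketch of what flattening by `video_batch.videos.[*].frames.[*]` means (illustrative only; the project's actual path resolver is not shown in this diff):

```python
from typing import Any, Dict, List


def flatten_frames(video_batch: Dict[str, Any]) -> List[Any]:
    """Flatten the nested videos -> frames structure into one flat list,
    mirroring the path "video_batch.videos.[*].frames.[*]": each "[*]"
    level is expanded so a single generate_batch call sees all frames."""
    frames: List[Any] = []
    for video in video_batch["videos"]:
        frames.extend(video["frames"])
    return frames


# Two videos with two frames and one frame -> one batch of three frames.
video_batch = {"videos": [{"frames": ["f0", "f1"]}, {"frames": ["f2"]}]}
assert flatten_frames(video_batch) == ["f0", "f1", "f2"]
```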
2 changes: 1 addition & 1 deletion aana/configs/settings.py
@@ -8,7 +8,7 @@ class Settings(BaseSettings):
"""

tmp_data_dir: Path = Path("/tmp/aana/data")
tmp_data_dir: Path = Path("/tmp/aana_data")


settings = Settings()
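Because `Settings` extends pydantic's `BaseSettings`, the new `/tmp/aana_data` default should stay overridable from the environment without code changes; a hedged sketch, assuming pydantic v1's default case-insensitive field-to-variable mapping:

```python
import os
from pathlib import Path

from pydantic import BaseSettings  # pydantic v1 style, as used in this repo

# Hypothetical override path, set before Settings is instantiated.
os.environ["TMP_DATA_DIR"] = "/mnt/scratch/aana"


class Settings(BaseSettings):
    tmp_data_dir: Path = Path("/tmp/aana_data")


# The environment variable wins over the coded default.
assert Settings().tmp_data_dir == Path("/mnt/scratch/aana")
```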
63 changes: 52 additions & 11 deletions aana/deployments/hf_blip2_deployment.py
@@ -1,4 +1,4 @@
from typing import Any, Dict, List
from typing import Any, Dict, List, TypedDict
from pydantic import BaseModel, Field, validator
from ray import serve
import torch
@@ -48,6 +48,28 @@ def validate_dtype(cls, value: Dtype) -> Dtype:
return value


class CaptioningOutput(TypedDict):
"""
The output of the captioning model.
Attributes:
caption (str): the caption
"""

caption: str


class CaptioningBatchOutput(TypedDict):
"""
The output of the captioning model for a batch of images.
Attributes:
captions (List[str]): the list of captions
"""

captions: List[str]


@serve.deployment
class HFBlip2Deployment(BaseDeployment):
"""
@@ -71,19 +93,19 @@ async def apply_config(self, config: Dict[str, Any]):
# and process them in parallel
self.batch_size = config_obj.batch_size
self.num_processing_threads = config_obj.num_processing_threads
# The actual inference is done in _generate_captions()
# The actual inference is done in _generate()
# We use lambda because BatchProcessor expects dict as input
# and we use **kwargs to unpack the dict into named arguments for _generate_captions()
# and we use **kwargs to unpack the dict into named arguments for _generate()
self.batch_processor = BatchProcessor(
process_batch=lambda request: self._generate_captions(**request),
process_batch=lambda request: self._generate(**request),
batch_size=self.batch_size,
num_threads=self.num_processing_threads,
)

# Load the model and processor for BLIP2 from HuggingFace
self.model_id = config_obj.model
self.dtype = config_obj.dtype
self.torch_dtype = Dtype.to_torch(self.dtype)
self.torch_dtype = self.dtype.to_torch()
self.model = Blip2ForConditionalGeneration.from_pretrained(
self.model_id, torch_dtype=self.torch_dtype
)
@@ -92,25 +114,44 @@ async def apply_config(self, config: Dict[str, Any]):
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.model.to(self.device)

async def generate_captions(self, **kwargs) -> Dict[str, Any]:
async def generate(self, image: Image) -> CaptioningOutput:
"""
Generate a caption for the given image.
Args:
image (Image): the image
Returns:
CaptioningOutput: the dictionary with one key "caption"
and the caption for the image as value
Raises:
InferenceException: if the inference fails
"""
captions: CaptioningBatchOutput = await self.batch_processor.process(
{"images": [image]}
)
return CaptioningOutput(caption=captions["captions"][0])

async def generate_batch(self, **kwargs) -> CaptioningBatchOutput:
"""
Generate captions for the given images.
Args:
images (List[Image]): the images
Returns:
Dict[str, Any]: the dictionary with one key "captions"
CaptioningBatchOutput: the dictionary with one key "captions"
and the list of captions for the images as value
Raises:
InferenceException: if the inference fails
"""
# Call the batch processor to process the requests
# The actual inference is done in _generate_captions()
# The actual inference is done in _generate()
return await self.batch_processor.process(kwargs)

def _generate_captions(self, images: List[Image]) -> Dict[str, Any]:
def _generate(self, images: List[Image]) -> CaptioningBatchOutput:
"""
Generate captions for the given images.
@@ -120,7 +161,7 @@ def _generate_captions(self, images: List[Image]) -> Dict[str, Any]:
images (List[Image]): the images
Returns:
Dict[str, Any]: the dictionary with one key "captions"
CaptioningBatchOutput: the dictionary with one key "captions"
and the list of captions for the images as value
Raises:
@@ -141,6 +182,6 @@ def _generate_captions(self, images: List[Image]) -> Dict[str, Any]:
generated_texts = [
generated_text.strip() for generated_text in generated_texts
]
return {"captions": generated_texts}
return CaptioningBatchOutput(captions=generated_texts)
except Exception as e:
raise InferenceException(self.model_id) from e
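The deployment's public API is now split into `generate` for a single image and `generate_batch` for many; both funnel through the same `BatchProcessor`, so single-image calls still participate in dynamic batching. A hedged usage sketch: the handle calls follow the 2023-era Ray Serve API and may differ in newer releases, the deployment name is taken from `aana/configs/pipeline.py`, and `Image` construction is omitted.

```python
import ray
from ray import serve


def caption(images):
    """images: a list of aana Image objects (construction not shown here)."""
    # Assumption: the deployment is already running under this name.
    handle = serve.get_deployment("hf_blip2_deployment_opt_2_7b").get_handle(sync=True)

    # One image -> CaptioningOutput, e.g. {"caption": "a cat on a sofa"}
    single = ray.get(handle.generate.remote(image=images[0]))

    # A batch -> CaptioningBatchOutput, e.g. {"captions": ["...", "..."]}
    batch = ray.get(handle.generate_batch.remote(images=images))
    return single["caption"], batch["captions"]
```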
47 changes: 37 additions & 10 deletions aana/deployments/vllm_deployment.py
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional
from typing import Any, AsyncGenerator, Dict, List, Optional, TypedDict
from pydantic import BaseModel, Field
from ray import serve
from vllm.engine.arg_utils import AsyncEngineArgs
@@ -34,6 +34,28 @@ class VLLMConfig(BaseModel):
max_model_len: Optional[int] = Field(default=None)


class LLMOutput(TypedDict):
"""
The output of the LLM.
Attributes:
text (str): the generated text
"""

text: str


class LLMBatchOutput(TypedDict):
"""
The output of the LLM for a batch of inputs.
Attributes:
texts (List[str]): the list of generated texts
"""

texts: List[str]


@serve.deployment
class VLLMDeployment(BaseDeployment):
"""
@@ -78,7 +100,9 @@ async def apply_config(self, config: Dict[str, Any]):
# create the engine
self.engine = AsyncLLMEngine.from_engine_args(args)

async def generate_stream(self, prompt: str, sampling_params: SamplingParams):
async def generate_stream(
self, prompt: str, sampling_params: SamplingParams
) -> AsyncGenerator[LLMOutput, None]:
"""
Generate completion for the given prompt and stream the results.
@@ -87,7 +111,7 @@ async def generate_stream(self, prompt: str, sampling_params: SamplingParams):
sampling_params (SamplingParams): the sampling parameters
Yields:
dict: the generated text
LLMOutput: the dictionary with the key "text" and the generated text as the value
"""
prompt = str(prompt)
sampling_params = merged_options(self.default_sampling_params, sampling_params)
@@ -108,7 +132,7 @@ async def generate_stream(self, prompt: str, sampling_params: SamplingParams):
num_returned = 0
async for request_output in results_generator:
text_output = request_output.outputs[0].text[num_returned:]
yield {"text": text_output}
yield LLMOutput(text=text_output)
num_returned += len(text_output)
except GeneratorExit as e:
# If the generator is cancelled, we need to cancel the request
@@ -118,7 +142,7 @@ async def generate_stream(self, prompt: str, sampling_params: SamplingParams):
except Exception as e:
raise InferenceException(model_name=self.model) from e

async def generate(self, prompt: str, sampling_params: SamplingParams):
async def generate(self, prompt: str, sampling_params: SamplingParams) -> LLMOutput:
"""
Generate completion for the given prompt.
@@ -127,14 +151,16 @@ async def generate(self, prompt: str, sampling_params: SamplingParams):
sampling_params (SamplingParams): the sampling parameters
Returns:
dict: the generated text
LLMOutput: the dictionary with the key "text" and the generated text as the value
"""
generated_text = ""
async for chunk in self.generate_stream(prompt, sampling_params):
generated_text += chunk["text"]
return {"text": generated_text}
return LLMOutput(text=generated_text)

async def generate_batch(self, prompts: List[str], sampling_params: SamplingParams):
async def generate_batch(
self, prompts: List[str], sampling_params: SamplingParams
) -> LLMBatchOutput:
"""
Generate completion for the batch of prompts.
@@ -143,11 +169,12 @@ async def generate_batch(self, prompts: List[str], sampling_params: SamplingParams):
sampling_params (SamplingParams): the sampling parameters
Returns:
dict: the generated texts
LLMBatchOutput: the dictionary with the key "texts"
and the list of generated texts as the value
"""
texts = []
for prompt in prompts:
text = await self.generate(prompt, sampling_params)
texts.append(text["text"])

return {"texts": texts}
return LLMBatchOutput(texts=texts)
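Typing the return values as `TypedDict`s (`LLMOutput`, `LLMBatchOutput`) keeps the payloads plain dicts at runtime while documenting the key names for type checkers. Since each streamed `LLMOutput` carries only the newly generated slice of text, a consumer can rebuild the full completion by concatenation; a minimal sketch, with the call style illustrative rather than the project's exact invocation:

```python
from typing import List


async def stream_completion(deployment, prompt: str, sampling_params) -> str:
    """Collect a full completion from VLLMDeployment.generate_stream."""
    pieces: List[str] = []
    async for chunk in deployment.generate_stream(prompt, sampling_params):
        # Each chunk is an LLMOutput: {"text": "<newly generated piece>"}
        pieces.append(chunk["text"])
    # Concatenation reproduces what generate() returns for the same inputs.
    return "".join(pieces)
```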