Skip to content

Commit

Permalink
More improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
NielsRogge committed Dec 7, 2023
1 parent 5274692 commit c6b7e52
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 29 deletions.
4 changes: 2 additions & 2 deletions src/transformers/models/llava/modeling_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,8 +353,8 @@ def forward(
>>> import requests
>>> from transformers import AutoProcessor, LlavaForConditionalGeneration
- >>> model = LlavaForConditionalGeneration.from_pretrained(PATH_TO_CONVERTED_WEIGHTS)
- >>> processor = AutoProcessor.from_pretrained(PATH_TO_CONVERTED_TOKENIZER)
+ >>> model = LlavaForConditionalGeneration.from_pretrained("llava-hf/llava-1.5-7b-hf")
+ >>> processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
>>> prompt = "<image>\nUSER: What's the content of the image?\nASSISTANT:"
>>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
Expand Down
30 changes: 3 additions & 27 deletions src/transformers/models/llava/processing_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
"""


- import warnings
- from typing import Callable, List, Optional, Union
+ from typing import List, Optional, Union

from ...feature_extraction_utils import BatchFeature
from ...image_utils import ImageInput
Expand All @@ -45,23 +44,7 @@ class LlavaProcessor(ProcessorMixin):
image_processor_class = "CLIPImageProcessor"
tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast")

- # Copied from transformers.models.clip.processing_clip.CLIPProcessor.__init__
- def __init__(self, image_processor=None, tokenizer=None, **kwargs):
- feature_extractor = None
- if "feature_extractor" in kwargs:
- warnings.warn(
- "The `feature_extractor` argument is deprecated and will be removed in v5, use `image_processor`"
- " instead.",
- FutureWarning,
- )
- feature_extractor = kwargs.pop("feature_extractor")
-
- image_processor = image_processor if image_processor is not None else feature_extractor
- if image_processor is None:
- raise ValueError("You need to specify an `image_processor`.")
- if tokenizer is None:
- raise ValueError("You need to specify a `tokenizer`.")
-
+ def __init__(self, image_processor=None, tokenizer=None):
super().__init__(image_processor, tokenizer)

def __call__(
Expand All @@ -70,7 +53,6 @@ def __call__(
images: ImageInput = None,
padding: Union[bool, str, PaddingStrategy] = False,
truncation: Union[bool, str, TruncationStrategy] = None,
- transform: Callable = None,
max_length=None,
return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
) -> BatchFeature:
Expand Down Expand Up @@ -103,10 +85,6 @@ def __call__(
Maximum length of the returned list and optionally padding length (see above).
truncation (`bool`, *optional*):
Activates truncation to cut input sequences longer than `max_length` to `max_length`.
- transform (`Callable`, *optional*):
- A custom transform function that accepts a single image can be passed for training. For example,
- `torchvision.Compose` can be used to compose multiple functions. If `None` a preset inference-specific
- set of transforms will be applied to the images
return_tensors (`str` or [`~utils.TensorType`], *optional*):
If set, will return tensors of a particular framework. Acceptable values are:
Expand All @@ -125,9 +103,7 @@ def __call__(
- **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
"""
if images is not None:
- pixel_values = self.image_processor(images, transform=transform, return_tensors=return_tensors)[
- "pixel_values"
- ]
+ pixel_values = self.image_processor(images, return_tensors=return_tensors)["pixel_values"]
else:
pixel_values = None
text_inputs = self.tokenizer(
Expand Down

0 comments on commit c6b7e52

Please sign in to comment.