Skip to content

Commit

Permalink
[LLaVa-NeXT] Small fixes (#30841)
Browse files Browse the repository at this point in the history
* First draft

* Update docstring
  • Loading branch information
NielsRogge authored May 16, 2024
1 parent 4b3eb19 commit 9fd606d
Showing 1 changed file with 8 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ class LlavaNextImageProcessor(BaseImageProcessor):
number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
do_pad (`bool`, *optional*, defaults to `True`):
Whether to pad the image. If `True` will pad the images in the batch to the largest image in the batch
and create a pixel mask. Padding will be applied to the bottom and right of the image with zeros.
Whether to pad the image. If `True`, will pad the patch dimension of the images in the batch to the largest
number of patches in the batch. Padding will be applied to the bottom and right with zeros.
do_convert_rgb (`bool`, *optional*, defaults to `True`):
Whether to convert the image to RGB.
"""
Expand Down Expand Up @@ -203,6 +203,7 @@ def __init__(
self.do_normalize = do_normalize
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
self.do_pad = do_pad
self.do_convert_rgb = do_convert_rgb

# Copied from transformers.models.clip.image_processing_clip.CLIPImageProcessor.resize with CLIP->LLaVa
Expand Down Expand Up @@ -572,7 +573,7 @@ def preprocess(
do_normalize: bool = None,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = True,
do_pad: Optional[bool] = None,
do_convert_rgb: bool = None,
return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
Expand Down Expand Up @@ -609,9 +610,9 @@ def preprocess(
image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to
`True`.
do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image. If `True` will pad the images in the batch to the largest image in the batch
and create a pixel mask. Padding will be applied to the bottom and right of the image with zeros.
do_pad (`bool`, *optional*, defaults to `self.do_pad`):
Whether to pad the image. If `True`, will pad the patch dimension of the images in the batch to the largest
number of patches in the batch. Padding will be applied to the bottom and right with zeros.
do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
Whether to convert the image to RGB.
return_tensors (`str` or `TensorType`, *optional*):
Expand Down Expand Up @@ -647,6 +648,7 @@ def preprocess(
do_normalize = do_normalize if do_normalize is not None else self.do_normalize
image_mean = image_mean if image_mean is not None else self.image_mean
image_std = image_std if image_std is not None else self.image_std
do_pad = do_pad if do_pad is not None else self.do_pad
do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb

images = make_list_of_images(images)
Expand Down

0 comments on commit 9fd606d

Please sign in to comment.