# Review notes. These lines precede the first "diff --git" header and are not part of the patch itself.
#
# 1. src/transformers/feature_extraction_utils.py (_get_is_as_tensor_fns, next to `import torch`):
#    - Adds recursive_ndarray_check(value): while `value` is a non-empty list/tuple it
#      descends into value[0]; it returns whether the value it ends on is an np.ndarray.
#    - as_tensor() now uses that check instead of testing only value[0], so nested
#      lists/tuples whose first innermost item is an ndarray are also packed with
#      np.array() before torch.tensor() is called.
# 2. src/transformers/models/videomae/image_processing_videomae.py (preprocess):
#    - "pixel_values" is stored as np.asarray(videos) when return_tensors is not None;
#      otherwise the nested list is kept as before.
#
# NOTE(review): recursive_ndarray_check() also returns True when `value` is itself an
#   np.ndarray, so as_tensor() now calls np.array() on it too - presumably an extra copy;
#   confirm whether np.asarray() was intended.
# NOTE(review): only the first element is inspected at each nesting level; mixed or
#   ragged content is not detected by the check.
# NOTE(review): line breaks and indentation below were reconstructed from a
#   whitespace-collapsed copy of this patch - verify against upstream before applying.
diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py
index fe1f7a78c93f74..258039f226ffe8 100644
--- a/src/transformers/feature_extraction_utils.py
+++ b/src/transformers/feature_extraction_utils.py
@@ -135,8 +135,13 @@ def _get_is_as_tensor_fns(self, tensor_type: Optional[Union[str, TensorType]] =
                 raise ImportError("Unable to convert output to PyTorch tensors format, PyTorch is not installed.")
             import torch  # noqa
 
+            def recursive_ndarray_check(value):
+                if isinstance(value, (list, tuple)) and len(value) > 0:
+                    return recursive_ndarray_check(value[0])
+                return isinstance(value, np.ndarray)
+
             def as_tensor(value):
-                if isinstance(value, (list, tuple)) and len(value) > 0 and isinstance(value[0], np.ndarray):
+                if recursive_ndarray_check(value):
                     value = np.array(value)
                 return torch.tensor(value)
 
diff --git a/src/transformers/models/videomae/image_processing_videomae.py b/src/transformers/models/videomae/image_processing_videomae.py
index 6df708eec3ea04..7501668bd853ff 100644
--- a/src/transformers/models/videomae/image_processing_videomae.py
+++ b/src/transformers/models/videomae/image_processing_videomae.py
@@ -339,5 +339,7 @@ def preprocess(
             for video in videos
         ]
 
-        data = {"pixel_values": videos}
+        # Speeds up tensor conversion - see: https://github.com/huggingface/transformers/pull/28221/files
+        data = {"pixel_values": np.asarray(videos) if return_tensors is not None else videos}
+
         return BatchFeature(data=data, tensor_type=return_tensors)