diff --git a/src/transformers/pipelines/zero_shot_audio_classification.py b/src/transformers/pipelines/zero_shot_audio_classification.py index 59500d14e104e7..8ed339a5b7f889 100644 --- a/src/transformers/pipelines/zero_shot_audio_classification.py +++ b/src/transformers/pipelines/zero_shot_audio_classification.py @@ -78,16 +78,17 @@ def __call__(self, audios: Union[np.ndarray, bytes, str], **kwargs): - A string containing a local path to an audio - An audio loaded in numpy candidate_labels (`List[str]`): - The candidate labels for this audio + The candidate labels for this audio. They will be formatted using *hypothesis_template*. hypothesis_template (`str`, *optional*, defaults to `"This is a sound of {}"`): - The sentence used in cunjunction with *candidate_labels* to attempt the audio classification by - replacing the placeholder with the candidate_labels. Then likelihood is estimated by using - logits_per_audio + The format used in conjunction with *candidate_labels* to attempt the audio classification by + replacing the placeholder with the candidate_labels. Pass "{}" if *candidate_labels* are + already formatted. Return: - A list of dictionaries containing result, one dictionary per proposed label. The dictionaries contain the + A list of dictionaries containing one entry per proposed label. Each dictionary contains the following keys: - - **label** (`str`) -- The label identified by the model. It is one of the suggested `candidate_label`. - - **score** (`float`) -- The score attributed by the model for that label (between 0 and 1). + - **label** (`str`) -- One of the suggested *candidate_labels*. + - **score** (`float`) -- The score attributed by the model to that label. It is a value between + 0 and 1, computed as the `softmax` of `logits_per_audio`. """ return super().__call__(audios, **kwargs) diff --git a/src/transformers/pipelines/zero_shot_image_classification.py b/src/transformers/pipelines/zero_shot_image_classification.py index 61d5ff52227559..b7e13e782e78cc 100644 --- a/src/transformers/pipelines/zero_shot_image_classification.py +++ b/src/transformers/pipelines/zero_shot_image_classification.py @@ -86,12 +86,12 @@ def __call__(self, images: Union[str, List[str], "Image", List["Image"]], **kwar - An image loaded in PIL directly candidate_labels (`List[str]`): - The candidate labels for this image + The candidate labels for this image. They will be formatted using *hypothesis_template*. hypothesis_template (`str`, *optional*, defaults to `"This is a photo of {}"`): - The sentence used in cunjunction with *candidate_labels* to attempt the image classification by - replacing the placeholder with the candidate_labels. Then likelihood is estimated by using - logits_per_image + The format used in conjunction with *candidate_labels* to attempt the image classification by + replacing the placeholder with the candidate_labels. Pass "{}" if *candidate_labels* are + already formatted. timeout (`float`, *optional*, defaults to None): The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and @@ -101,11 +101,11 @@ def __call__(self, images: Union[str, List[str], "Image", List["Image"]], **kwar Additional dictionary of keyword arguments passed along to the tokenizer. Return: - A list of dictionaries containing result, one dictionary per proposed label. The dictionaries contain the + A list of dictionaries containing one entry per proposed label. Each dictionary contains the following keys: - - - **label** (`str`) -- The label identified by the model. It is one of the suggested `candidate_label`. - - **score** (`float`) -- The score attributed by the model for that label (between 0 and 1). + - **label** (`str`) -- One of the suggested *candidate_labels*. + - **score** (`float`) -- The score attributed by the model to that label. It is a value between + 0 and 1, computed as the `softmax` of `logits_per_image`. """ return super().__call__(images, **kwargs)