Skip to content

Commit

Permalink
Merge pull request espnet#5817 from espnet/kamo-naoyuki-patch-1
Browse files Browse the repository at this point in the history
Avoid zero division for speech_volume_normalize
  • Loading branch information
kamo-naoyuki authored Jun 14, 2024
2 parents 63c4c09 + e840295 commit 19787b1
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions espnet2/train/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,8 @@ def _speech_process(
 if self.speech_volume_normalize is not None:
     speech = data[self.speech_name]
     ma = np.max(np.abs(speech))
-    data[self.speech_name] = speech * self.speech_volume_normalize / ma
+    if ma != 0:
+        data[self.speech_name] = speech * self.speech_volume_normalize / ma
 return data

def _text_process(
Expand Down Expand Up @@ -1420,7 +1421,10 @@ def _speech_process(
 # use a fixed scale to make it deterministic
 volume_scale = self.volume_low
 ma = np.max(np.abs(data[self.speech_name]))
-self._apply_to_all_signals(data, lambda x: x * volume_scale / ma, num_spk)
+if ma != 0:
+    self._apply_to_all_signals(
+        data, lambda x: x * volume_scale / ma, num_spk
+    )

 if self.categories and "category" in data:
     category = data.pop("category")
Expand Down Expand Up @@ -1531,7 +1535,10 @@ def __call__(
 if self.singing_volume_normalize is not None:
     singing = data[self.singing_name]
     ma = np.max(np.abs(singing))
-    data[self.singing_name] = singing * self.singing_volume_normalize / ma
+    if ma != 0:
+        data[self.singing_name] = (
+            singing * self.singing_volume_normalize / ma
+        )

 if self.midi_name in data and self.label_name in data:
     # Load label info
Expand Down

0 comments on commit 19787b1

Please sign in to comment.