From bf66771de5888452a3e394a9829c42421229df44 Mon Sep 17 00:00:00 2001
From: ftshijt
Date: Thu, 28 Sep 2023 11:32:39 -0400
Subject: [PATCH] update publication

---
Review notes (text between the "---" marker and the first "diff --git" line
is not part of the commit):
  * chou2023evaluating: entry type corrected from "@inproceddings" to
    "@inproceedings", matching every other conference entry in this hunk.
  * shi2023findings: "ho lam Chung" rewritten as "Chung, Ho Lam" so the
    lowercase given name is not parsed as a von-particle and dropped from
    the rendered author list. NOTE(review): confirm the preferred spelling
    of the given name with the authors.

 _bibliography/papers.bib      |  163 ++
 _site/publications/index.html | 2919 +++++++++++++++++++++++++++++++++
 2 files changed, 3082 insertions(+)

diff --git a/_bibliography/papers.bib b/_bibliography/papers.bib
index d0fe7de0..114c9766 100644
--- a/_bibliography/papers.bib
+++ b/_bibliography/papers.bib
@@ -14,6 +14,169 @@ @string{WASPAA
 @string{APSIPA = {Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)}}
 @string{ICML = {Proceedings of the International Conference on Machine Learning (ICML)}}
 @string{ICLR = {Proceedings of the International Conference on Learning Representations (ICLR)}}
+@string{NeurIPS = {Proceedings of the Conference on Neural Information Processing Systems}}
+
+
+@inproceedings{chou2023evaluating,
+  abbr={ASR},
+  abbr_publisher={ASRU},
+  title={Evaluating Self-supervised Speech Models on a Taiwanese Hokkien Corpus},
+  author={Yi-Hui Chou and Kalvin Chang and Meng-Ju Wu and Winston Ou and Alice Wen-Hsin Bi and Carol Yang and Bryan Y. Chen and Rong-Wei Pai and Po-Yen Yeh and Jo-Peng Chiang and Lu-Tshiann Phoann and Winnie Chang and Chenxuan Cui and Noel Chen and Jiatong Shi},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+
+}
+
+@inproceedings{huang2023singing,
+  abbr={SVC},
+  abbr_publisher={ASRU},
+  title={The Singing Voice Conversion Challenge 2023},
+  author={Wen-Chin Huang and Lester Phillip Violeta and Songxiang Liu and Jiatong Shi and Tomoki Toda},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{shiohara2023domain,
+  abbr={ASR},
+  abbr_publisher={ASRU},
+  title={Domain Adaptation by Data Distribution Matching via Submodularity for Speech Recognition},
+  author={Yusuke Shinohara and Shinji Watanabe},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{kano2023summarize,
+  abbr={Summarization&ST},
+  abbr_publisher={ASRU},
+  title={Summarize while Translating: Universal Model with Parallel Decoding for Summarization and Translation},
+  author={Takatomo Kano and Atsunori Ogawa and Marc Delcroix and Kohei Matsuura and Takanori Ashihara and William Chen and Shinji Watanabe},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{li2023yodas,
+  abbr={ASR},
+  abbr_publisher={ASRU},
+  title={YODAS: Youtube-Oriented Dataset for Audio and Speech},
+  author={Xinjian Li and Shinnosuke Takamichi and Takaaki Saeki and William Chen and Sayaka Shiota and Shinji Watanabe},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{kohei2023single,
+  abbr={SE&SS},
+  abbr_publisher={ASRU},
+  title={A Single Speech Enhancement Model Unifying Dereverberation, Denoising, Speaker Counting, Separation, and Extraction},
+  author={Kohei Saijo and Wangyou Zhang and Zhong-Qiu Wang and Shinji Watanabe and Tetsunori Kobayashi and Tetsuji Ogawa},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{wang2023torchaudio,
+  abbr={ASR&SSL},
+  abbr_publisher={ASRU},
+  title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
+  author={Jeff Hwang and Moto Hira and Caroline Chen and Xiaohui Zhang and Zhaoheng Ni and Guangzhi Sun and Pingchuan Ma and Ruizhe Huang and Vineel Pratap and Yuekai Zhang and Anurag Kumar and Chin-Yun Yu and Chuang Zhu and Chunxi Liu and Jacob Kahn and Mirco Ravanelli and Peng Sun and Shinji Watanabe and Yangyang Shi and Yumeng Tao},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{zhang2023toward,
+  abbr={SE},
+  abbr_publisher={ASRU},
+  title={Toward Universal Speech Enhancement For Diverse Input Conditions},
+  author={Wangyou Zhang and Kohei Saijo and Zhong-Qiu Wang and Shinji Watanabe and Yanmin Qian},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{shi2023findings,
+  abbr={ASR},
+  abbr_publisher={ASRU},
+  title={Findings of the 2023 ML-SUPERB Challenge: Pre-Training and Evaluation over More Languages and Beyond},
+  author={Jiatong Shi and William Chen and Dan Berrebbi and Hsiu-Hsuan Wang and Wei Ping Huang and En Pei Hu and Chung, Ho Lam and Xuankai Chang and Yuxun Tang and Shang-Wen Li and Abdelrahman Mohamed and Hung-yi Lee and Shinji Watanabe},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{chen2023joint,
+  abbr={SSL},
+  abbr_publisher={ASRU},
+  title={Joint Prediction and Denoising for Large-Scale Multilingual Self-Supervised Learning},
+  author={William Chen and Jiatong Shi and Brian Yan and Dan Berrebbi and Wangyou Zhang and Yifan Peng and Xuankai Chang and Soumi Maiti and Shinji Watanabe},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{someki2023segment,
+  abbr={ASR},
+  abbr_publisher={ASRU},
+  title={Segment-Level Vectorized Beam Search Based on Partially Autoregressive Inference},
+  author={Masao Someki and Nicholas Eng and Yosuke Higuchi and Shinji Watanabe},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{peng2023reproducing,
+  abbr={ASR&ST},
+  abbr_publisher={ASRU},
+  title={Reproducing Whisper-Style Training Using an Open-Source Toolkit and Publicly Available Data},
+  author={Yifan Peng and Jinchuan Tian and Brian Yan and Dan Berrebbi and Xuankai Chang and Xinjian Li and Jiatong Shi and Siddhant Arora and William Chen and Roshan Sharma and Wangyou Zhang and Yui Sudo and Muhammad Shakeel and Jee-weon Jung and Soumi Maiti and Shinji Watanabe},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{roshan2023espnet,
+  abbr={Summarization},
+  abbr_publisher={ASRU},
+  title={ESPNet-SUMM: Introducing a novel large dataset, toolkit, and a cross-corpora evaluation of speech summarization systems},
+  author={Roshan Sharma and William Chen and Takatomo Kano and Ruchira Sharma and Atsunori Ogawa and Siddhant Arora and Marc Delcroix and Rita Singh and Shinji Watanabe and Bhiksha Raj},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+
+@inproceedings{fujita2023lvctc,
+  abbr={ASR},
+  abbr_publisher={ASRU},
+  title={LV-CTC: Non-autoregressive ASR with CTC and latent variable models},
+  author={Yuya Fujita and Shinji Watanabe and Xuankai Chang and Takashi Maekaku},
+  booktitle=ASRU,
+  year={2023},
+  publisher={IEEE},
+}
+
+@inproceedings{wang2023unssor,
+  abbr={SS},
+  abbr_publisher={NeurIPS},
+  title={UNSSOR: Unsupervised Neural Speech Separation by Leveraging Over-determined Training Mixtures},
+  author={Zhong-Qiu Wang and Shinji Watanabe},
+  booktitle=NeurIPS,
+  year={2023},
+}
+
+@inproceedings{masuyama2023exploring,
+  abbr={SS},
+  abbr_publisher={WASPAA},
+  title={Exploring the Integration of Speech Separation and Recognition with Self-Supervised Learning Representation},
+  author={Yoshiki Masuyama and Xuankai Chang and Wangyou Zhang and Samuele Cornell and Zhong-Qiu Wang and Nobutaka Ono and Yanmin Qian and Shinji Watanabe},
+  booktitle=WASPAA,
+  year={2023},
+}
+
+
 
 @article{maciejewski2023adilemma,
   abbr={SS},
diff --git a/_site/publications/index.html b/_site/publications/index.html
index 4160ccc5..8281ef47 100644
--- a/_site/publications/index.html
+++ b/_site/publications/index.html
@@ -261,6 +261,2925 @@

2023

+ ASR + + + + + ASRU + + +
+ +
+ +
Evaluating Self-supervised Speech Models on a Taiwanese Hokkien Corpus
+
+ + + + + + + + + + + Yi-Hui Chou, + + + + + + + + + + + + + + + + + + + Kalvin Chang, + + + + + + + + + + + + + + + Meng-Ju Wu, + + + + + + + + + + + + + + + Winston Ou, + + + + + + + + + + + + + + + Alice Wen-Hsin Bi, + + + + + + + + + + + + + + + Carol Yang, + + + + + + + + + + + + + + + Bryan Y. Chen, + + + + + + + + + + + + + + + Rong-Wei Pai, + + + + + + + + + + + + + + + Po-Yen Yeh, + + + + + + + + + + + + + + + Jo-Peng Chiang, + + + + + + + + + + + + + + + Lu-Tshiann Phoann, + + + + + + + + + + + + + + + + + + + Winnie Chang, + + + + + + + + + + + + + + + Chenxuan Cui, + + + + + + + + + + + + + + + Noel Chen, + + + + + + + + + + + + + + + + + + + + and Jiatong Shi + + + + + +
+ +
+ + + 2023 + +
+ + + + + + +
+ + +
  • +
    + + + SVC + + + + + ASRU + + +
    + +
    + +
    The Singing Voice Conversion Challenge 2023
    +
    + + + + + + + + + + + Wen-Chin Huang, + + + + + + + + + + + + + + + Lester Phillip Violeta, + + + + + + + + + + + + + + + Songxiang Liu, + + + + + + + + + + + + + + + + + + + + Jiatong Shi, + + + + + + + + + + + + + + + and Tomoki Toda + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + ASR + + + + + ASRU + + +
    + +
    + +
    Domain Adaptation by Data Distribution Matching via Submodularity for Speech Recognition
    +
    + + + + + + + + + + + Yusuke Shinohara, + + + + + + + + + + + + + + + + + + + + and Shinji Watanabe + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + Summarization&ST + + + + + ASRU + + +
    + +
    + +
    Summarize while Translating: Universal Model with Parallel Decoding for Summarization and Translation
    +
    + + + + + + + + + + + Takatomo Kano, + + + + + + + + + + + + + + + Atsunori Ogawa, + + + + + + + + + + + + + + + Marc Delcroix, + + + + + + + + + + + + + + + Kohei Matsuura, + + + + + + + + + + + + + + + Takanori Ashihara, + + + + + + + + + + + + + + + William Chen, + + + + + + + + + + + + + + + + + + + + and Shinji Watanabe + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + ASR + + + + + ASRU + + +
    + +
    + +
    YODAS: Youtube-Oriented Dataset for Audio and Speech
    +
    + + + + + + + + + + + Xinjian Li, + + + + + + + + + + + + + + + Shinnosuke Takamichi, + + + + + + + + + + + + + + + Takaaki Saeki, + + + + + + + + + + + + + + + William Chen, + + + + + + + + + + + + + + + Sayaka Shiota, + + + + + + + + + + + + + + + + + + + + and Shinji Watanabe + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + SE&SS + + + + + ASRU + + +
    + +
    + +
    A Single Speech Enhancement Model Unifying Dereverberation, Denoising, Speaker Counting, Separation, and Extraction
    +
    + + + + + + + + + + + Kohei Saijo, + + + + + + + + + + + + + + + Wangyou Zhang, + + + + + + + + + + + + + + + Zhong-Qiu Wang, + + + + + + + + + + + + + + + + + + + + Shinji Watanabe, + + + + + + + + + + + + + + + Tetsunori Kobayashi, + + + + + + + + + + + + + + + and Tetsuji Ogawa + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + ASR&SSL + + + + + ASRU + + +
    + +
    + +
    TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch
    +
    + + + + + + + + + + + Jeff Hwang, + + + + + + + + + + + + + + + Moto Hira, + + + + + + + + + + + + + + + Caroline Chen, + + + + + + + + + + + + + + + Xiaohui Zhang, + + + + + + + + + + + + + + + Zhaoheng Ni, + + + + + + + + + + + + + + + Guangzhi Sun, + + + + + + + + + + + + + + + Pingchuan Ma, + + + + + + + + + + + + + + + Ruizhe Huang, + + + + + + + + + + + + + + + Vineel Pratap, + + + + + + + + + + + + + + + Yuekai Zhang, + + + + + + + + + + + + + + + Anurag Kumar, + + + + + + + + + + + + + + + Chin-Yun Yu, + + + + + + + + + + + + + + + Chuang Zhu, + + + + + + + + + + + + + + + Chunxi Liu, + + + + + + + + + + + + + + + Jacob Kahn, + + + + + + + + + + + + + + + Mirco Ravanelli, + + + + + + + + + + + + + + + Peng Sun, + + + + + + + + + + + + + + + + + + + + Shinji Watanabe, + + + + + + + + + + + + + + + + + + + Yangyang Shi, + + + + + + + + + + + + + + + and Yumeng Tao + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + SE + + + + + ASRU + + +
    + +
    + +
    Toward Universal Speech Enhancement For Diverse Input Conditions
    +
    + + + + + + + + + + + Wangyou Zhang, + + + + + + + + + + + + + + + Kohei Saijo, + + + + + + + + + + + + + + + Zhong-Qiu Wang, + + + + + + + + + + + + + + + + + + + + Shinji Watanabe, + + + + + + + + + + + + + + + and Yanmin Qian + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + ASR + + + + + ASRU + + +
    + +
    + +
    Findings of the 2023 ML-SUPERB Challenge: Pre-Training and Evaluation over More Languages and Beyond
    +
    + + + + + + + + + + + + + + + + Jiatong Shi, + + + + + + + + + + + + + + + William Chen, + + + + + + + + + + + + + + + Dan Berrebbi, + + + + + + + + + + + + + + + Hsiu-Hsuan Wang, + + + + + + + + + + + + + + + Wei Ping Huang, + + + + + + + + + + + + + + + En Pei Hu, + + + + + + + + + + + + + + + Ho Lam Chung, + + + + + + + + + + + + + + + + + + + + Xuankai Chang, + + + + + + + + + + + + + + + Yuxun Tang, + + + + + + + + + + + + + + + Shang-Wen Li, + + + + + + + + + + + + + + + Abdelrahman Mohamed, + + + + + + + + + + + + + + + Hung-yi Lee, + + + + + + + + + + + + + + + + + + + + and Shinji Watanabe + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + SSL + + + + + ASRU + + +
    + +
    + +
    Joint Prediction and Denoising for Large-Scale Multilingual Self-Supervised Learning
    +
    + + + + + + + + + + + William Chen, + + + + + + + + + + + + + + + + + + + + Jiatong Shi, + + + + + + + + + + + + + + + + + + + + Brian Yan, + + + + + + + + + + + + + + + Dan Berrebbi, + + + + + + + + + + + + + + + Wangyou Zhang, + + + + + + + + + + + + + + + + + + + + Yifan Peng, + + + + + + + + + + + + + + + + + + + + Xuankai Chang, + + + + + + + + + + + + + + + Soumi Maiti, + + + + + + + + + + + + + + + + + + + + and Shinji Watanabe + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + ASR + + + + + ASRU + + +
    + +
    + +
    Segment-Level Vectorized Beam Search Based on Partially Autoregressive Inference
    +
    + + + + + + + + + + + Masao Someki, + + + + + + + + + + + + + + + Nicholas Eng, + + + + + + + + + + + + + + + Yosuke Higuchi, + + + + + + + + + + + + + + + + + + + + and Shinji Watanabe + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + ASR&ST + + + + + ASRU + + +
    + +
    + +
    Reproducing Whisper-Style Training Using an Open-Source Toolkit and Publicly Available Data
    +
    + + + + + + + + + + + + + + + + Yifan Peng, + + + + + + + + + + + + + + + Jinchuan Tian, + + + + + + + + + + + + + + + + + + + + Brian Yan, + + + + + + + + + + + + + + + Dan Berrebbi, + + + + + + + + + + + + + + + + + + + + Xuankai Chang, + + + + + + + + + + + + + + + Xinjian Li, + + + + + + + + + + + + + + + + + + + + Jiatong Shi, + + + + + + + + + + + + + + + Siddhant Arora, + + + + + + + + + + + + + + + William Chen, + + + + + + + + + + + + + + + Roshan Sharma, + + + + + + + + + + + + + + + Wangyou Zhang, + + + + + + + + + + + + + + + Yui Sudo, + + + + + + + + + + + + + + + Muhammad Shakeel, + + + + + + + + + + + + + + + + + + + + Jee-weon Jung, + + + + + + + + + + + + + + + Soumi Maiti, + + + + + + + + + + + + + + + + + + + + and Shinji Watanabe + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + Summarization + + + + + ASRU + + +
    + +
    + +
    ESPNet-SUMM: Introducing a novel large dataset, toolkit, and a cross-corpora evaluation of speech summarization systems
    +
    + + + + + + + + + + + Roshan Sharma, + + + + + + + + + + + + + + + William Chen, + + + + + + + + + + + + + + + Takatomo Kano, + + + + + + + + + + + + + + + Ruchira Sharma, + + + + + + + + + + + + + + + Atsunori Ogawa, + + + + + + + + + + + + + + + Siddhant Arora, + + + + + + + + + + + + + + + Marc Delcroix, + + + + + + + + + + + + + + + Rita Singh, + + + + + + + + + + + + + + + + + + + + Shinji Watanabe, + + + + + + + + + + + + + + + and Bhiksha Raj + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + ASR + + + + + ASRU + + +
    + +
    + +
    LV-CTC: Non-autoregressive ASR with CTC and latent variable models
    +
    + + + + + + + + + + + Yuya Fujita, + + + + + + + + + + + + + + + + + + + + Shinji Watanabe, + + + + + + + + + + + + + + + + + + + + Xuankai Chang, + + + + + + + + + + + + + + + and Takashi Maekaku + + + + + +
    + +
    + + In IEEE Automatic Speech Recognition and Understanding Workshop (ASRU) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + SS + + + + + NeurIPS + + +
    + +
    + +
    UNSSOR: Unsupervised Neural Speech Separation by Leveraging Over-determined Training Mixtures
    +
    + + + + + + + + + + + Zhong-Qiu Wang, + + + + + + + + + + + + + + + + + + + + and Shinji Watanabe + + + + + +
    + +
    + + In Proceedings of the Conference on Neural Information Processing Systems + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + + SS + + + + + WASPAA + + +
    + +
    + +
    Exploring the Integration of Speech Separation and Recognition with Self-Supervised Learning Representation
    +
    + + + + + + + + + + + Yoshiki Masuyama, + + + + + + + + + + + + + + + + + + + + Xuankai Chang, + + + + + + + + + + + + + + + Wangyou Zhang, + + + + + + + + + + + + + + + Samuele Cornell, + + + + + + + + + + + + + + + Zhong-Qiu Wang, + + + + + + + + + + + + + + + Nobutaka Ono, + + + + + + + + + + + + + + + Yanmin Qian, + + + + + + + + + + + + + + + + + + + + and Shinji Watanabe + + + + + +
    + +
    + + In IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA) + + + 2023 + +
    + + + + + + +
    +
    +
  • +
  • +
    + + SS