paper.bib

@inproceedings{abadi2016tensorflow,
  author    = {Abadi, Mart{\'\i}n and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and others},
  title     = {{TensorFlow}: A System for Large-Scale Machine Learning},
  booktitle = {{USENIX} Symposium on Operating Systems Design and Implementation ({OSDI})},
  pages     = {265--283},
  year      = {2016},
}

@article{wu2020multi,
  author    = {Wu, Yu-Te and Chen, Berlin and Su, Li},
  title     = {Multi-Instrument Automatic Music Transcription With Self-Attention-Based Instance Segmentation},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {28},
  pages     = {2796--2809},
  year      = {2020},
  publisher = {IEEE},
  doi       = {10.1109/taslp.2020.3030482},
}

% The citation key carries 2018 while the year field is 2019; the key is kept
% unchanged so existing citations of this entry still resolve.
@inproceedings{hawthorne2018enabling,
  author    = {Hawthorne, Curtis and Stasyuk, Andriy and Roberts, Adam and Simon, Ian and Huang, Cheng{-}Zhi Anna and Dieleman, Sander and Elsen, Erich and Engel, Jesse H. and Eck, Douglas},
  title     = {Enabling Factorized Piano Music Modeling and Generation with the {MAESTRO} Dataset},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year      = {2019},
}

@inproceedings{parmar2018image,
  author    = {Parmar, Niki and Vaswani, Ashish and Uszkoreit, Jakob and Kaiser, {\L}ukasz and Shazeer, Noam and Ku, Alexander and Tran, Dustin},
  title     = {Image {Transformer}},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning ({ICML})},
  pages     = {4052--4061},
  year      = {2018},
}

% The url field is the resolver form of the doi field, written with a literal
% slash: a percent sign in a field value can act as a TeX comment character
% once it reaches the generated bibliography.
@incollection{Chen2018DeepLabV3,
  author    = {Chen, Liang-Chieh and Zhu, Yukun and Papandreou, George and Schroff, Florian and Adam, Hartwig},
  title     = {Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation},
  booktitle = {Computer Vision -- {ECCV} 2018},
  pages     = {833--851},
  year      = {2018},
  publisher = {Springer International Publishing},
  doi       = {10.1007/978-3-030-01234-2_49},
  url       = {https://doi.org/10.1007/978-3-030-01234-2_49},
}

@article{su2015combining,
  author    = {Su, Li and Yang, Yi{-}Hsuan},
  title     = {Combining Spectral and Temporal Representations for Multipitch Estimation of Polyphonic Music},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {23},
  number    = {10},
  pages     = {1600--1612},
  year      = {2015},
  publisher = {IEEE},
  doi       = {10.1109/taslp.2015.2442411},
}

@inproceedings{kelz2016potential,
  author    = {Kelz, Rainer and Dorfer, Matthias and Korzeniowski, Filip and B{\"{o}}ck, Sebastian and Arzt, Andreas and Widmer, Gerhard},
  title     = {On the Potential of Simple Framewise Approaches to Piano Transcription},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {475--481},
  year      = {2016},
}

@inproceedings{wei2021improving,
  author    = {Wei, I-Chieh and Wu, Chih-Wei and Su, Li},
  title     = {Improving Automatic Drum Transcription Using Large-Scale Audio-to-{MIDI} Aligned Data},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  year      = {2021},
  doi       = {10.1109/icassp39728.2021.9414409},
}

@inproceedings{wu2018automatic,
  author    = {Wu, Yu-Te and Chen, Berlin and Su, Li},
  title     = {Automatic Music Transcription Leveraging Generalized Cepstral Features and Deep Learning},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages     = {401--405},
  year      = {2018},
  doi       = {10.1109/icassp.2018.8462079},
}

@inproceedings{su2018vocal,
  author    = {Su, Li},
  title     = {Vocal Melody Extraction Using Patch-Based {CNN}},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages     = {371--375},
  year      = {2018},
  doi       = {10.1109/icassp.2018.8462420},
}

% NOTE(review): the first author's given names are only available here as
% initials -- expand them once confirmed against the paper.
@inproceedings{fu2019hierarchical,
  author    = {Fu, Z.-S. and Su, Li},
  title     = {Hierarchical Classification Networks for Singing Voice Segmentation and Transcription},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year      = {2019},
}

@article{yamada2019shakedrop,
  author    = {Yamada, Y. and Iwamura, M. and Akiba, T. and Kise, K.},
  title     = {{ShakeDrop} Regularization for Deep Residual Learning},
  journal   = {{IEEE} Access},
  volume    = {7},
  pages     = {186126--186136},
  year      = {2019},
  publisher = {IEEE},
  doi       = {10.1109/access.2019.2960566},
}

% The citation key carries 2018 while the year field is 2019; the key is kept
% unchanged so existing citations of this entry still resolve.
@article{miyato2018virtual,
  author    = {Miyato, Takeru and Maeda, Shin{-}ichi and Koyama, Masanori and Ishii, Shin},
  title     = {Virtual Adversarial Training: {A} Regularization Method for Supervised and Semi-Supervised Learning},
  journal   = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {41},
  number    = {8},
  pages     = {1979--1993},
  year      = {2019},
  doi       = {10.1109/TPAMI.2018.2858821},
  url       = {https://doi.org/10.1109/TPAMI.2018.2858821},
  timestamp = {Mon, 26 Oct 2020 09:04:26 +0100},
  biburl    = {https://dblp.org/rec/journals/pami/MiyatoMKI19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{chen2019harmony,
  author    = {Chen, Tsung-Ping and Su, Li},
  title     = {{Harmony Transformer}: Incorporating Chord Segmentation into Harmony Recognition},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year      = {2019},
}

@inproceedings{mora2010characterization,
  author    = {Mora, Joaqu{\'\i}n and G{\'o}mez, Francisco and G{\'o}mez, Emilia and Escobar-Borrego, Francisco and D{\'\i}az-B{\'a}{\~n}ez, Jos{\'e} Miguel},
  title     = {Characterization and melodic similarity of a cappella flamenco cantes},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {9--13},
  year      = {2010},
}

% Volume 18, issue 2 of this journal is the 2010 issue, published under the
% journal's pre-2014 title (without the IEEE/ACM prefix). The citation key keeps
% its 2009 suffix so existing citations of this entry still resolve.
@article{hsu2009improvement,
  author    = {Hsu, Chao-Ling and Jang, Jyh-Shing Roger},
  title     = {On the Improvement of Singing Voice Separation for Monaural Recordings Using the {MIR-1K} Dataset},
  journal   = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume    = {18},
  number    = {2},
  pages     = {310--319},
  year      = {2010},
  publisher = {IEEE},
  doi       = {10.1109/tasl.2009.2026503},
}

% NOTE(review): given names are stored as initials and the author list ends
% with the et-al token; confirm the full list against the paper.
@inproceedings{molina2014evaluation,
  author    = {Molina, E. and Barbancho-Perez, A. M. and Tard{\'o}n, L. J. and Barbancho-Perez, I. and others},
  title     = {Evaluation framework for automatic singing transcription},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year      = {2014},
}

@inproceedings{gillet2006enst,
  author    = {Gillet, Olivier and Richard, Ga{\"e}l},
  title     = {{ENST-Drums}: An Extensive Audio-Visual Database for Drum Signals Processing},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {156--159},
  year      = {2006},
}

@inproceedings{southall2017mdb,
  author    = {Southall, Carl and Wu, Chih-Wei and Lerch, Alexander and Hockman, Jason},
  title     = {{MDB Drums}: An Annotated Subset of {MedleyDB} for Automatic Drum Transcription},
  booktitle = {Late Breaking/Demos of the 18th International Society for Music Information Retrieval Conference ({ISMIR})},
  year      = {2017},
}

@inproceedings{chuang2020beat,
  author       = {Chuang, Yi-Chin and Su, Li},
  title        = {Beat and Downbeat Tracking of Symbolic Music Data Using Deep Recurrent Neural Networks},
  booktitle    = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
  pages        = {346--352},
  year         = {2020},
  organization = {IEEE},
}

@inproceedings{thickstun2018invariances,
  author    = {Thickstun, John and Harchaoui, Za{\"{\i}}d and Foster, Dean P. and Kakade, Sham M.},
  title     = {Invariances and Data Augmentation for Supervised Music Transcription},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages     = {2241--2245},
  year      = {2018},
  doi       = {10.1109/icassp.2018.8461686},
}

% The title deliberately starts in lower case: it is the library's name.
@inproceedings{bock2016madmom,
  author    = {B{\"o}ck, Sebastian and Korzeniowski, Filip and Schl{\"u}ter, Jan and Krebs, Florian and Widmer, Gerhard},
  title     = {madmom: {A} New {Python} Audio and Music Signal Processing Library},
  booktitle = {Proceedings of the {ACM} International Conference on Multimedia},
  pages     = {1174--1178},
  year      = {2016},
  doi       = {10.1145/2964284.2973795},
}

% The title deliberately starts in lower case: it is the library's name.
% The former organization value named an indexing service, not the conference
% sponsor, so that field has been dropped.
% NOTE(review): the volume value looks like it came from the same automated
% export -- confirm against the proceedings.
@inproceedings{mcfee2015librosa,
  author    = {McFee, Brian and Raffel, Colin and Liang, Dawen and Ellis, Daniel P. W. and McVicar, Matt and Battenberg, Eric and Nieto, Oriol},
  title     = {librosa: Audio and Music Signal Analysis in {Python}},
  booktitle = {Proceedings of the 14th {Python} in Science Conference},
  volume    = {8},
  pages     = {18--25},
  year      = {2015},
  doi       = {10.25080/majora-7b98e3ed-003},
}

@inproceedings{burgoyne2011anexpert,
  author    = {Burgoyne, John Ashley and Wild, Jonathan and Fujinaga, Ichiro},
  title     = {An Expert Ground Truth Set for Audio Chord Recognition and Music Analysis},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {633--638},
  year      = {2011},
}

@inproceedings{mauch2010approximate,
  author    = {Mauch, Matthias and Dixon, Simon},
  title     = {Approximate Note Transcription for the Improved Identification of Difficult Chords},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {135--140},
  year      = {2010},
}

@inproceedings{thickstun2017learning,
  author    = {Thickstun, John and Harchaoui, Za{\"\i}d and Kakade, Sham M.},
  title     = {Learning Features of Music from Scratch},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year      = {2017},
}