-
Notifications
You must be signed in to change notification settings - Fork 102
/
paper.bib
262 lines (237 loc) · 9.42 KB
/
paper.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
% TensorFlow system paper (OSDI 2016). The product name is brace-protected so
% sentence-casing styles keep its capitalisation.
@inproceedings{abadi2016tensorflow,
  author    = {Abadi, Mart{\'\i}n and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and others},
  title     = {{TensorFlow}: A System for Large-Scale Machine Learning},
  booktitle = {{USENIX} Symposium on Operating Systems Design and Implementation ({OSDI})},
  pages     = {265--283},
  year      = {2016}
}
% Journal article on multi-instrument transcription with self-attention-based
% instance segmentation (volume 28, 2020).
@article{wu2020multi,
  author    = {Wu, Yu-Te and Chen, Berlin and Su, Li},
  title     = {Multi-Instrument Automatic Music Transcription With Self-Attention-Based Instance Segmentation},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  publisher = {IEEE},
  volume    = {28},
  pages     = {2796--2809},
  year      = {2020},
  doi       = {10.1109/taslp.2020.3030482}
}
% MAESTRO dataset paper (ICLR).
% NOTE(review): the citation key says 2018 while `year` is 2019; the key is
% left as-is so existing citations keep resolving.
@inproceedings{hawthorne2018enabling,
  title     = {Enabling Factorized Piano Music Modeling and Generation with the {MAESTRO} Dataset},
  author    = {Curtis Hawthorne and
               Andriy Stasyuk and
               Adam Roberts and
               Ian Simon and
               Cheng{-}Zhi Anna Huang and
               Sander Dieleman and
               Erich Elsen and
               Jesse H. Engel and
               Douglas Eck},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year      = {2019},
}
% Image Transformer paper (ICML 2018).
@inproceedings{parmar2018image,
  title     = {Image Transformer},
  author    = {Niki Parmar and
               Ashish Vaswani and
               Jakob Uszkoreit and
               Lukasz Kaiser and
               Noam Shazeer and
               Alexander Ku and
               Dustin Tran},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning ({ICML})},
  year      = {2018},
  pages     = {4052--4061},
}
% Encoder-decoder semantic segmentation with atrous separable convolution
% (ECCV volume, Springer). The URL is stored unencoded: a literal `%` would
% start a TeX comment when the field is typeset without url-aware handling.
@incollection{Chen2018DeepLabV3,
  author    = {Chen, Liang-Chieh and Zhu, Yukun and Papandreou, George and Schroff, Florian and Adam, Hartwig},
  title     = {Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation},
  booktitle = {Computer Vision {\textendash} {ECCV}},
  publisher = {Springer International Publishing},
  pages     = {833--851},
  year      = {2018},
  doi       = {10.1007/978-3-030-01234-2_49},
  url       = {https://doi.org/10.1007/978-3-030-01234-2_49}
}
% Multipitch estimation combining spectral and temporal representations
% (volume 23, issue 10, 2015). Both authors use the "Last, First" form and the
% page range uses an en-dash (`--`).
@article{su2015combining,
  author    = {Su, Li and Yang, Yi-Hsuan},
  title     = {Combining Spectral and Temporal Representations for Multipitch Estimation of Polyphonic Music},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {23},
  number    = {10},
  pages     = {1600--1612},
  year      = {2015},
  publisher = {IEEE},
  doi       = {10.1109/taslp.2015.2442411}
}
% Framewise piano transcription study (ISMIR 2016).
@inproceedings{kelz2016potential,
  author    = {Rainer Kelz and
               Matthias Dorfer and
               Filip Korzeniowski and
               Sebastian B{\"{o}}ck and
               Andreas Arzt and
               Gerhard Widmer},
  title     = {On the Potential of Simple Framewise Approaches to Piano Transcription},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year      = {2016},
  pages     = {475--481},
}
% Drum transcription with large-scale audio-to-MIDI aligned data (ICASSP 2021).
% Title stored in Title Case like the other entries; styles may down-case it.
@inproceedings{wei2021improving,
  author    = {Wei, I-Chieh and Wu, Chih-Wei and Su, Li},
  title     = {Improving Automatic Drum Transcription Using Large-Scale Audio-to-{MIDI} Aligned Data},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  year      = {2021},
  doi       = {10.1109/icassp39728.2021.9414409}
}
% Music transcription with generalized cepstral features and deep learning
% (ICASSP 2018).
@inproceedings{wu2018automatic,
  author    = {Wu, Yu-Te and Chen, Berlin and Su, Li},
  title     = {Automatic Music Transcription Leveraging Generalized Cepstral Features and Deep Learning},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  year      = {2018},
  pages     = {401--405},
  doi       = {10.1109/icassp.2018.8462079}
}
% Patch-based CNN vocal melody extraction (ICASSP 2018).
% Page range uses an en-dash (`--`).
@inproceedings{su2018vocal,
  author    = {Su, Li},
  title     = {Vocal Melody Extraction Using Patch-Based {CNN}},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages     = {371--375},
  year      = {2018},
  doi       = {10.1109/icassp.2018.8462420}
}
% Hierarchical classification networks for singing voice transcription
% (ISMIR 2019). Given names are stored in full; the style abbreviates if needed.
% NOTE(review): "Zih-Sing" is expanded from the initials "Z.-S." -- confirm
% against the proceedings.
@inproceedings{fu2019hierarchical,
  author    = {Fu, Zih-Sing and Su, Li},
  title     = {Hierarchical Classification Networks for Singing Voice Segmentation and Transcription},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year      = {2019}
}
% ShakeDrop regularization paper (IEEE Access, volume 7, 2019). The method name
% is brace-protected so its internal capital survives sentence-casing.
% NOTE(review): given names are expanded from initials -- confirm against the
% published article.
@article{yamada2019shakedrop,
  author    = {Yamada, Yoshihiro and Iwamura, Masakazu and Akiba, Takuya and Kise, Koichi},
  title     = {{ShakeDrop} Regularization for Deep Residual Learning},
  journal   = {IEEE Access},
  volume    = {7},
  pages     = {186126--186136},
  year      = {2019},
  publisher = {IEEE},
  doi       = {10.1109/access.2019.2960566}
}
% Virtual Adversarial Training paper (volume 41, issue 8). The journal name is
% spelled out in full, like the other IEEE journals in this file.
% NOTE(review): the citation key says 2018 while `year` is 2019; the key is
% left as-is so existing citations keep resolving.
@article{miyato2018virtual,
  author    = {Takeru Miyato and
               Shin{-}ichi Maeda and
               Masanori Koyama and
               Shin Ishii},
  title     = {Virtual Adversarial Training: {A} Regularization Method for Supervised and Semi-Supervised Learning},
  journal   = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {41},
  number    = {8},
  pages     = {1979--1993},
  year      = {2019},
  url       = {https://doi.org/10.1109/TPAMI.2018.2858821},
  doi       = {10.1109/TPAMI.2018.2858821},
  timestamp = {Mon, 26 Oct 2020 09:04:26 +0100},
  biburl    = {https://dblp.org/rec/journals/pami/MiyatoMKI19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Harmony Transformer paper (ISMIR 2019). The model name is brace-protected so
% sentence-casing styles keep it capitalised.
@inproceedings{chen2019harmony,
  author    = {Chen, Tsung-Ping and Su, Li},
  title     = {{Harmony Transformer}: Incorporating Chord Segmentation into Harmony Recognition},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year      = {2019}
}
% Melodic similarity of a cappella flamenco cantes (ISMIR 2010).
% Title stored in Title Case like the other entries; styles may down-case it.
@inproceedings{mora2010characterization,
  author    = {Mora, Joaqu{\'\i}n and G{\'o}mez, Francisco and G{\'o}mez, Emilia and Escobar-Borrego, Francisco and D{\'\i}az-B{\'a}{\~n}ez, Jos{\'e} Miguel},
  title     = {Characterization and Melodic Similarity of A Cappella Flamenco Cantes},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {9--13},
  year      = {2010}
}
% Singing voice separation on the MIR-1K dataset (volume 18, issue 2). The
% dataset name is brace-protected against sentence-casing.
% NOTE(review): the journal is given its pre-2014 name (no "/ACM"), matching the
% `tasl` DOI series -- confirm against the publisher record.
@article{hsu2009improvement,
  author    = {Hsu, Chao-Ling and Jang, Jyh-Shing Roger},
  title     = {On the Improvement of Singing Voice Separation for Monaural Recordings Using the {MIR-1K} Dataset},
  journal   = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume    = {18},
  number    = {2},
  pages     = {310--319},
  year      = {2009},
  publisher = {IEEE},
  doi       = {10.1109/tasl.2009.2026503}
}
% Evaluation framework for singing transcription (ISMIR 2014).
% NOTE(review): given names are abbreviated and the list ends in `and others`;
% confirm the full author list against the proceedings before expanding it.
@inproceedings{molina2014evaluation,
  author    = {Molina, E. and Barbancho-Perez, A. M. and Tard{\'o}n, L. J. and Barbancho-Perez, I. and others},
  title     = {Evaluation Framework for Automatic Singing Transcription},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year      = {2014}
}
% ENST-Drums audio-visual drum database (ISMIR 2006). The title carries no
% trailing period because styles add their own terminal punctuation; the
% dataset name is brace-protected against sentence-casing.
@inproceedings{gillet2006enst,
  author    = {Gillet, Olivier and Richard, Ga{\"e}l},
  title     = {{ENST-Drums}: An Extensive Audio-Visual Database for Drum Signals Processing},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {156--159},
  year      = {2006}
}
% MDB Drums dataset (ISMIR 2017 late-breaking/demo). Dataset names are
% brace-protected so sentence-casing styles keep their capitalisation.
@inproceedings{southall2017mdb,
  author    = {Southall, Carl and Wu, Chih-Wei and Lerch, Alexander and Hockman, Jason},
  title     = {{MDB Drums}: An Annotated Subset of {MedleyDB} for Automatic Drum Transcription},
  booktitle = {Late Breaking/Demos of the 18th International Society for Music Information Retrieval Conference ({ISMIR})},
  year      = {2017},
}
% Beat and downbeat tracking for symbolic music with recurrent networks
% (APSIPA ASC 2020). The venue acronym is brace-protected like the other
% booktitle acronyms in this file.
@inproceedings{chuang2020beat,
  author       = {Chuang, Yi-Chin and Su, Li},
  title        = {Beat and Downbeat Tracking of Symbolic Music Data Using Deep Recurrent Neural Networks},
  booktitle    = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference ({APSIPA ASC})},
  pages        = {346--352},
  year         = {2020},
  organization = {IEEE}
}
% Invariances and data augmentation for supervised music transcription
% (ICASSP 2018).
@inproceedings{thickstun2018invariances,
  author    = {John Thickstun and
               Za{\"{\i}}d Harchaoui and
               Dean P. Foster and
               Sham M. Kakade},
  title     = {Invariances and Data Augmentation for Supervised Music Transcription},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  year      = {2018},
  pages     = {2241--2245},
  doi       = {10.1109/icassp.2018.8461686}
}
% madmom library paper (ACM Multimedia 2016). The lower-case package name and
% "Python" are protected as whole words so styles neither capitalise nor
% down-case them.
% NOTE(review): booktitle is normalised to the "24th ACM International
% Conference on Multimedia" form -- confirm against the ACM record.
@inproceedings{bock2016madmom,
  author    = {B{\"o}ck, Sebastian and Korzeniowski, Filip and Schl{\"u}ter, Jan and Krebs, Florian and Widmer, Gerhard},
  title     = {{madmom}: {A} New {Python} Audio and Music Signal Processing Library},
  booktitle = {Proceedings of the 24th {ACM} International Conference on Multimedia},
  pages     = {1174--1178},
  year      = {2016},
  doi       = {10.1145/2964284.2973795}
}
% librosa library paper (14th Python in Science Conference, 2015). The
% lower-case package name and "Python" are protected as whole words; initials
% are separated so BibTeX parses them as two name tokens.
% NOTE(review): no `organization` field is recorded -- the exported value named
% an indexing service rather than the conference organiser.
@inproceedings{mcfee2015librosa,
  author    = {McFee, Brian and Raffel, Colin and Liang, Dawen and Ellis, Daniel P. W. and McVicar, Matt and Battenberg, Eric and Nieto, Oriol},
  title     = {{librosa}: Audio and Music Signal Analysis in {Python}},
  booktitle = {Proceedings of the 14th Python in Science Conference},
  volume    = {8},
  pages     = {18--25},
  year      = {2015},
  doi       = {10.25080/majora-7b98e3ed-003}
}
% Expert ground-truth set for audio chord recognition (ISMIR 2011).
% Page range uses an en-dash (`--`).
@inproceedings{burgoyne2011anexpert,
  author    = {Burgoyne, John Ashley and Wild, Jonathan and Fujinaga, Ichiro},
  title     = {An Expert Ground Truth Set for Audio Chord Recognition and Music Analysis},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {633--638},
  year      = {2011}
}
% Approximate note transcription for chord identification (ISMIR 2010).
% Page range uses an en-dash (`--`).
@inproceedings{mauch2010approximate,
  author    = {Mauch, Matthias and Dixon, Simon},
  title     = {Approximate Note Transcription for the Improved Identification of Difficult Chords},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  pages     = {135--140},
  year      = {2010}
}
% Learning music features from scratch (ICLR 2017). The venue acronym is
% brace-protected, matching the other ICLR entry in this file.
@inproceedings{thickstun2017learning,
  author    = {Thickstun, John and Harchaoui, Zaid and Kakade, Sham M.},
  title     = {Learning Features of Music from Scratch},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year      = {2017}
}