-
Notifications
You must be signed in to change notification settings - Fork 0
/
ddl2024mlbio.tex
316 lines (262 loc) · 10.7 KB
/
ddl2024mlbio.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
% template.tex, dated April 5 2013
% This is a template file for Annual Reviews 1 column Journals
%
% Compilation using ar-1col-S2O.cls' - version 1.0, Aptara Inc.
% (c) 2013 AR
%
% Steps to compile: latex latex latex
%
% For tracking purposes => this is v1.0 - Apr. 2013
\documentclass{ar-1col-S2O}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage[ruled,procnumbered]{algorithm2e}%
\usepackage[numbers]{natbib}
\usepackage[nameinlink]{cleveref}
\usepackage{listings}%
\usepackage{url}
\setcounter{secnumdepth}{4}
% Metadata Information
\jname{Xxxx. Xxx. Xxx. Xxx.}
\jvol{AA}
\jyear{YYYY}
\doi{10.1146/((please add article doi))}
% tikz libs
\usepackage{tikz} % fancy diagrams
\usetikzlibrary{positioning}
\usetikzlibrary{shapes,snakes}
\usetikzlibrary{arrows.meta}
\usetikzlibrary{external} % for saving tikz fig to pdf
\tikzexternalize
% Change name of algorithm to "snippet"
\SetAlgorithmName{Snippet}{snippet}{list of snippets}
\makeatletter
\renewcommand{\algorithmautorefname}{Snippet}
\makeatother
%cref alias
\newcounter{snippet}
\makeatletter
% https://tex.stackexchange.com/a/212030/26355
\AtBeginEnvironment{snippet}{\let\c@algocf\c@snippet\crefalias{algocf}{snippet}}
\makeatother
\crefname{algorithm}{snip.}{snips.}
\Crefname{algorithm}{Snippet}{Snippets}
% Document starts
\begin{document}
% Page header
\markboth{Author et al.}{Short title}
% Title
\title{Machine Learning in Biology}
%Authors, affiliations address.
\author{Author B. Authorone,$^1$ Firstname C. Authortwo,$^2$ and D. Name Authorthree$^3$
\affil{$^1$Department/Institute, University, City, Country, Postal code; email: [email protected]}
\affil{$^2$Department/Institute, University, City, Country, Postal code}
\affil{$^3$Department/Institute, University, City, Country, Postal code}}
%Abstract
\begin{abstract}
Abstract text, approximately 150 words.
\end{abstract}
%Keywords, etc.
\begin{keywords}
keywords, separated by comma, no full stop, lowercase
\end{keywords}
\maketitle
%Table of Contents
\tableofcontents
% Heading 1
\section{INTRODUCTION}
Please begin the main text of your article here.
%%% Fig of search terms
\begin{figure}[htb]
\centering
\begin{tikzpicture}[node distance=2.5cm,auto]
\node[draw=none, anchor=north] (biot) {Biology Term, One of:};
\node[below=0.5cm of biot] (bio) {$\left\{ \begin{matrix}
\text{biodiversity} \\
\text{biogeography} \\
\text{bioinformatics} \\
\text{biology} \\
\text{conservation} \\
\text{developmental biology} \\
\text{disease classification} \\
\text{disease ecology} \\
\text{ecology} \\
\text{environmental biology} \\
\text{evolution} \\
\text{genetics} \\
\text{genomics} \\
\text{immunology} \\
\text{marine biology} \\
\text{medical imaging} \\
\text{metabolomics} \\
\text{microbiology} \\
\text{neurobiology} \\
\text{paleontology} \\
\text{phylogenetics} \\
\text{phylogenomics} \\
\text{proteomics} \\
\text{systems biology} \\
\end{matrix}\right\}$};
\node[left=0.25cm of bio] (cross) {$\mathbf{\times}$};
\node[left=0.25cm of cross] (ml) {$\left\{ \begin{matrix}
\text{OLS} \\
\text{random forest} \\
\text{support vector machine} \\
\text{gradient boosted trees}
\end{matrix}\right\}$};
\node[above=0.5 of ml] (mlt) {ML Term, One of:};
\node[right=0.25cm of bio] (equals) {$\mathbf{=}$};
\node[right=0.25cm of equals] (query) {$\left\{ \begin{matrix}
\text{``OLS'' ``biodiversity''} \\
\text{``OLS'' ``biogeography''} \\
\cdots \\
\text{``random forest'' ``genomics''} \\
\cdots \\
\end{matrix}\right\}$};
\node[above=0.5 of query] (queryt) {Search Query, e.g.};
%\node[draw, circle, minimum size=1.5cm, left=2cm of dummy] (x) {$P(x)$};
%\node[draw, circle, minimum size=1.5cm, right=2cm of dummy] (x2) {$P(x')$};
%\node[draw=none, below=0.5cm of dummy] (pmf2) {$A(x,x')q(x,x')$};
%
%\draw[-{Latex[length=3mm]}] (x.north east) -- (x2.north west);
%\draw[{Latex[length=3mm]}-] (x.south east) -- (x2.south west);
%\node[draw=none, below=2cm of dummy] (db) {Detailed Balance: $A(x',x)q(x',x)P(x) = A(x,x')q(x,x')P(x')$};
\end{tikzpicture}
\caption{Construction of search queries by selecting one machine learning term and one biology term.}
\label{fig:search}
\end{figure}
\input{OLS.tex}
\input{SVM.tex}
\input{rf.tex}
\subsection{Gradient Boosting}
Whereas Random Forests~\cite{breiman2001random} create an ensemble by bagging, another approach to building an ensemble, called ``boosting'', develops the component models iteratively. Let $f_{m-1}(x_i)$ be the boosted model's prediction after $m-1$ components have been added. We seek the next iteration, $f_m(x_i) = f_{m-1}(x_i) + \gamma_m g_m(x_i)$. For example, one could fix $\gamma_m=1$ and fit $g_m$ to minimize the residual loss, $L(y_i-f_{m-1}(x_i), g_m(x_i))$. The way to determine $g_m$ and $\gamma_m$ depends on the particular boosting variant.
One subtype of boosting is called ``Gradient Boosted Models''~\cite{natekin2013gradient}, or GBMs. This approach fits $g_m$ to the negative gradient of the loss with respect to the current prediction, $-\frac{\partial L(y_i, f_{m-1}(x_i))}{\partial f_{m-1}(x_i)}$. Then one finds the weight, $\gamma_m$, to minimize the overall loss, $L(y_i,f_{m-1}(x_i) + \gamma_m g_m(x_i))$. The gradient helps direct the next model more carefully than generic boosting.
Libraries to implement GBMs exist in many programming languages. We offer two code snippets, one in Python and one in R, to demonstrate one way to deploy this method. \Cref{alg:xgboost} fits a Gradient Boosted Trees model in Python with the XGBoost library~\cite{chen2016xgboost}. Likewise, in R, \Cref{alg:gbm} trains a similar model with the \texttt{gbm} package.
\begin{algorithm}
\caption{Python GBM example using XGBoost}\label{alg:xgboost}
\begin{lstlisting}[language=Python]
import xgboost as xgb
model = xgb.XGBRegressor(n_estimators=10)
model.fit(Xtrain, Ytrain)
pred = model.predict(Xtest)
\end{lstlisting}
\end{algorithm}
\begin{algorithm}
\caption{R GBM example using \texttt{gbm}}\label{alg:gbm}
\begin{lstlisting}[language=R]
TODO
\end{lstlisting}
\end{algorithm}
%Heading 1
\section{FIRST-LEVEL HEADING}
This is dummy text.
% Heading 2
\subsection{Second-Level Heading}
This is dummy text. This is dummy text. This is dummy text. This is dummy text.
% Heading 3
\subsubsection{Third-Level Heading}
This is dummy text. This is dummy text. This is dummy text. This is dummy text.
% Heading 4
\paragraph{Fourth-Level Heading} Fourth-level headings are placed as part of the paragraph.
%Example of a Figure
\section{ELEMENTS\ OF\ THE\ MANUSCRIPT}
\subsection{Figures}Figures should be cited in the main text in chronological order. This is dummy text with a citation to the first figure (\textbf{Figure \ref{fig1}}). Citations to \textbf{Figure \ref{fig1}} (and other figures) will be bold.
\begin{figure}[h]
%\includegraphics[width=3in]{SampleFigure}
\caption{Figure caption with descriptions of parts a and b}
\label{fig1}
\end{figure}
% Example of a Table
\subsection{Tables} Tables should also be cited in the main text in chronological order (\textbf {Table \ref{tab1}}).
\begin{table}[h]
\tabcolsep7.5pt
\caption{Table caption}
\label{tab1}
\begin{center}
\begin{tabular}{@{}l|c|c|c|c@{}}
\hline
Head 1 &&&&Head 5\\
{(}units)$^{\rm a}$ &Head 2 &Head 3 &Head 4 &{(}units)\\
\hline
Column 1 &Column 2 &Column3$^{\rm b}$ &Column4 &Column\\
Column 1 &Column 2 &Column3 &Column4 &Column\\
Column 1 &Column 2 &Column3 &Column4 &Column\\
Column 1 &Column 2 &Column3 &Column4 &Column\\
\hline
\end{tabular}
\end{center}
\begin{tabnote}
$^{\rm a}$Table footnote; $^{\rm b}$second table footnote.
\end{tabnote}
\end{table}
% Example of lists
\subsection{Lists and Extracts} Here is an example of a numbered list:
\begin{enumerate}
\item List entry number 1,
\item List entry number 2,
\item List entry number 3,\item List entry number 4, and
\item List entry number 5.
\end{enumerate}
Here is an example of an extract.
\begin{extract}
This is an example text of quote or extract.
This is an example text of quote or extract.
\end{extract}
\subsection{Sidebars and Margin Notes}
% Margin Note
\begin{marginnote}[]
\entry{Term A}{definition}
\entry{Term B}{definition}
\entry{Term C}{definition}
\end{marginnote}
\begin{textbox}[h]\section{SIDEBARS}
Sidebar text goes here.
\subsection{Sidebar Second-Level Heading}
More text goes here.\subsubsection{Sidebar third-level heading}
Text goes here.\end{textbox}
\subsection{Equations}
% Example of a single-line equation
\begin{equation}
a = b \ {\rm ((Single\ Equation\ Numbered))}
\end{equation}
%Example of multiple-line equation
Equations can also be multiple lines as shown in Equations 2 and 3.
\begin{eqnarray}
c = 0 \ {\rm ((Multiple\ Lines, \ Numbered))}\\
ac = 0 \ {\rm ((Multiple \ Lines, \ Numbered))}
\end{eqnarray}
% Summary Points
\begin{summary}[SUMMARY POINTS]
\begin{enumerate}
\item Summary point 1. These should be full sentences.
\item Summary point 2. These should be full sentences.
\item Summary point 3. These should be full sentences.
\item Summary point 4. These should be full sentences.
\end{enumerate}
\end{summary}
% Future Issues
\begin{issues}[FUTURE ISSUES]
\begin{enumerate}
\item Future issue 1. These should be full sentences.
\item Future issue 2. These should be full sentences.
\item Future issue 3. These should be full sentences.
\item Future issue 4. These should be full sentences.
\end{enumerate}
\end{issues}
%Disclosure
\section*{DISCLOSURE STATEMENT}
If the authors have nothing to disclose, the following statement will be used: The authors are not aware of any affiliations, memberships, funding, or financial holdings that
might be perceived as affecting the objectivity of this review.
% Acknowledgements
\section*{ACKNOWLEDGMENTS}
Acknowledgements, general annotations, funding.
% References
%
% Margin notes within bibliography
\section*{LITERATURE\ CITED}
To download the appropriate bibliography style file, please see \url{https://www.annualreviews.org/page/authors/general-information}.
\noindent
Please see the Style Guide document for instructions on preparing your Literature Cited.
The citations should be listed in order of appearance, with titles. For example:
\bibliography{cites.bib}% common bib file
\end{document}