---
title: "hypothesis test formula sheet"
output: pdf_document
classoption: landscape
geometry: margin=1.5cm
header-includes:
  - \usepackage{float}
  - \usepackage{makecell}
---
\renewcommand{\arraystretch}{2.5}
### One sample tests
\begin{figure}[H]
\begin{center}
\begin{tabular}{lccccc}
name & known parameters & null hypothesis & test statistic & null distribution & p-value (two sided) \\
\hline
one sample z-test &
$\mu$ unknown, $\sigma^2$ known &
$H_0: \mu = \mu_0$ &
$Z_\text{obs} = \displaystyle {\bar x - \mu_0 \over {\sigma \over \sqrt n}}$ &
N(0, 1) &
$2 \cdot P(Z < -|Z_\text{obs}|)$ \\
\thead{one sample z-test \\ for a proportion} &
$\pi$ unknown &
$H_0: \pi = \pi_0$ &
$Z_\text{obs} = \displaystyle {\hat \pi - \pi_0 \over \sqrt{\pi_0 (1 - \pi_0) \over n}}$ &
N(0, 1) &
$2 \cdot P(Z < -|Z_\text{obs}|)$ \\
one sample t-test &
$\mu, \sigma^2$ unknown &
$H_0: \mu = \mu_0$ & $T_\text{obs} = \displaystyle {\bar x - \mu_0 \over {s \over \sqrt n}}$ &
$t_{n-1}$ &
$2 \cdot P(t_{n-1} < -|T_\text{obs}|)$ \\
chi-square test for variance &
$\mu, \sigma^2$ unknown &
$H_0: \sigma^2 = \sigma^2_0$ &
$T_\text{obs} = \displaystyle {(n - 1) \cdot s^2 \over \sigma^2_0}$ &
$\chi^2_{n-1}$ &
$2 \cdot \min(P(\chi^2_{n-1} \le T_\text{obs}), P(\chi^2_{n-1} \ge T_\text{obs}))$ \\
sign test &
none &
$H_0: m = m_0$ &
$B_\text{obs} = \sum_i I_{x_i > m_0}$ &
Binomial($\sum_i I_{x_i \neq m_0}, \frac 12$) &
$2 \cdot \min(P(B \ge B_\text{obs}), P(B \le B_\text{obs}))$
\end{tabular}
\end{center}
\end{figure}
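
These can be run in R roughly as follows (a minimal sketch on simulated data; the sample `x`, the values `mu0` and `m0`, and the proportion counts are illustrative, and `prop.test` reports the equivalent chi-squared statistic $Z_\text{obs}^2$ rather than $Z_\text{obs}$). The chunks in this sheet are marked `eval=FALSE` so they do not change the rendered tables.

```{r one-sample-examples, eval=FALSE}
set.seed(1)
x <- rnorm(30, mean = 5, sd = 2)   # simulated sample
mu0 <- 4.5

# one sample t-test of H0: mu = mu0
t.test(x, mu = mu0)

# one sample z-test for a proportion: 18 successes out of n = 30, pi0 = 0.5
# (prop.test without continuity correction reports Z_obs^2 as X-squared)
prop.test(18, 30, p = 0.5, correct = FALSE)

# sign test of H0: m = m0, using the Binomial null distribution
m0 <- 4.5
binom.test(sum(x > m0), sum(x != m0), p = 0.5)
```
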
### Two sample tests
\begin{figure}[H]
\begin{center}
\begin{tabular}{lccccc}
name & known parameters & null hypothesis & test statistic & null distribution & p-value (two sided) \\
\hline
two sample z-test &
\thead{$\mu_1, \mu_2$ unknown \\ $\sigma_1^2, \sigma_2^2$ known} &
$H_0: \mu_1 - \mu_2 = \delta_0$ &
$Z_\text{obs} = \displaystyle {(\bar x - \bar y) - \delta_0 \over \sqrt{{\sigma_1^2 \over n_1} + {\sigma_2^2 \over n_2}}}$ &
N(0, 1) &
$2 \cdot P(Z < -|Z_\text{obs}|)$ \\
\thead{two sample z-test \\ for a proportion} &
$\pi_1, \pi_2$ unknown &
$H_0: \pi_1 = \pi_2$ &
$Z_\text{obs} = \displaystyle {\hat \pi_1 - \hat \pi_2 \over \sqrt{\hat \pi (1 - \hat \pi) \left({1 \over n_1} + {1 \over n_2} \right)}}$ &
N(0, 1) &
$2 \cdot P(Z < -|Z_\text{obs}|)$ \\
equal variance t-test &
\thead{$\mu_1, \mu_2$ unknown \\ $\sigma_1^2 = \sigma_2^2$ unknown} &
$H_0: \mu_1 - \mu_2 = \delta_0$ &
$T_\text{obs} = \displaystyle {(\bar x - \bar y) - \delta_0 \over
s_\text{pooled} \sqrt{{1 \over n_1} + {1 \over n_2}}}$ &
$t_{n_1 + n_2 - 2}$ &
$2 \cdot P(t_{n_1 + n_2 - 2} < -|T_\text{obs}|)$ \\
welch's t-test &
\thead{$\mu_1, \mu_2$ unknown \\ $\sigma_1^2 \neq \sigma_2^2$ unknown} &
$H_0: \mu_1 - \mu_2 = \delta_0$ &
$T_\text{obs} = \displaystyle {(\bar x - \bar y) - \delta_0 \over
\sqrt{{s_1^2 \over n_1} + {s_2^2 \over n_2}}}$ &
$t_\nu$ &
$2 \cdot P(t_\nu < -|T_\text{obs}|)$ \\
f-test for equal variance &
\thead{$\mu_1, \mu_2$ unknown \\ $\sigma_1^2, \sigma_2^2$ unknown} &
$H_0: \sigma_1^2 = \sigma_2^2$ &
$F_\text{obs} = \displaystyle {s_1^2 \over s_2^2}$ &
$F_{n_1 - 1, n_2 - 1}$ &
$2 \cdot \min(P(F_{n_1 - 1, n_2 - 1} \le F_\text{obs}), P(F_{n_1 - 1, n_2 - 1} \ge F_\text{obs}))$ \\
\end{tabular}
\end{center}
\end{figure}
where
\begin{align}
s_\text{pooled} &= \sqrt{{(n_1 - 1) \cdot s_1^2 + (n_2 - 1) \cdot s_2^2 \over n_1 + n_2 - 2}}
\qquad \qquad
\nu = { \left({s_1^2 \over n_1} + {s_2^2 \over n_2}\right)^2 \over
{\left({s_1^2 \over n_1} \right)^2 \over n_1 - 1} +
{\left({s_2^2 \over n_2} \right)^2 \over n_2 - 1}
}
\qquad \qquad
\hat \pi = {n_1 \hat \pi_1 + n_2 \hat \pi_2 \over n_1 + n_2}
\end{align}
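
In R these are one-liners (a sketch on simulated data; `t.test` computes $\nu$ for Welch's test automatically, and `prop.test` uses the pooled $\hat \pi$ and reports $Z_\text{obs}^2$ as a chi-squared statistic):

```{r two-sample-examples, eval=FALSE}
set.seed(2)
x <- rnorm(25, mean = 10, sd = 2)
y <- rnorm(30, mean = 11, sd = 3)

t.test(x, y, var.equal = TRUE)   # equal variance t-test with s_pooled
t.test(x, y)                     # Welch's t-test (default), df = nu above
var.test(x, y)                   # f-test of H0: sigma1^2 = sigma2^2

# two sample z-test for proportions: 12/25 vs 20/30, pooled pi hat
prop.test(c(12, 20), c(25, 30), correct = FALSE)
```
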
### Paired two-sample tests (incomplete, don't trust these yet)
Suppose $x_i$ is paired with $y_i$. Then let $d_i = x_i - y_i$.
\begin{figure}[H]
\begin{center}
\begin{tabular}{lccccc}
name & known parameters & null hypothesis & test statistic & null distribution & p-value (two sided) \\
\hline
paired z-test &
$\mu_1, \mu_2$ unknown, $\sigma_d^2$ known &
$H_0: \mu_1 - \mu_2 = \delta_0$ &
$Z_\text{obs} = \displaystyle {\bar d - \delta_0 \over {\sigma_d \over \sqrt n}}$ & N(0, 1) &
$2 \cdot P(Z < -|Z_\text{obs}|)$ \\
paired t-test &
$\mu_1, \mu_2, \sigma_d^2$ unknown &
$H_0: \mu_1 - \mu_2 = \delta_0$ &
$T_\text{obs} = \displaystyle {\bar d - \delta_0 \over {s_d \over \sqrt n}}$ & $t_{n-1}$ &
$2 \cdot P(t_{n-1} < -|T_\text{obs}|)$ \\
paired sign test &
none &
$H_0: m_1 - m_2 = \delta_0$ &
$B_\text{obs} = \sum_i I_{d_i > \delta_0}$ &
Binomial($\sum_i I_{d_i \neq \delta_0}, \frac 12$) &
$2 \cdot \min(P(B \ge B_\text{obs}), P(B \le B_\text{obs}))$
\end{tabular}
\end{center}
\end{figure}
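
A sketch of the paired versions in R (simulated paired measurements; the paired t-test is the same as a one sample t-test on the differences $d_i$):

```{r paired-examples, eval=FALSE}
set.seed(3)
x <- rnorm(20, mean = 5)
y <- x + rnorm(20, mean = -0.3)   # second measurement on the same units
d <- x - y

t.test(x, y, paired = TRUE)       # paired t-test of H0: mu1 - mu2 = 0
t.test(d, mu = 0)                 # identical: one sample t-test on d

# paired sign test on the differences
binom.test(sum(d > 0), sum(d != 0), p = 0.5)
```
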
### Related to linear regression and ANOVA
A one-way ANOVA table looks like
\begin{figure}[H]
\begin{center}
\begin{tabular}{lccccc}
source & df & SS & MS & F & p-value \\
\hline
treatment &
$k - 1$ &
$\text{SSTreatment}$ &
$\text{MSTreatment} = {\text{SSTreatment} \over k - 1}$ &
${\text{MSTreatment} \over \text{MSError}}$ &
$P(F_{k - 1, N - k} > F_\text{obs})$ \\
error &
$N - k$ &
$\text{SSError}$ &
$\text{MSError} = {\text{SSError} \over N - k}$ \\
total &
$N - 1$ &
$\text{SSTotal}$
\end{tabular}
\end{center}
\end{figure}
where
\begin{align}
\text{SSTreatment}
&= \sum_{i=1}^k \sum_{j=1}^{n_i} (\bar y_{i \cdot} - \bar y_{\cdot \cdot})^2
= \sum_{i=1}^k n_i (\bar y_{i \cdot} - \bar y_{\cdot \cdot})^2 \\
\text{SSError}
&= \sum_{i=1}^k \sum_{j=1}^{n_i} (y_{ij} - \bar y_{i \cdot})^2
= \sum_{i=1}^k (n_i - 1) s^2_i \\
\text{SSTotal}
&= \text{SSTreatment} + \text{SSError}
= \sum_{i=1}^k \sum_{j=1}^{n_i} (y_{ij} - \bar y_{\cdot \cdot})^2
\end{align}
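
The whole table can be produced in R with `aov` (a sketch on simulated groups; the group means used to simulate are illustrative):

```{r anova-example, eval=FALSE}
set.seed(4)
group <- factor(rep(c("a", "b", "c"), each = 10))   # k = 3 groups, N = 30
y <- rnorm(30, mean = rep(c(5, 6, 5.5), each = 10))

fit_aov <- aov(y ~ group)
summary(fit_aov)   # df, SS, MS, F, and p-value as in the table above
```
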
We use this to compare group means (i.e. a categorical predictor). When we have a continuous predictor we use simple linear regression, which gives the following tests:
\begin{figure}[H]
\begin{center}
\begin{tabular}{lccccc}
name & null hypothesis & test statistic & null distribution & p-value \\
\hline
t-test for $\beta_0$ &
$H_0: \beta_0 = c$ &
$T_\text{obs} = \displaystyle {\hat \beta_0 - c \over \hat \sigma \sqrt{\frac 1n + \bar x^2 / \sum_{i=1}^n (x_i - \bar x)^2 }}$ &
$t_{n-2}$ &
$2 \cdot P(T < -|T_\text{obs}|)$ \\
t-test for $\beta_1$ &
$H_0: \beta_1 = c$ &
$T_\text{obs} = \displaystyle {\hat \beta_1 - c \over \hat \sigma / \sqrt{\sum_{i=1}^n (x_i - \bar x)^2}}$ &
$t_{n-2}$ &
$2 \cdot P(T < -|T_\text{obs}|)$ \\
overall f-test for linear model &
$H_0: \beta_1 = 0$ &
$F_\text{obs} = \displaystyle {\sum_{i=1}^n (\hat y_i - \bar y)^2 \over \hat \sigma^2}$ &
$F_{1, n - 2}$ &
$P(F_{1, n - 2} > F_\text{obs})$ \\
\end{tabular}
\end{center}
\end{figure}
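
In R, `summary(lm(...))` reports these t-tests (with $c = 0$) together with the overall f-test (a sketch on simulated data; the true coefficients 1 and 0.5 are illustrative):

```{r slr-example, eval=FALSE}
set.seed(5)
x <- runif(40, 0, 10)
y <- 1 + 0.5 * x + rnorm(40)

fit_lm <- lm(y ~ x)
summary(fit_lm)   # t-tests of beta0 = 0 and beta1 = 0 on n - 2 df, plus the overall f-test
anova(fit_lm)     # the corresponding regression / residual sums of squares
```
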
Miscellaneous simple linear regression formulas (not from hypothesis tests):
\begin{align}
y_i &= \beta_0 + \beta_1 x_i + \varepsilon_i \qquad \varepsilon_i \sim \textrm{Normal}(0, \sigma^2) \\
\rho &= {
\sum_{i=1}^n (x_i - \bar x) (y_i - \bar y) \over
\sqrt{\sum_{i=1}^n (x_i - \bar x)^2}
\sqrt{\sum_{i=1}^n (y_i - \bar y)^2}}
= {s_{xy} \over s_x s_y} \\
\hat \beta_1 &= {
\sum_{i=1}^n (x_i - \bar x) (y_i - \bar y) \over
\sum_{i=1}^n (x_i - \bar x)^2}
= \rho \cdot {s_y \over s_x} \\
\hat \beta_0 &= \bar y - \hat \beta_1 \bar x \\
\hat y_i &= \hat \beta_0 + \hat \beta_1 x_i \\
e_i &= y_i - \hat y_i \\
\text{SSE} &= \sum_{i=1}^n (y_i - \hat y_i)^2 \\
\hat \sigma^2 &= \text{MSE} = {\sum_{i=1}^n (y_i - \hat y_i)^2 \over n - 2}
\end{align}
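
These closed forms can be checked by hand against `lm` (a sketch reusing `x`, `y`, and `fit_lm` from the chunk above):

```{r slr-by-hand, eval=FALSE}
beta1_hat  <- sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))^2)
beta0_hat  <- mean(y) - beta1_hat * mean(x)
y_hat      <- beta0_hat + beta1_hat * x
sigma2_hat <- sum((y - y_hat)^2) / (length(x) - 2)   # MSE

c(beta0_hat, beta1_hat)   # should match coef(fit_lm)
sigma2_hat                # should match sigma(fit_lm)^2
```
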
Confidence intervals for means and future predictions at $x^*$ are given by
\begin{align}
\hat \beta_0 + \hat \beta_1 x^* &\pm t_{n - 2,\, \alpha/2} \cdot \hat \sigma \sqrt{\frac 1n + {(x^* - \bar x)^2 \over \sum_{i=1}^n (x_i - \bar x)^2}} \qquad \text{(mean at $x^*$)} \\
\hat \beta_0 + \hat \beta_1 x^* &\pm t_{n - 2,\, \alpha/2} \cdot \hat \sigma \sqrt{1 + \frac 1n + {(x^* - \bar x)^2 \over \sum_{i=1}^n (x_i - \bar x)^2}} \qquad \text{(future prediction at $x^*$)}
\end{align}
### Post-hoc tests for ANOVA
\begin{figure}[H]
\begin{center}
\begin{tabular}{lccccc}
name & null hypothesis & test statistic & null distribution & p-value \\
\hline
linear contrast on group means &
TODO &
TODO &
TODO &
TODO \\
tukey's honestly significant differences &
TODO &
TODO &
TODO &
TODO \\
\end{tabular}
\end{center}
\end{figure}
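
In R, Tukey's HSD is available directly on an `aov` fit (a minimal sketch reusing `fit_aov` from the ANOVA chunk above; linear contrasts would typically go through an add-on package and are not shown here):

```{r posthoc-example, eval=FALSE}
TukeyHSD(fit_aov)   # all pairwise differences of group means, familywise-adjusted
```
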
### Nonparametric tests
- Kruskal-Wallis
- Wilcoxon rank-sum
- Wilcoxon signed-rank
- Bootstrap and permutation tests (parametric; one and two sample; paired), sketched below
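
A sketch of a few of these in R (simulated data; `wilcox.test` covers both the rank-sum and signed-rank tests, and the permutation test shuffles group labels to get a two sided p-value for a difference in means):

```{r nonparametric-examples, eval=FALSE}
set.seed(6)
g <- factor(rep(c("a", "b", "c"), each = 10))
z <- rnorm(30, mean = rep(c(0, 0.5, 1), each = 10))
x <- rexp(20)
y <- rexp(25, rate = 0.8)
d <- rnorm(15, mean = 0.3)      # paired differences

kruskal.test(z ~ g)             # Kruskal-Wallis: k group locations
wilcox.test(x, y)               # Wilcoxon rank-sum (Mann-Whitney)
wilcox.test(d, mu = 0)          # Wilcoxon signed-rank on paired differences

# two sample permutation test of equal means, shuffling group labels
obs  <- mean(x) - mean(y)
perm <- replicate(10000, {
  s <- sample(c(x, y))
  mean(s[1:20]) - mean(s[-(1:20)])
})
mean(abs(perm) >= abs(obs))     # two sided permutation p-value
```
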