Z-test_vs_ANOVA_V3.html

<!DOCTYPE html>

<html lang="en">

<head>

<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />


<title>Estimating significance of prediction differences</title>

<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/united.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<!-- Fixed pixel font sizes for h1 to h6 and the title heading, plus backgrounds for code elements and for pre elements that carry no class. -->
<style>h1 {font-size: 34px;}
       h1.title {font-size: 38px;}
       h2 {font-size: 30px;}
       h3 {font-size: 24px;}
       h4 {font-size: 18px;}
       h5 {font-size: 16px;}
       h6 {font-size: 12px;}
       code {color: inherit; background-color: rgba(0, 0, 0, 0.04);}
       pre:not([class]) { background-color: white }</style>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/default.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-5.1.0/css/all.css" rel="stylesheet" />
<link href="site_libs/font-awesome-5.1.0/css/v4-shims.css" rel="stylesheet" />

<style type="text/css">
  /* Helper classes for small caps, underline, side-by-side columns,
     hanging indents and task lists, plus wrapping behaviour for code. */
  code{white-space: pre-wrap;}
  span.smallcaps{font-variant: small-caps;}
  span.underline{text-decoration: underline;}
  div.column{display: inline-block; vertical-align: top; width: 50%;}
  div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
  ul.task-list{list-style: none;}
    </style>

<!-- The rule below uses the same "code" selector as the rule above and comes later, so white-space: pre takes precedence over pre-wrap. -->
<style type="text/css">code{white-space: pre;}</style>
<script type="text/javascript">
// Configure and start highlight.js, but only when the library is present
// on the page (window.hljs is defined).
(function (highlighter) {
  if (!highlighter) {
    return;
  }

  // An empty language list is passed to configure() before highlighting
  // is registered to run on load.
  highlighter.configure({languages: []});
  highlighter.initHighlightingOnLoad();

  // When the document is already fully loaded, additionally schedule a
  // highlighting pass on the next timer tick.
  if (document.readyState === "complete") {
    window.setTimeout(function () {
      highlighter.initHighlighting();
    }, 0);
  }
})(window.hljs);
</script>


<style type = "text/css">
/* Centre the main container and cap its width. */
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
/* Keep images from overflowing their parent. */
img {
  max-width:100%;
}
.tabbed-pane {
  padding-top: 12px;
}
.html-widget {
  margin-bottom: 20px;
}
/* Removes the focus outline on code-folding buttons.
   NOTE(review): no replacement focus indicator is defined in this block. */
button.code-folding-btn:focus {
  outline: none;
}
summary {
  display: list-item;
}
pre code {
  padding: 0;
}
</style>


<style type="text/css">
/* Nested (second-level) dropdown menus for the navbar. */
.dropdown-submenu {
  position: relative;
}
/* Place the nested menu to the right of its parent item. */
.dropdown-submenu>.dropdown-menu {
  top: 0;
  left: 100%;
  margin-top: -6px;
  margin-left: -1px;
  border-radius: 0 6px 6px 6px;
}
/* Reveal the nested menu while the parent item is hovered. */
.dropdown-submenu:hover>.dropdown-menu {
  display: block;
}
/* Right-pointing caret drawn with borders after the parent link. */
.dropdown-submenu>a:after {
  display: block;
  content: " ";
  float: right;
  width: 0;
  height: 0;
  border-color: transparent;
  border-style: solid;
  border-width: 5px 0 5px 5px;
  border-left-color: #cccccc;
  margin-top: 5px;
  margin-right: -10px;
}
.dropdown-submenu:hover>a:after {
  border-left-color: #adb5bd;
}
/* Variant with .pull-left: the nested menu opens to the left instead. */
.dropdown-submenu.pull-left {
  float: none;
}
.dropdown-submenu.pull-left>.dropdown-menu {
  left: -100%;
  margin-left: 10px;
  border-radius: 6px 0 6px 6px;
}
</style>

<script type="text/javascript">
// manage active state of menu based on current page
// On DOM ready: mark the navbar entry for the current page as active and
// inject CSS that offsets the page and section headings by the navbar height.
$(document).ready(function () {
  // Active menu anchor: use the file name of the current page (the part of
  // the path after the last "/"), falling back to "index.html" when the path
  // ends in "/". Both names are declared with `var` so they stay local to
  // this handler instead of becoming implicit globals on `window`.
  var pathname = window.location.pathname;
  var href = pathname.substring(pathname.lastIndexOf('/') + 1);
  if (href === "") {
    href = "index.html";
  }
  var menuAnchor = $('a[href="' + href + '"]');

  // mark it active
  menuAnchor.tab('show');

  // if it's got a parent navbar menu mark it active as well
  menuAnchor.closest('li.dropdown').addClass('active');

  // Navbar adjustments: height of the first .navbar plus 15px
  var navHeight = $(".navbar").first().height() + 15;
  var style = document.createElement('style');
  var pt = "padding-top: " + navHeight + "px; ";
  var mt = "margin-top: -" + navHeight + "px; ";
  var css = "";
  // offset scroll position for anchor links (for fixed navbar):
  // equal padding-top and negative margin-top on every section heading
  for (var i = 1; i <= 6; i++) {
    css += ".section h" + i + "{ " + pt + mt + "}\n";
  }
  // push the body content down by the same amount
  style.innerHTML = "body {" + pt + "padding-bottom: 40px; }\n" + css;
  document.head.appendChild(style);
});
</script>

<!-- tabsets -->

<style type="text/css">
/* Tabsets rendered as a dropdown (.tabset-dropdown): only the active tab is
   shown until the list gets the .nav-tabs-open class, which shows all tabs. */
.tabset-dropdown > .nav-tabs {
  display: inline-table;
  max-height: 500px;
  min-height: 44px;
  overflow-y: auto;
  border: 1px solid #ddd;
  border-radius: 4px;
}

/* Indicator glyph before the active tab. Glyph codes are written as CSS
   escapes: HTML entities such as "&#xe258;" are not decoded inside a
   <style> element and would be rendered as literal text.
   NOTE(review): \e259 (Glyphicons "menu-down") is assumed for this state;
   confirm against the intended icon. */
.tabset-dropdown > .nav-tabs > li.active:before {
  content: "\e259";
  font-family: 'Glyphicons Halflings';
  display: inline-block;
  padding: 10px;
  border-right: 1px solid #ddd;
}

/* Open state: the active tab uses glyph \e258 and no separator border. */
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
  content: "\e258";
  border: none;
}

/* Open state: indicator glyph before the whole tab list.
   NOTE(review): \e259 assumed here as well; confirm. */
.tabset-dropdown > .nav-tabs.nav-tabs-open:before {
  content: "\e259";
  font-family: 'Glyphicons Halflings';
  display: inline-block;
  padding: 10px;
  border-right: 1px solid #ddd;
}

.tabset-dropdown > .nav-tabs > li.active {
  display: block;
}

/* Strip the default tab chrome from the tab links. */
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
  border: none;
  display: inline-block;
  border-radius: 4px;
  background-color: transparent;
}

/* Open state: stack every tab vertically. */
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
  display: block;
  float: none;
}

/* Closed state default: tabs are hidden (the active tab is re-shown above
   by the more specific li.active rule). */
.tabset-dropdown > .nav-tabs > li {
  display: none;
}
</style>

<!-- code folding -->


<style type="text/css">

/* Margins around the table-of-contents container. */
#TOC {
  margin: 25px 0px 20px 0px;
}
/* At widths up to 768px the TOC is positioned relatively and spans the full width. */
@media (max-width: 768px) {
#TOC {
  position: relative;
  width: 100%;
}
}

@media print {
.toc-content {
  /* see https://github.com/w3c/csswg-drafts/issues/4434 */
  float: right;
}
}

/* Horizontal padding for the main content column. */
.toc-content {
  padding-left: 30px;
  padding-right: 40px;
}

/* This selector is more specific than the plain .main-container rule
   (max-width: 940px) defined earlier in this file, so 1200px applies to
   div elements with that class. */
div.main-container {
  max-width: 1200px;
}

/* Default size limits of the tocify sidebar. */
div.tocify {
  width: 20%;
  max-width: 260px;
  max-height: 85%;
}

/* Slightly wider sidebar between 768px and 991px. */
@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}

/* Full-width sidebar with no width cap at 767px and below. */
@media (max-width: 767px) {
  div.tocify {
    width: 100%;
    max-width: none;
  }
}

.tocify ul, .tocify li {
  line-height: 20px;
}

/* Smaller text for sub-header entries. */
.tocify-subheader .tocify-item {
  font-size: 0.90em;
}

/* Square corners on TOC list items. */
.tocify .list-group-item {
  border-radius: 0px;
}


</style>


</head>

<body>


<div class="container-fluid main-container">


<!-- setup 3col/9col grid for toc_float and main content  -->
<div class="row">
<div class="col-sm-12 col-md-4 col-lg-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-sm-12 col-md-8 col-lg-9">


<!-- Fixed-top site navbar: brand link to index.html, a collapse toggle that targets #navbar, and a right-aligned GitHub link. -->
<div class="navbar navbar-default  navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <!-- The toggle contains only decorative bars, so it needs a text alternative and state attributes. -->
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">GenoPred</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
  <!-- GitHub is a Font Awesome 5 brand icon, so it uses the "fab" prefix; the icon is hidden from assistive technology and the link gets a text label instead. -->
  <a href="https://github.com/opain/GenoPred">
    <span class="fab fa-github fa-lg" aria-hidden="true"></span>
    <span class="sr-only">GenoPred on GitHub</span>
  </a>
</li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<div id="header">


<h1 class="title toc-ignore">Estimating significance of prediction differences</h1>

</div>


<hr />
<p>(under development)</p>
<p>This analysis was to compare different methods for estimating the statistical significance of prediction differences across polygenic scoring methods, and models in general.</p>
<p>Here I simulate data with one continuous outcome, and two correlated continuous sets of predictions. The correlation between the predictors and the outcome is varied, and the correlation between the predictors themselves is varied.</p>
<p>As I do in my other analyses, I estimate the correlation between the predictors and the outcome to determine their predictive utility. Then I compare different methods by comparing the observed-predicted correlations. Several methods for comparing correlations are considered:</p>
<ul>
<li>Two-sample Z-test
<ul>
<li>Compares estimates from two populations and does not account for the correlation between predictors</li>
</ul></li>
<li>Permutation based
<ul>
<li>Randomises the phenotype, retaining the correlation between predictors, and then estimates the number of times the difference in correlation is larger than the observed difference.</li>
</ul></li>
<li>Cox test
<ul>
<li>Method for comparing non-nested models</li>
</ul></li>
<li>Pearson and Filon’s (1898):
<ul>
<li>Method for comparing correlations between variables that are correlated and measured in a single sample</li>
<li>Implemented using cocor.dep.groups.overlap function in the cocor package</li>
<li>Other methods implemented by this function are highly concordant</li>
</ul></li>
<li>Fisher r-to-z :
<ul>
<li>Method for comparing correlations between variables that are independent and measured in different samples.</li>
<li>Implemented using psych package</li>
<li>Accounts for non-normal error of correlations</li>
</ul></li>
<li>Williams test :
<ul>
<li>Method for comparing correlations between variables that are dependent and measured in the same sample.</li>
<li>Implemented using psych package</li>
<li>Accounts for non-normal error of correlations</li>
</ul></li>
</ul>
<p>We will simulate the following scenarios:</p>
<ul>
<li>Estimates from two independent samples</li>
<li>Estimates from one sample, but predictors are uncorrelated</li>
<li>Estimates from one sample, and predictors are highly correlated</li>
</ul>
<hr />
<div id="estimates-from-two-independent-samples" class="section level1">
<h1><span class="header-section-number">1</span> Estimates from two independent samples</h1>
<pre class="r"><code>library(data.table)

set.seed(1)
# Sample 1
N&lt;-200
y1&lt;-rnorm(N)
x1&lt;-scale(y1+rnorm(N,0,3))
dat1&lt;-data.table(y1,x1)
cor(dat1)</code></pre>
<pre><code>##           y1        V1
## y1 1.0000000 0.2743249
## V1 0.2743249 1.0000000</code></pre>
<pre class="r"><code># Sample 2
N&lt;-200
y2&lt;-rnorm(N)
x2&lt;-scale(y2+rnorm(N,0,10))
dat2&lt;-data.table(y2,x2)
cor(dat2)</code></pre>
<pre><code>##            y2         V1
## y2 1.00000000 0.06274804
## V1 0.06274804 1.00000000</code></pre>
<pre class="r"><code>### Derive models using each predictor
mod1&lt;-lm(y1 ~ x1, data=dat1)
mod2&lt;-lm(y2 ~ x2, data=dat2)

dat1$pred1&lt;-predict(mod1, dat1)
dat2$pred2&lt;-predict(mod2, dat2)

dat_cor&lt;-data.frame(model1=&#39;x1&#39;,
                    model2=&#39;x2&#39;,
                    cor_x1_x2=NA,
                    cor_y_x1=coef(summary(mod1))[2,1],
                    cor_y_x1_se=coef(summary(mod1))[2,2],
                    cor_y_x2=coef(summary(mod2))[2,1],
                    cor_y_x2_se=coef(summary(mod2))[2,2],
                    cor_diff=coef(summary(mod1))[2,1]-coef(summary(mod2))[2,1])

### Test difference between predictors using a Z-test
dat_cor$cor_diff_Ztest_P&lt;-pnorm(-(dat_cor$cor_diff/sqrt((dat_cor$cor_y_x1_se^2)+(dat_cor$cor_y_x2_se^2)))) 

### Test difference between predictors using a permutation test
n_perm&lt;-500
diff&lt;-NULL
for(i in 1:n_perm){
  y1_sample&lt;-sample(y1)
  y2_sample&lt;-sample(y2)
  mod1_i&lt;-lm(y1_sample ~ x1, data=dat1)
  mod2_i&lt;-lm(y2_sample ~ x2, data=dat2)
  
  diff_i&lt;-coef(summary(mod1_i))[2,1]-coef(summary(mod2_i))[2,1]
  diff&lt;-c(diff,diff_i)
}

dat_cor$cor_diff_perm_P&lt;-sum(diff &gt; dat_cor$cor_diff[1])/n_perm

### Test difference between predictors using coxtest 
# Not possible as different samples

### Test difference between predictors using Fisher&#39;s Z transformation 
library(cocor)
dat_cor$cocor_fisherZ_P&lt;-cocor.indep.groups(r1.jk=dat_cor$cor_y_x1, r2.hm=dat_cor$cor_y_x2, n1=N, n2=N, alternative=&#39;greater&#39;)@fisher1925$p.value

library(psych)
dat_cor$psych_fisherZ_P&lt;-paired.r(xy=dat_cor$cor_y_x1, xz=dat_cor$cor_y_x2, n=N, n2=N, twotailed=F)$p

dat_cor</code></pre>
<pre><code>##   model1 model2 cor_x1_x2  cor_y_x1 cor_y_x1_se   cor_y_x2 cor_y_x2_se
## 1     x1     x2        NA 0.2548745  0.06349504 0.06714255  0.07589417
##    cor_diff cor_diff_Ztest_P cor_diff_perm_P cocor_fisherZ_P psych_fisherZ_P
## 1 0.1877319       0.02890093           0.046      0.02747978      0.02747978</code></pre>
<p>The Fisher’s r-to-z transformation is a commonly used approach for comparing correlations from different samples. The two-sample z-test does not account for the non-normal error distribution of Pearson correlations. Results indicate the fisher r-to-z method is concordant with the two-sample z-test. The permutation-based approach is more conservative than other methods.</p>
<hr />
</div>
<div id="estimates-from-one-sample-but-predictors-are-uncorrelated" class="section level1">
<h1><span class="header-section-number">2</span> Estimates from one sample, but predictors are uncorrelated</h1>
<pre class="r"><code>library(data.table)
set.seed(1)
N&lt;-200
y&lt;-rnorm(N)
x1&lt;-as.numeric(scale(y+rnorm(N,0,3)))
x2&lt;-as.numeric(scale(y+rnorm(N,0,10)))
dat&lt;-data.table(y,x1,x2)
cor(dat)</code></pre>
<pre><code>##            y         x1         x2
## y  1.0000000 0.27432489 0.15351734
## x1 0.2743249 1.00000000 0.01848117
## x2 0.1535173 0.01848117 1.00000000</code></pre>
<pre class="r"><code>### Derive models using each predictor
mod1&lt;-lm(y ~ x1, data=dat)
mod2&lt;-lm(y ~ x2, data=dat)

dat$pred1&lt;-predict(mod1, dat)
dat$pred2&lt;-predict(mod2, dat)

dat_cor&lt;-data.frame(model1=&#39;x1&#39;,
                    model2=&#39;x2&#39;,
                    cor_x1_x2=cor(dat$x1,dat$x2),
                    cor_y_x1=coef(summary(mod1))[2,1],
                    cor_y_x1_se=coef(summary(mod1))[2,2],
                    cor_y_x2=coef(summary(mod2))[2,1],
                    cor_y_x2_se=coef(summary(mod2))[2,2],
                    cor_diff=coef(summary(mod1))[2,1]-coef(summary(mod2))[2,1])

### Test difference between predictors using a Z-test
dat_cor$cor_diff_Ztest_P&lt;-pnorm(-(dat_cor$cor_diff/sqrt((dat_cor$cor_y_x1_se^2)+(dat_cor$cor_y_x2_se^2)))) 

### Test difference between predictors using a permutation test
n_perm&lt;-500
diff&lt;-NULL
for(i in 1:n_perm){
  y_sample&lt;-sample(y)
  mod1_i&lt;-lm(y_sample ~ x1, data=dat)
  mod2_i&lt;-lm(y_sample ~ x2, data=dat)
  
  diff_i&lt;-coef(summary(mod1_i))[2,1]-coef(summary(mod2_i))[2,1]
  diff&lt;-c(diff,diff_i)
}

dat_cor$cor_diff_perm_P&lt;-sum(diff &gt; dat_cor$cor_diff[1])/n_perm

### Test difference between predictors using coxtest 
library(lmtest)
dat_cor$cox_diff_P&lt;-coxtest(mod1, mod2)$P[2]

### Test difference between predictors using Pearson 
library(cocor)
dat_cor$pearson_diff_P&lt;-cocor.dep.groups.overlap(dat_cor$cor_y_x1, dat_cor$cor_y_x2, dat_cor$cor_x1_x2, N,alternative=&#39;greater&#39;)@pearson1898$p.value

### Test difference between predictors using Williams&#39;s Test
library(psych)
dat_cor$williams_diff_P&lt;-paired.r(xy=dat_cor$cor_y_x1, xz=dat_cor$cor_y_x2, yz=dat_cor$cor_x1_x2, n=N, twotailed=F)$p

dat_cor</code></pre>
<pre><code>##   model1 model2  cor_x1_x2  cor_y_x1 cor_y_x1_se  cor_y_x2 cor_y_x2_se cor_diff
## 1     x1     x2 0.01848117 0.2548745  0.06349504 0.1426325  0.06524537 0.112242
##   cor_diff_Ztest_P cor_diff_perm_P cox_diff_P pearson_diff_P williams_diff_P
## 1        0.1088132           0.112          0      0.1205402       0.1230892</code></pre>
<p>Apart from the coxtest method, results are similar across methods, with the Pearson and Williams methods being highly concordant.</p>
<hr />
</div>
<div id="estimates-from-one-sample-and-predictors-are-highly-correlated" class="section level1">
<h1><span class="header-section-number">3</span> Estimates from one sample, and predictors are highly correlated</h1>
<pre class="r"><code>library(data.table)
set.seed(1)
N&lt;-200
y&lt;-rnorm(N)
set.seed(2)
x1&lt;-as.numeric(scale(y+rnorm(N,0,3)))
set.seed(3)
x2&lt;-as.numeric(scale(x1+rnorm(N,0,1)))
dat&lt;-data.table(y,x1,x2)
cor(dat)</code></pre>
<pre><code>##            y        x1        x2
## y  1.0000000 0.2813946 0.1928833
## x1 0.2813946 1.0000000 0.6928299
## x2 0.1928833 0.6928299 1.0000000</code></pre>
<pre class="r"><code>### Derive models using each predictor
mod1&lt;-lm(y ~ x1, data=dat)
mod2&lt;-lm(y ~ x2, data=dat)

dat$pred1&lt;-predict(mod1, dat)
dat$pred2&lt;-predict(mod2, dat)

dat_cor&lt;-data.frame(model1=&#39;x1&#39;,
                    model2=&#39;x2&#39;,
                    cor_x1_x2=cor(dat$x1,dat$x2),
                    cor_y_x1=coef(summary(mod1))[2,1],
                    cor_y_x1_se=coef(summary(mod1))[2,2],
                    cor_y_x2=coef(summary(mod2))[2,1],
                    cor_y_x2_se=coef(summary(mod2))[2,2],
                    cor_diff=coef(summary(mod1))[2,1]-coef(summary(mod2))[2,1])

### Test difference between predictors using a Z-test
dat_cor$cor_diff_Ztest_P&lt;-pnorm(-(dat_cor$cor_diff/sqrt((dat_cor$cor_y_x1_se^2)+(dat_cor$cor_y_x2_se^2)))) 

### Test difference between predictors using a permutation test
n_perm&lt;-500
diff&lt;-NULL
for(i in 1:n_perm){
  y_sample&lt;-sample(y)
  mod1_i&lt;-lm(y_sample ~ x1, data=dat)
  mod2_i&lt;-lm(y_sample ~ x2, data=dat)
  
  diff_i&lt;-coef(summary(mod1_i))[2,1]-coef(summary(mod2_i))[2,1]
  diff&lt;-c(diff,diff_i)
}

dat_cor$cor_diff_perm_P&lt;-sum(diff &gt; dat_cor$cor_diff[1])/n_perm

### Test difference between predictors using coxtest 
library(lmtest)
dat_cor$cox_diff_P&lt;-coxtest(mod1, mod2)$P[2]

### Test difference between predictors using Pearson 
library(cocor)
dat_cor$pearson_diff_P&lt;-cocor.dep.groups.overlap(dat_cor$cor_y_x1, dat_cor$cor_y_x2, dat_cor$cor_x1_x2, N,alternative=&#39;greater&#39;)@pearson1898$p.value

### Test difference between predictors using Williams&#39;s Test
library(psych)
dat_cor$williams_diff_P&lt;-paired.r(xy=dat_cor$cor_y_x1, xz=dat_cor$cor_y_x2, yz=dat_cor$cor_x1_x2, n=N, twotailed=F)$p

### Test difference between predictors using ROC test
library(pROC)</code></pre>
<pre><code>## Type &#39;citation(&quot;pROC&quot;)&#39; for a citation.</code></pre>
<pre><code>## 
## Attaching package: &#39;pROC&#39;</code></pre>
<pre><code>## The following objects are masked from &#39;package:stats&#39;:
## 
##     cov, smooth, var</code></pre>
<pre class="r"><code>mod1_roc&lt;-roc(y ~ x1)</code></pre>
<pre><code>## Warning in roc.default(response, predictors[, 1], ...): &#39;response&#39; has more
## than two levels. Consider setting &#39;levels&#39; explicitly or using &#39;multiclass.roc&#39;
## instead</code></pre>
<pre><code>## Setting levels: control = -2.2146998871775, case = -1.98935169586337</code></pre>
<pre><code>## Setting direction: controls &lt; cases</code></pre>
<pre class="r"><code>mod2_roc&lt;-roc(y ~ x2)</code></pre>
<pre><code>## Warning in roc.default(response, predictors[, 1], ...): &#39;response&#39; has more
## than two levels. Consider setting &#39;levels&#39; explicitly or using &#39;multiclass.roc&#39;
## instead</code></pre>
<pre><code>## Setting levels: control = -2.2146998871775, case = -1.98935169586337
## Setting direction: controls &lt; cases</code></pre>
<pre class="r"><code>dat_cor$mod1_auc&lt;-mod1_roc$auc
dat_cor$mod2_auc&lt;-mod2_roc$auc

dat_cor$roc_diff_P&lt;-roc.test(mod1_roc,mod2_roc, paired=T, alternative=&#39;greater&#39;, method=&#39;bootstrap&#39;)$p.value</code></pre>
<pre><code>## Warning in roc.test.roc(mod1_roc, mod2_roc, paired = T, alternative =
## &quot;greater&quot;, : roc.test() of two ROC curves with AUC == 1 has always p.value = 1
## and can be misleading.</code></pre>
<pre class="r"><code>dat_cor</code></pre>
<pre><code>##   model1 model2 cor_x1_x2  cor_y_x1 cor_y_x1_se  cor_y_x2 cor_y_x2_se
## 1     x1     x2 0.6928299 0.2614429  0.06336002 0.1792073  0.06478817
##     cor_diff cor_diff_Ztest_P cor_diff_perm_P   cox_diff_P pearson_diff_P
## 1 0.08223559        0.1820774           0.068 2.185878e-06     0.06303985
##   williams_diff_P mod1_auc mod2_auc roc_diff_P
## 1      0.06448287        1        1        0.5</code></pre>
<p>Again, the coxtest method is very different from the other methods. The two-sample Z-test is now deviating from the other methods because it doesn’t account for the correlation between predictors. The results for the permutation, Pearson and Williams methods are highly concordant.</p>
<p>The psych package is a solid package and the Williams method is recommended by Steiger, who worked a lot in this area. It is also faster than the permutation-based approach. From here on, use the Williams test to test for significant differences between correlations between outcomes and correlated predictors.</p>
<hr />
</div>
<div id="under-development" class="section level1">
<h1><span class="header-section-number">4</span> Under development…</h1>
<p>Another method that is used to compare models is to compare AUC curves. This can be implemented using the pROC package in R. This test is only suitable for binary outcomes. For comparison, compare the AUC method to the permutation and Williams methods.</p>
<hr />
<div id="estimates-from-one-sample-and-predictors-are-highly-correlated-1" class="section level2">
<h2><span class="header-section-number">4.1</span> Estimates from one sample, and predictors are highly correlated</h2>
<pre class="r"><code>library(data.table)
set.seed(1)
N&lt;-200
y&lt;-rbinom(n=N, size=1, prob=0.5)
set.seed(2)
x1&lt;-as.numeric(scale(y+rnorm(N,0,2)))
set.seed(3)
x2&lt;-as.numeric(scale(x1+rnorm(N,0,1)))
dat&lt;-data.table(y,x1,x2)
cor(dat)</code></pre>
<pre><code>##            y        x1        x2
## y  1.0000000 0.1747443 0.1361085
## x1 0.1747443 1.0000000 0.6951187
## x2 0.1361085 0.6951187 1.0000000</code></pre>
<pre class="r"><code>### Derive models using each predictor
mod1&lt;-glm(y ~ x1, data=dat, family=&#39;binomial&#39;)
mod2&lt;-glm(y ~ x2, data=dat, family=&#39;binomial&#39;)

dat$pred1&lt;-predict(mod1, dat)
dat$pred2&lt;-predict(mod2, dat)

dat_cor&lt;-data.frame(model1=&#39;x1&#39;,
                    model2=&#39;x2&#39;,
                    cor_x1_x2=cor(dat$x1,dat$x2),
                    cor_y_x1=coef(summary(mod1))[2,1],
                    cor_y_x1_se=coef(summary(mod1))[2,2],
                    cor_y_x2=coef(summary(mod2))[2,1],
                    cor_y_x2_se=coef(summary(mod2))[2,2],
                    cor_diff=coef(summary(mod1))[2,1]-coef(summary(mod2))[2,1])

### Test difference between predictors using a permutation test
n_perm&lt;-500
diff&lt;-NULL
for(i in 1:n_perm){
  y_sample&lt;-sample(y)
  mod1_i&lt;-glm(y_sample ~ x1, data=dat, family=&#39;binomial&#39;)
  mod2_i&lt;-glm(y_sample ~ x2, data=dat, family=&#39;binomial&#39;)
  
  diff_i&lt;-coef(summary(mod1_i))[2,1]-coef(summary(mod2_i))[2,1]
  diff&lt;-c(diff,diff_i)
}

dat_cor$cor_diff_perm_P&lt;-sum(diff &gt; dat_cor$cor_diff[1])/n_perm

### Test difference between predictors using Williams&#39;s Test
library(psych)
dat_cor$williams_diff_P&lt;-paired.r(xy=dat_cor$cor_y_x1, xz=dat_cor$cor_y_x2, yz=dat_cor$cor_x1_x2, n=N, twotailed=F)$p

### Test difference between predictors using ROC test
library(pROC)
mod1_roc&lt;-roc(y ~ x1)</code></pre>
<pre><code>## Setting levels: control = 0, case = 1</code></pre>
<pre><code>## Setting direction: controls &lt; cases</code></pre>
<pre class="r"><code>mod2_roc&lt;-roc(y ~ x2)</code></pre>
<pre><code>## Setting levels: control = 0, case = 1
## Setting direction: controls &lt; cases</code></pre>
<pre class="r"><code>dat_cor$mod1_auc&lt;-mod1_roc$auc
dat_cor$mod2_auc&lt;-mod2_roc$auc

dat_cor$roc_diff_P&lt;-roc.test(mod1_roc,mod2_roc, paired=T, alternative=&#39;greater&#39;)$p.value

dat_cor</code></pre>
<pre><code>##   model1 model2 cor_x1_x2 cor_y_x1 cor_y_x1_se  cor_y_x2 cor_y_x2_se  cor_diff
## 1     x1     x2 0.6951187 0.359497   0.1473043 0.2777717   0.1455746 0.0817253
##   cor_diff_perm_P williams_diff_P  mod1_auc  mod2_auc roc_diff_P
## 1           0.216       0.0586538 0.5869348 0.5847339  0.4737637</code></pre>
<p>This analysis raises some concerns about the validity of the Williams test when the outcome is binary. We should check the false positive rate of the Williams test by testing how many &lt;0.05 tests there are under the null.</p>
<hr />
</div>
</div>


</div>
</div>

</div>

<script>

// add bootstrap table styles to pandoc tables
/**
 * Adds the Bootstrap classes "table" and "table-condensed" to every table
 * that has a tbody containing a row with class "odd".
 */
function bootstrapStylePandocTables() {
  var oddRows = $('tr.odd');
  var pandocTables = oddRows.parent('tbody').parent('table');
  pandocTables.addClass('table table-condensed');
}

// Apply the table styling once the DOM is ready.
$(document).ready(function () { bootstrapStylePandocTables(); });


</script>

<!-- tabsets -->

<script>
// On DOM ready, call window.buildTabsets with the string "TOC".
$(function initTabsets() {
  window.buildTabsets("TOC");
});

// On DOM ready, make each tab in a dropdown-style tabset toggle the
// "nav-tabs-open" class on its parent list when clicked.
$(function bindTabsetDropdownToggle() {
  var dropdownTabs = $('.tabset-dropdown > .nav-tabs > li');
  dropdownTabs.on('click', function () {
    var tabList = $(this).parent();
    tabList.toggleClass('nav-tabs-open');
  });
});
</script>

<!-- code folding -->

<script>
// On DOM ready: initialise the tocify widget on #TOC.
$(document).ready(function () {
  // Move the "toc-ignore" marker from section divs onto their direct
  // h1-h5 children.
  var ignoredSections = $('div.section.toc-ignore');
  ignoredSections.removeClass('toc-ignore');
  ignoredSections.children('h1,h2,h3,h4,h5').addClass('toc-ignore');

  // Build an anchor hash from heading text: drop the characters
  // . \ / ? & ! # < > and replace each whitespace character with "_".
  function makeAnchorHash(text) {
    var stripped = text.replace(/[.\\/?&!#<>]/g, '');
    return stripped.replace(/\s/g, '_');
  }

  // Widget options, including the two flags that were previously
  // assigned after the object literal.
  var tocSettings = {
    selectors: "h1,h2,h3",
    theme: "bootstrap3",
    context: '.toc-content',
    hashGenerator: makeAnchorHash,
    ignoreSelector: ".toc-ignore",
    scrollTo: 0,
    showAndHide: true,
    smoothScroll: true
  };

  // Create the widget; the value stored under "toc-tocify" is read back
  // but not used further in this handler.
  var tocWidget = $("#TOC").tocify(tocSettings).data("toc-tocify");
});
</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Append a script element for MathJax to the document head at runtime.
  (function loadMathJax() {
    var mathJaxUrl = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    var headElement = document.getElementsByTagName("head")[0];
    var loaderTag = document.createElement("script");
    loaderTag.type = "text/javascript";
    loaderTag.src = mathJaxUrl;
    headElement.appendChild(loaderTag);
  })();
</script>

</body>
</html>