-
Notifications
You must be signed in to change notification settings - Fork 5
/
PRScoms.txt
91 lines (59 loc) · 2.85 KB
/
PRScoms.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
################## POLYGENIC RISK SCORE ###################################
#use metal inverse variance weighted meta-analysis to generate beta and SE to test prediction in Gothenburg H70 Birth Cohort Studies and Clinical AD from Sweden (Gothenburg) cohort
#example metalscript
#metalscript included all datasets except Gothenburg and UKB biobank
#metalscript.txt
SCHEME STDERR
MARKERLABEL SNP
ALLELELABELS A1 A2
PVALUELABEL P
EFFECTLABEL BETA
STDERR SE
PROCESS dataset1.txt
MARKERLABEL SNP
ALLELELABELS A1 A2
PVALUELABEL P
EFFECTLABEL BETA
STDERR SE
PROCESS dataset2.txt
ANALYZE
QUIT
##
#analyse dataset. All datasets had variants with MAF <1/sqrt(2*N) removed prior to meta-analysis
./metal metalscript.txt
#run PRS on Gothenburg dataset
#Gothenburg dataset is postimputation and has any overlapping individuals removed (code in QCimpRegressioncoms.txt)
#covariates are PCs1-4,sig PCs, and sex
#Mresults.txt is the results from the metal meta-analysis which includes SNP CHR BP A1 A2 BETA SE P
Rscript PRSice.R --dir . --prsice ./PRSice_linux --base Mresults.txt --target GothenburgHRCXno --thread max --stat BETA --beta --binary-target T --cov covars.cov --out metalprs
##without APOE
#19:40000000-50000000 has been removed from Mresults.txt
Rscript PRSice.R --dir . --prsice ./PRSice_linux --base Mresults.txt --target GothenburgHRCXno --thread max --stat BETA --beta --binary-target T --cov covars.cov --out metalprsNoAPOE
#run prs using the UKB sumstats as the base model
Rscript PRSice.R --dir . --prsice ./PRSice_linux --base UKBresults.txt --target GothenburgHRCXno --thread max --stat BETA --beta --binary-target T --cov covars.cov --out UKBprs
#without APOE
#19:40000000-50000000 has been removed from UKBresults.txt
Rscript PRSice.R --dir . --prsice ./PRSice_linux --base UKBresults.txt --target GothenburgHRCXno --thread max --stat BETA --beta --binary-target T --cov covars.cov --out UKBprsnoUKB
#compare the two predition is R
#generate baseline predictions without covariates
#these do include APOE
Rscript PRSice.R --dir . --prsice ./PRSice_linux --base UKBresults.txt --target GothenburgHRCXno --thread max --stat BETA --beta --binary-target T --out UKBprsBASE
Rscript PRSice.R --dir . --prsice ./PRSice_linux --base Mresults.txt --target GothenburgHRCXno --thread max --stat BETA --beta --binary-target T --out metalprsBASE
#combined PRS
library(data.table)
y<-fread("GothenburgHRCXno.fam", header=F)
#remove people with missing phenotype
y<-y[y$V6>0,]
uk<-fread("UKBprsBASE.best",header=T)
me<-fread("metalprsBASE.best",header=T)
c<-fread("covars.cov", header=T)
c$ukb<-uk$PRS
c$metal<-me$PRS
#overlap individuals
c<-c[c$FID %in% y$V1,]
all(c$FID==y$V1)
#make phenotype binary (1 and 0 rather than 1 and 2)
c$pheno<-y$V6-1
#fit model of the base predictions
fit1<-glm(pheno ~ukb+metal, data = c, family = binomial)
with(summary(fit1), 1 - deviance/null.deviance)