-
Notifications
You must be signed in to change notification settings - Fork 1
/
script SCOAR META.R
832 lines (697 loc) · 28.2 KB
/
script SCOAR META.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
# By Keith Lohse, Rehabilitation Informatics Lab, 2016-10-07
# For this analysis, you will need to install and then load the following
# packages:
# install.packages("metafor"); install.packages("dplyr"); install.packages("ggplot2")
library("metafor")
library("dplyr")
library("ggplot2")

## Setting the Directory -------------------------------------------------------
getwd()
# Location of the original author's checkout. It is only used when it exists,
# so the script no longer aborts on other machines; in that case the data file
# is looked up in the current working directory instead.
data_dir <- "C:/Users/krl0022/Documents/GitHub/dose_meta/"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
list.files()

# Make sure "data SCOAR TEXT OUTLIERS REMOVED.csv" is in the working directory.
data_file <- "data SCOAR TEXT OUTLIERS REMOVED.csv"
if (!file.exists(data_file)) {
  stop(
    "Cannot find '", data_file, "' in ", getwd(),
    ". Set the working directory to the folder that contains it.",
    call. = FALSE
  )
}

# Read in the full data set.
# stringsAsFactors = TRUE is spelled out because later code treats text
# columns as factors (e.g. as.numeric(group), bg = group, bg = days_cat) and
# the read.csv default changed to FALSE in R 4.0.0.
DATA <- read.csv(data_file, header = TRUE, stringsAsFactors = TRUE)
head(DATA)

# Alternatively, you can read the text file into R directly from my GitHub repo:
# DATA <- read.table("https://raw.github.com/keithlohse/dose_meta/master/data%20SCOAR%20TEXT%20OUTLIERS%20REMOVED.txt",
#                    header = TRUE, sep = "\t", stringsAsFactors = TRUE)
# head(DATA)
# Note: in this data file all effect sizes greater than d = 3.0 have been
# removed.
##########################################
### Analysis of the primary extraction ###
##########################################
# In this first analysis, we took only the primary outcome or
# the first usable secondary outcome if no primary was listed.
# Below, we will conduct separate analyses for the FMA and gait
# speed (regardless of whether those measures were primary outcomes).

# Create different sets of data
# 1. DATA  <- contains all data for experimental and control groups
# 2. LOHSE <- contains only primary outcomes and excludes missing cases
# 3. CTRLS <- contains only the control group data from LOHSE
# 4. EXPS  <- contains only the experimental group data from LOHSE
# 5. BIGN  <- contains only primary outcomes for groups with base n >= 20
# 6. FMA   <- contains data for Fugl-Meyer Assessment outcomes, excludes
#             missing cases
# 7. SPEED <- contains data for gait speed outcomes, excludes missing cases

# Creating the "LOHSE" subset
LOHSE <- subset(DATA, SCOAR_outcome == "lohse")
# Number of rows should be 489 (total number of independent groups)
nrow(LOHSE)
# Sum should be 12,847 (total number of participants)
sum(LOHSE$base_n)

# A value counts as present when it is neither a real NA nor the literal
# string "na". The earlier `!x == "na"` form only dropped real NAs because
# subset() silently discards rows whose condition evaluates to NA; this makes
# both cases explicit and keeps exactly the same rows.
has_value <- function(x) !is.na(x) & x != "na"

# Keep only groups that report every variable needed by the models below.
LOHSE <- subset(
  LOHSE,
  has_value(time_50) &    # time scheduled for therapy
    has_value(exp_dur) &  # duration of therapy
    has_value(age_base) & # age at baseline
    has_value(days_ps) &  # days post-stroke
    has_value(term_d)     # terminal effect size
)
# New number of rows should be 303
nrow(LOHSE)
# New sum should be 6767
sum(LOHSE$base_n)

# Creating a subset of only those groups with larger samples (base n >= 20)
BIGN <- subset(LOHSE, base_n >= 20)
nrow(BIGN)
sum(BIGN$base_n)

# Creating a subset of control groups only
# (table() gives per-group counts for factor and character columns alike)
CTRLS <- subset(LOHSE, group == "ctrl")
table(CTRLS$group)

# Creating a subset of experimental groups only
EXPS <- subset(LOHSE, group == "exp")
table(EXPS$group)

# FMA and SPEED datasets are defined below
#################################################
# Descriptive Statistics of the included Groups #
#################################################
## Table 1 ---------------------------------------------------------------------
# The statistics printed here are the ones reported in Table 1 of the
# manuscript: a summary() of each variable followed by its standard deviation.

# Average age of patients at baseline
summary(LOHSE$age_base)
sd(LOHSE$age_base)

# Average days from stroke to the beginning of therapy
summary(LOHSE$days_ps)
sd(LOHSE$days_ps)

# Duration of the intervention
summary(LOHSE$exp_dur)
sd(LOHSE$exp_dur)

# Time scheduled for therapy, Max Time calculation
summary(LOHSE$time_MAX)
sd(LOHSE$time_MAX)

# Time scheduled for therapy, 50% time calculation
summary(LOHSE$time_50)
sd(LOHSE$time_50)

# Time scheduled for therapy, Min time calculation
summary(LOHSE$time_MIN)
sd(LOHSE$time_MIN)

# How was time in therapy quantified? 1 = time scheduled; 4 = repetitions
summary(as.factor(LOHSE$detailed_time))

# Did the authors specify an intention-to-treat analysis?
summary(as.factor(LOHSE$itt_analysis))

# Number of subjects contributing to baseline means
summary(LOHSE$base_n)
sd(LOHSE$base_n)

# Number of subjects contributing to the terminal means
summary(LOHSE$term_n)
sd(LOHSE$term_n)

# Number of subjects contributing to follow-up means.
# The data were not filtered on follow-up outcomes, so missing cases have to
# be excluded (na.rm) when computing the SD.
summary(LOHSE$fu_n)
sd(LOHSE$fu_n, na.rm = TRUE)

# Time from the baseline assessment to the follow-up assessment
summary(LOHSE$fu_dur)
sd(LOHSE$fu_dur, na.rm = TRUE)

# Number of experimental versus control groups
summary(LOHSE$group)

# Terminal effect sizes (term_g), the outcome modelled below
summary(LOHSE$term_g)
sd(LOHSE$term_g)
## -----------------------------------------------------------------------------
##############################
# Analysis of All Group Data #
##############################
# Random-effects model of all groups, using maximum likelihood estimation
m0 <- rma(yi = term_g, vi = term_Vg, data = LOHSE, method = "ML")
m0
confint(m0)

# Random-effects model of CONTROL groups, using maximum likelihood estimation
m0A <- rma(yi = term_g, vi = term_Vg, data = CTRLS, method = "ML")
m0A

# Random-effects model of EXPERIMENTAL groups, using maximum likelihood
# estimation
m0B <- rma(yi = term_g, vi = term_Vg, data = EXPS, method = "ML")
m0B

## Figure 1 --------------------------------------------------------------------
# Publication Bias
# Creating a forest plot to show the RE model of all of the data
forest(m0, slab = paste(LOHSE$author, LOHSE$year, sep = ", "), cex = 1.5)

# Creating a funnel plot to show potential bias in the full dataset.
# Point fill is chosen from the two-colour palette by group. The group column
# is converted to integer codes explicitly so this also works when it was read
# as character (the strings would otherwise be interpreted as colour names).
palette(c("#fee8c8", "#e34a33"))
funnel(
  m0,
  pch = 21, bg = as.integer(as.factor(LOHSE$group)),
  cex = 1.5, cex.axis = 1.25, cex.lab = 1.25,
  xlab = "Terminal Outcome (g)"
)

# Statistical test of funnel plot asymmetry (see Egger et al., 1997)
regtest(m0, model = "lm")
# Significant result indicates bias in the distribution of results
## -----------------------------------------------------------------------------
# Moderator Analysis of Treatment Versus Control Groups
## First we need to center the "group" variable.
## The codes are assigned by name (ctrl = -0.5, exp = +0.5) rather than through
## as.numeric(group) - 1.5, which only works when group is a factor whose two
## levels happen to sort as ctrl, exp (and yields NA for character columns).
table(LOHSE$group, useNA = "ifany")
group_codes <- c(ctrl = -0.5, exp = 0.5)
LOHSE$exp.c <- unname(group_codes[as.character(LOHSE$group)])
summary(LOHSE$exp.c)
head(LOHSE)
## Then we can include this centered group variable in our model
m1 <- rma(yi = term_g ~ exp.c, vi = term_Vg, data = LOHSE, method = "ML")
m1
## We can also get the 95% confidence estimates for the model
confint(m1)

# Moderator Analysis of Groups and DPS (days post-stroke)
## First we center the DPS variable
summary(LOHSE$days_ps)
LOHSE$DPS.c <- LOHSE$days_ps - mean(LOHSE$days_ps)
hist(LOHSE$days_ps)
## Following a square-root transformation, we center the rtDPS variable
LOHSE$rtDPS <- sqrt(LOHSE$days_ps)
summary(LOHSE$rtDPS)
LOHSE$rtDPS.c <- LOHSE$rtDPS - mean(LOHSE$rtDPS)
hist(LOHSE$rtDPS.c)
## We then include the centered rtDPS variable in our model
m2 <- rma(yi = term_g ~ exp.c + rtDPS.c, vi = term_Vg, data = LOHSE,
          method = "ML")
m2
confint(m2)

# Moderator Analysis of Group and Estimated Time Scheduled
## First we mean center the TIME variable
summary(LOHSE$time_50)
hist(LOHSE$time_50)
LOHSE$time50.c <- LOHSE$time_50 - mean(LOHSE$time_50)
## Following a square-root transformation, we mean center the rtTIME variable
LOHSE$rtTIME <- sqrt(LOHSE$time_50)
summary(LOHSE$rtTIME)
LOHSE$rtTIME.c <- LOHSE$rtTIME - mean(LOHSE$rtTIME)
hist(LOHSE$rtTIME)
## We can then add the mean centered rtTIME to the model
m3 <- rma(yi = term_g ~ exp.c + rtDPS.c + rtTIME.c, vi = term_Vg,
          data = LOHSE, method = "ML")
m3
confint(m3)
plot(m3)

# Interaction of DPS with TIME
## No new variables need to be created to test this interaction
m4 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c, vi = term_Vg,
          data = LOHSE, method = "ML")
m4
confint(m4)
plot(m4)

# Moderator Analysis of Group, DPS, TIME and Age
summary(LOHSE$age_base)
hist(LOHSE$age_base)
## First we mean center the AGE variable
LOHSE$age.c <- LOHSE$age_base - mean(LOHSE$age_base)
## Then we can include mean centered age in our model
m5 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c + age.c, vi = term_Vg,
          data = LOHSE, method = "ML")
m5
confint(m5)

# Controlling for the effect of therapy duration
summary(LOHSE$exp_dur)
hist(LOHSE$exp_dur)
## First we mean center the variable of duration
LOHSE$DUR.c <- LOHSE$exp_dur - mean(LOHSE$exp_dur)
## Following a square-root transformation, we mean center the rtDUR variable
LOHSE$rtDUR <- sqrt(LOHSE$exp_dur)
summary(LOHSE$rtDUR)
hist(LOHSE$rtDUR)
LOHSE$rtDUR.c <- LOHSE$rtDUR - mean(LOHSE$rtDUR)
## We also want to check for potential collinearity between duration and time
plot(LOHSE$rtDUR.c, LOHSE$rtTIME.c)
cor.test(LOHSE$rtDUR.c, LOHSE$rtTIME.c)
## Finally we can add the mean centered rtDUR variable to the model
m6 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c + age.c + rtDUR.c,
          vi = term_Vg, data = LOHSE, method = "ML")
m6
confint(m6)

# Next, we export the data (including the variables we created) to a csv file
write.csv(LOHSE, file = "data SCOAR R MAIN ANALYSIS.csv")
# Comparison between the different models.
# The fit statistics noted by the author are kept as a comment under each call.
summary(m0)
#    logLik  deviance       AIC       BIC      AICc
# -196.4562  666.8507  396.9124  404.3398  396.9524
summary(m1)
#    logLik  deviance       AIC       BIC      AICc
# -179.4614  632.8610  364.9228  376.0640  365.0030
summary(m2)
#    logLik  deviance       AIC       BIC      AICc
# -133.6918  541.3218  275.3836  290.2385  275.5178
summary(m3)
#    logLik  deviance       AIC       BIC      AICc
# -127.3528  528.6440  264.7057  283.2744  264.9077
summary(m4)
#    logLik  deviance       AIC       BIC      AICc
# -125.4686  524.8755  262.9372  285.2196  263.2210
summary(m5)
#    logLik  deviance       AIC       BIC      AICc
# -121.7344  517.4070  257.4687  283.4649  257.8484
summary(m6)
#    logLik  deviance       AIC       BIC      AICc
# -120.4646  514.8674  256.9291  286.6390  257.4189

# So m5 is our best fitting model...
# but let's double-check our statistical assumptions/diagnostics.

# The distribution of residuals for this model is approximately normal
plot(density(resid(m5)))

# The residuals show some heteroscedasticity across the fitted values...
plot(resid(m5) ~ fitted(m5))

# ...but not a significant relationship between fitted values and residuals.
cor.test(resid(m5), fitted(m5))
#########################################
# Plots for Multivariable Relationships #
#########################################
## Figure 2 --------------------------------------------------------------------
# Plot of outcomes by Days Post Stroke (DPS).
# Point fill comes from the palette via days_cat; point size scales with the
# square root of the baseline n.
palette(c("#ffffcc", "#a1dab4", "#41b6c4", "#225ea8"))

# Version with linear (untransformed) time, in years, on the x-axis
plot(
  LOHSE$term_g ~ LOHSE$yrs_ps,
  bty = "n", type = "p", pch = 21, bg = LOHSE$days_cat,
  col = "black", lwd = 1.0, xlim = c(0, 10), ylim = c(-0.5, 3.5),
  cex = sqrt(LOHSE$base_n) / 2, cex.axis = 1.25, cex.lab = 1.25,
  ylab = "Terminal Outcome (g)", xlab = "Time Post Stroke (yrs)"
)

# Version with the square root of DPS on the x-axis. The square root of DPS is
# the actual variable in our models, so I think we should use it in the
# figures.
plot(
  LOHSE$term_g ~ LOHSE$rtDPS,
  bty = "n", type = "p", pch = 21, bg = LOHSE$days_cat,
  col = "black", lwd = 1.0, xlim = c(0, 60), ylim = c(-0.5, 3.5),
  cex = sqrt(LOHSE$base_n) / 2, cex.axis = 1.25, cex.lab = 1.25,
  ylab = "Terminal Outcome (g)", xlab = "Time Post Stroke (sqrt(days))"
)

# Plot of outcomes by Time Scheduled for therapy.
# ntile() splits rtTIME into 4 equally sized groups, used to pick the fill.
LOHSE$TIMEq <- ntile(LOHSE$rtTIME, 4)
palette(c("#ffffcc", "#a1dab4", "#41b6c4", "#225ea8"))

# Square-root time (the scale used in the models)
plot(
  LOHSE$term_g ~ LOHSE$rtTIME,
  bty = "n", xlim = c(0, 15), ylim = c(-0.5, 3.5),
  bg = LOHSE$TIMEq, col = "black", pch = 21, cex = sqrt(LOHSE$base_n) / 2,
  cex.axis = 1.25, cex.lab = 1.25, ylab = "Terminal Outcome (g)",
  xlab = "Estimated Time Scheduled for Therapy (sqrt(hrs))"
)

# Linear (untransformed) time
plot(
  LOHSE$term_g ~ LOHSE$time_50,
  bty = "n", xlim = c(0, 250), ylim = c(-0.5, 3.5),
  bg = LOHSE$TIMEq, col = "black", pch = 21, cex = sqrt(LOHSE$base_n) / 2,
  cex.axis = 1.25, cex.lab = 1.25, ylab = "Terminal Outcome (g)",
  xlab = "Estimated Time Scheduled for Therapy (hrs)"
)
## -----------------------------------------------------------------------------
##################################################
### Analysis of the "big" groups only, n >= 20 ###
##################################################
# Descriptive Statistics for the BIGN data
summary(BIGN$age_base)
sd(BIGN$age_base, na.rm = TRUE)
summary(BIGN$days_ps)
sd(BIGN$days_ps, na.rm = TRUE)
summary(BIGN$exp_dur)
sd(BIGN$exp_dur, na.rm = TRUE)
summary(BIGN$time_50)
sd(BIGN$time_50, na.rm = TRUE)
# Number of experimental versus control groups (works for factor and
# character columns alike)
table(BIGN$group, useNA = "ifany")
summary(BIGN$base_n)
sd(BIGN$base_n, na.rm = TRUE)
summary(BIGN$term_g)
sd(BIGN$term_g, na.rm = TRUE)

# Overall analysis of all trials
b0 <- rma(yi = term_g, vi = term_Vg, data = BIGN, method = "ML")
b0
confint(b0)

# Creating a forest plot to show the RE model of all of the data
forest(b0, slab = paste(BIGN$author, BIGN$year, sep = ", "), cex = 1.5)

# Creating a funnel plot to show potential bias in the BIGN dataset.
# Point fill is chosen from the two-colour palette by group; the group column
# is converted to integer codes explicitly so this also works when it was read
# as character (the strings would otherwise be interpreted as colour names).
palette(c("orange", "dodgerblue"))
funnel(
  b0,
  pch = 21, bg = as.integer(as.factor(BIGN$group)),
  cex = 1.5, cex.axis = 1.25, cex.lab = 1.25,
  xlab = "Terminal Outcome (g)"
)

# Statistical test of funnel plot asymmetry (see Egger et al., 1997)
regtest(b0, model = "lm")
# Significant result indicates bias in the distribution of results
# Moderator Analysis of Treatment Versus Control Groups
# The group codes are assigned by name (ctrl = -0.5, exp = +0.5) rather than
# through as.numeric(group) - 1.5, which only works when group is a factor
# whose two levels happen to sort as ctrl, exp (and yields NA for character
# columns).
table(BIGN$group, useNA = "ifany")
group_codes <- c(ctrl = -0.5, exp = 0.5)
BIGN$exp.c <- unname(group_codes[as.character(BIGN$group)])
summary(BIGN$exp.c)
head(BIGN)
b1 <- rma(yi = term_g ~ exp.c, vi = term_Vg, data = BIGN, method = "ML")
b1
confint(b1)

# Moderator Analysis of Groups and DPS (days post-stroke)
summary(BIGN$days_ps)
BIGN$DPS.c <- BIGN$days_ps - mean(BIGN$days_ps)
hist(BIGN$days_ps)
# Square-root transform, then mean center
BIGN$rtDPS <- sqrt(BIGN$days_ps)
summary(BIGN$rtDPS)
BIGN$rtDPS.c <- BIGN$rtDPS - mean(BIGN$rtDPS)
hist(BIGN$rtDPS.c)
b2 <- rma(yi = term_g ~ exp.c + rtDPS.c, vi = term_Vg, data = BIGN,
          method = "ML")
b2
confint(b2)

# Moderator Analysis of Group and Estimated Time Scheduled
summary(BIGN$time_50)
hist(BIGN$time_50)
BIGN$time50.c <- BIGN$time_50 - mean(BIGN$time_50)
# Square-root transform, then mean center
BIGN$rtTIME <- sqrt(BIGN$time_50)
summary(BIGN$rtTIME)
BIGN$rtTIME.c <- BIGN$rtTIME - mean(BIGN$rtTIME)
hist(BIGN$rtTIME)
b3 <- rma(yi = term_g ~ exp.c + rtDPS.c + rtTIME.c, vi = term_Vg,
          data = BIGN, method = "ML")
b3
confint(b3)
plot(b3)

# Adding the interaction of DPS and TIME
b4 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c, vi = term_Vg,
          data = BIGN, method = "ML")
b4
confint(b4)
plot(fitted(b4), resid(b4))
plot(density(resid(b4)))

# Moderator Analysis of Group, DPS, TIME and Age
summary(BIGN$age_base)
hist(BIGN$age_base)
BIGN$age.c <- BIGN$age_base - mean(BIGN$age_base)
b5 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c + age.c, vi = term_Vg,
          data = BIGN, method = "ML")
b5
confint(b5)

# Controlling for the effect of therapy duration
summary(BIGN$exp_dur)
hist(BIGN$exp_dur)
BIGN$DUR.c <- BIGN$exp_dur - mean(BIGN$exp_dur)
# Square-root transform, then mean center
BIGN$rtDUR <- sqrt(BIGN$exp_dur)
summary(BIGN$rtDUR)
BIGN$rtDUR.c <- BIGN$rtDUR - mean(BIGN$rtDUR)
hist(BIGN$rtDUR.c)
## Testing for collinearity between duration and time
plot(BIGN$rtDUR.c, BIGN$rtTIME.c)
cor.test(BIGN$rtDUR.c, BIGN$rtTIME.c)
# Adding the centered rtDUR variable to the model
b6 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c + age.c + rtDUR.c,
          vi = term_Vg, data = BIGN, method = "ML")
b6
confint(b6)
# Comparing the fit of the different models.
# The fit statistics noted by the author are kept as a comment under each call.
summary(b0)
#   logLik  deviance       AIC       BIC      AICc
# -70.2411  325.1712  144.4821  150.0902  144.5830
summary(b1)
#   logLik  deviance       AIC       BIC      AICc
# -65.2092  315.1075  136.4185  144.8306  136.6219
summary(b2)
#   logLik  deviance       AIC       BIC      AICc
# -39.8544  264.3979   87.7088   98.9249   88.0507
summary(b3)
#   logLik  deviance       AIC       BIC      AICc
# -36.3532  257.3954   82.7064   96.7265   83.2236
summary(b4)
#   logLik  deviance       AIC       BIC      AICc
# -34.4962  253.6814   80.9924   97.8165   81.7228
summary(b5)
#   logLik  deviance       AIC       BIC      AICc
# -33.5010  251.6911   81.0020  100.6302   81.9845
summary(b6)
#   logLik  deviance       AIC       BIC      AICc
# -32.1270  248.9430   80.2540  102.6861   81.5283

# So b4 is our best fitting model...
# but let's double-check our statistical assumptions/diagnostics.

# The distribution of residuals for this model is approximately normal
plot(density(resid(b4)))

# The residuals show some heteroscedasticity across the fitted values...
plot(resid(b4) ~ fitted(b4))

# ...but not a significant relationship between fitted values and residuals.
cor.test(resid(b4), fitted(b4))

# We can also export the BIGN data (and the variables we created) to a csv
write.csv(BIGN, file = "data SCOAR BIG N ANALYSIS.csv")
## Figure 2 with only groups of n >= 20 ----------------------------------------
# Plot of outcomes by AGE
# The ntile function chops the variable into X equally sized groups.
BIGN$AGEq <- ntile(BIGN$age_base, 4)
# Four-colour palette (light yellow to dark blue) used to fill the points
palette(c("#ffffcc", "#a1dab4", "#41b6c4", "#225ea8"))
plot(
  BIGN$term_g ~ BIGN$age_base,
  bty = "n", xlim = c(40, 90), ylim = c(-0.5, 3.5),
  bg = BIGN$AGEq, col = "black", pch = 21, cex = sqrt(BIGN$base_n) / 2,
  cex.axis = 1.25, cex.lab = 1.25, ylab = "Terminal Outcome (g)",
  xlab = "Average Age (years)"
)

# Plot of outcomes by DPS (square-root scale, as used in the models)
plot(
  BIGN$term_g ~ BIGN$rtDPS,
  bty = "n", type = "p", pch = 21, bg = BIGN$days_cat,
  col = "black", lwd = 1.0, xlim = c(0, 60), ylim = c(-0.5, 3.5),
  cex = sqrt(BIGN$base_n) / 2, cex.axis = 1.25, cex.lab = 1.25,
  ylab = "Terminal Outcome (g)", xlab = "Time Post Stroke (sqrt(days))"
)
## -----------------------------------------------------------------------------
###########################################
### Analysis of the FMA for the UE only ###
###########################################
# All outcomes are fma, fma-ue, fma-se, or fma-wh
# fma-le had been excluded
# only one outcome per study
FMA <- subset(DATA, fma_outcome == "fma")

# A value counts as present when it is neither a real NA nor the literal
# string "na". The earlier `!x == "na"` form only dropped real NAs because
# subset() silently discards rows whose condition evaluates to NA; this makes
# both cases explicit and keeps exactly the same rows.
has_value <- function(x) !is.na(x) & x != "na"

# Keep only groups that report every variable needed by the models below.
FMA <- subset(
  FMA,
  has_value(time_50) &    # time scheduled for therapy
    has_value(exp_dur) &  # duration of therapy
    has_value(age_base) & # age at baseline
    has_value(days_ps) &  # days post-stroke
    has_value(term_d)     # terminal effect size
)
nrow(FMA)
# Number of rows for the FMA data should be 79
sum(FMA$base_n)
# Sum of baseline n should be 1,741

# Descriptive Statistics for the FMA data
summary(FMA$age_base)
sd(FMA$age_base, na.rm = TRUE)
summary(FMA$days_ps)
sd(FMA$days_ps, na.rm = TRUE)
summary(FMA$exp_dur)
sd(FMA$exp_dur, na.rm = TRUE)
summary(FMA$time_50)
sd(FMA$time_50, na.rm = TRUE)
# Number of experimental versus control groups (works for factor and
# character columns alike)
table(FMA$group, useNA = "ifany")
summary(FMA$base_n)
sd(FMA$base_n, na.rm = TRUE)
summary(FMA$term_g)
sd(FMA$term_g, na.rm = TRUE)
# Regression models for FMA
# Random-effects model of all FMA groups (maximum likelihood)
f0 <- rma(yi = term_g, vi = term_Vg, data = FMA, method = "ML")
f0

# Moderator Analysis of Group
# The group codes are assigned by name (ctrl = -0.5, exp = +0.5) rather than
# through as.numeric(group) - 1.5, which only works when group is a factor
# whose two levels happen to sort as ctrl, exp (and yields NA for character
# columns).
table(FMA$group, useNA = "ifany")
group_codes <- c(ctrl = -0.5, exp = 0.5)
FMA$exp.c <- unname(group_codes[as.character(FMA$group)])
head(FMA)
f1 <- rma(yi = term_g ~ exp.c, vi = term_Vg, data = FMA, method = "ML")
f1

# Moderator Analysis of Groups and DPS (days post-stroke)
summary(FMA$days_ps)
FMA$DPS.c <- FMA$days_ps - mean(FMA$days_ps)
hist(FMA$DPS.c)
# Square-root transform, then mean center
FMA$rtDPS <- sqrt(FMA$days_ps)
summary(FMA$rtDPS)
FMA$rtDPS.c <- FMA$rtDPS - mean(FMA$rtDPS)
hist(FMA$rtDPS.c)
# Check of the centering: the mean should be (numerically) zero
mean(FMA$rtDPS.c)
f2 <- rma(yi = term_g ~ exp.c + rtDPS.c, vi = term_Vg, data = FMA,
          method = "ML")
f2

# Moderator Analysis of Group and Estimated Time Scheduled
summary(FMA$time_50)
hist(FMA$time_50)
FMA$time50.c <- FMA$time_50 - mean(FMA$time_50)
# Square-root transform, then mean center
FMA$rtTIME <- sqrt(FMA$time_50)
summary(FMA$rtTIME)
FMA$rtTIME.c <- FMA$rtTIME - mean(FMA$rtTIME)
hist(FMA$rtTIME)
f3 <- rma(yi = term_g ~ exp.c + rtDPS.c + rtTIME.c, vi = term_Vg,
          data = FMA, method = "ML")
f3
plot(f3)

# Adding the interaction of DPS and TIME
f4 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c, vi = term_Vg,
          data = FMA, method = "ML")
f4
confint(f4)
plot(f4)

# Moderator Analysis of Group, DPS, TIME and Age
summary(FMA$age_base)
hist(FMA$age_base)
FMA$age.c <- FMA$age_base - mean(FMA$age_base)
f5 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c + age.c, vi = term_Vg,
          data = FMA, method = "ML")
f5
confint(f5)

# Controlling for the effect of therapy duration
summary(FMA$exp_dur)
hist(FMA$exp_dur)
FMA$DUR.c <- FMA$exp_dur - mean(FMA$exp_dur)
# Square-root transform, then mean center
FMA$rtDUR <- sqrt(FMA$exp_dur)
summary(FMA$rtDUR)
hist(FMA$rtDUR)
FMA$rtDUR.c <- FMA$rtDUR - mean(FMA$rtDUR)
## Checking for collinearity between TIME and DURATION
plot(FMA$rtDUR.c, FMA$rtTIME.c)
cor.test(FMA$rtDUR.c, FMA$rtTIME.c)
f6 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c + age.c + rtDUR.c,
          vi = term_Vg, data = FMA, method = "ML")
f6
# Comparisons between the different FMA models.
# The fit statistics noted by the author are kept as a comment under each call.
summary(f0)
#   logLik  deviance      AIC      BIC     AICc
# -26.1594  129.2677  56.3187  61.0576  56.4766
summary(f1)
#   logLik  deviance      AIC      BIC     AICc
# -22.5504  122.0497  51.1007  58.2091  51.4207
summary(f2)
#   logLik  deviance      AIC      BIC     AICc
# -13.7268  104.4027  35.4537  44.9315  35.9942
summary(f3)
#   logLik  deviance      AIC      BIC     AICc
# -13.5434  104.0359  37.0869  48.9341  37.9088
summary(f4)
#   logLik  deviance      AIC      BIC     AICc
# -13.5408  104.0307  39.0817  53.2984  40.2484
summary(f5)
#   logLik  deviance      AIC      BIC     AICc
# -10.2347   97.4185  34.4695  51.0556  36.0469
summary(f6)
#   logLik  deviance      AIC      BIC     AICc
#  -9.5106   95.9703  35.0213  53.9769  37.0784

# So f2/f5 are our best fitting models based on the AIC and AICc
# but let's double-check our statistical assumptions/diagnostics.

# The distribution of residuals for this model is approximately normal
plot(density(resid(f5)))

# The residuals show some heteroscedasticity across the fitted values...
plot(resid(f5) ~ fitted(f5))

# ...but not a significant relationship between fitted values and residuals.
cor.test(resid(f5), fitted(f5))

# Exporting the FMA data to a .csv file
write.csv(FMA, file = "data SCOAR FMA ANALYSIS.csv")
## Figure 3A -------------------------------------------------------------------
# Plots of the FMA data: terminal effect size against sqrt(time scheduled).
# Point fill is taken from the palette via days_cat; point size scales with
# the square root of the baseline n.
palette(c("#ffffcc", "#a1dab4", "#41b6c4", "#225ea8"))
summary(FMA$days_cat)
plot(
  FMA$term_g ~ FMA$rtTIME,
  bty = "n", type = "p", pch = 21, bg = FMA$days_cat,
  col = "black", lwd = 1.0, xlim = c(-1, 15), ylim = c(-0.5, 3.5),
  cex.axis = 1.25, cex.lab = 1.25, cex = sqrt(FMA$base_n) / 2,
  ylab = "Terminal FMA (g)", xlab = "Time Scheduled for Therapy (sqrt(hrs))"
)
legend(
  "top",
  legend = c("<90 Days", "<1 Year", ">1 Year"),
  fill = c("#41b6c4", "#a1dab4", "#225ea8"),
  title = "Days Post Stroke", inset = .02, horiz = TRUE, cex = 0.8
)
## -----------------------------------------------------------------------------
# Plotting relationships between the baseline and terminal means,
# coloured by outcome name
g1 <- ggplot(
  data = FMA,
  mapping = aes(x = base_m, y = term_m, color = as.factor(outcome_name))
) +
  geom_point(size = 4, alpha = 0.5) +
  theme(text = element_text(size = 20)) +
  # geom_smooth(method=lm)+
  # scale_color_manual(values=c("#007360","#9b1d36","#369cba")) +
  theme(
    panel.background = element_rect(fill = "white", color = "gray"),
    panel.grid.major = element_line(color = "gray"),
    panel.grid.minor = element_line(color = "gray")
  ) +
  labs(x = "FMA at Baseline", y = "FMA at Terminal Assessment")
print(g1)
## -----------------------------------------------------------------------------
################################################
### Analysis of the gait speed measures only ###
################################################
# only one outcome per study
SPEED <- subset(DATA, speed_outcome == "speed")

# A value counts as present when it is neither a real NA nor the literal
# string "na". The earlier `!x == "na"` form only dropped real NAs because
# subset() silently discards rows whose condition evaluates to NA; this makes
# both cases explicit and keeps exactly the same rows.
has_value <- function(x) !is.na(x) & x != "na"

# Keep only groups that report every variable needed by the models below.
SPEED <- subset(
  SPEED,
  has_value(time_50) &    # time scheduled for therapy
    has_value(exp_dur) &  # duration of therapy
    has_value(age_base) & # age at baseline
    has_value(days_ps) &  # days post-stroke
    has_value(term_d)     # terminal effect size
)
nrow(SPEED)
# Number of rows for the SPEED data should be 116
sum(SPEED$base_n)
# Sum of baseline n for SPEED data should be 2,609

# Descriptive Statistics for the SPEED data
summary(SPEED$age_base)
sd(SPEED$age_base, na.rm = TRUE)
summary(SPEED$days_ps)
sd(SPEED$days_ps, na.rm = TRUE)
summary(SPEED$exp_dur)
sd(SPEED$exp_dur, na.rm = TRUE)
summary(SPEED$time_50)
sd(SPEED$time_50, na.rm = TRUE)
# Number of experimental versus control groups (works for factor and
# character columns alike)
table(SPEED$group, useNA = "ifany")
summary(SPEED$base_n)
sd(SPEED$base_n, na.rm = TRUE)
summary(SPEED$term_g)
sd(SPEED$term_g, na.rm = TRUE)
# Statistical models for the speed data
# Random-effects model of all gait speed groups (maximum likelihood)
s0 <- rma(yi = term_g, vi = term_Vg, data = SPEED, method = "ML")
s0

# Moderator Analysis of Group
# The group codes are assigned by name (ctrl = -0.5, exp = +0.5) rather than
# through as.numeric(group) - 1.5, which only works when group is a factor
# whose two levels happen to sort as ctrl, exp (and yields NA for character
# columns).
table(SPEED$group, useNA = "ifany")
group_codes <- c(ctrl = -0.5, exp = 0.5)
SPEED$exp.c <- unname(group_codes[as.character(SPEED$group)])
head(SPEED)
s1 <- rma(yi = term_g ~ exp.c, vi = term_Vg, data = SPEED, method = "ML")
s1

# Moderator Analysis of Groups and DPS (days post-stroke)
summary(SPEED$days_ps)
SPEED$DPS.c <- SPEED$days_ps - mean(SPEED$days_ps)
hist(SPEED$DPS.c)
# Square-root transform, then mean center
SPEED$rtDPS <- sqrt(SPEED$days_ps)
summary(SPEED$rtDPS)
SPEED$rtDPS.c <- SPEED$rtDPS - mean(SPEED$rtDPS)
hist(SPEED$rtDPS.c)
s2 <- rma(yi = term_g ~ exp.c + rtDPS.c, vi = term_Vg, data = SPEED,
          method = "ML")
s2

# Moderator Analysis of Group and Estimated Time Scheduled
summary(SPEED$time_50)
hist(SPEED$time_50)
SPEED$time50.c <- SPEED$time_50 - mean(SPEED$time_50)
# Square-root transform, then mean center
SPEED$rtTIME <- sqrt(SPEED$time_50)
summary(SPEED$rtTIME)
SPEED$rtTIME.c <- SPEED$rtTIME - mean(SPEED$rtTIME)
hist(SPEED$rtTIME.c)
s3 <- rma(yi = term_g ~ exp.c + rtDPS.c + rtTIME.c, vi = term_Vg,
          data = SPEED, method = "ML")
s3
plot(s3)

# Adding the interaction of DPS and TIME
s4 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c, vi = term_Vg,
          data = SPEED, method = "ML")
s4
plot(s4)

# Moderator Analysis of Group, DPS, TIME and Age
summary(SPEED$age_base)
hist(SPEED$age_base)
SPEED$age.c <- SPEED$age_base - mean(SPEED$age_base)
s5 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c + age.c, vi = term_Vg,
          data = SPEED, method = "ML")
s5
confint(s5)

# Controlling for the effect of therapy duration
summary(SPEED$exp_dur)
hist(SPEED$exp_dur)
SPEED$DUR.c <- SPEED$exp_dur - mean(SPEED$exp_dur)
# Square-root transform, then mean center
SPEED$rtDUR <- sqrt(SPEED$exp_dur)
summary(SPEED$rtDUR)
hist(SPEED$rtDUR)
SPEED$rtDUR.c <- SPEED$rtDUR - mean(SPEED$rtDUR)
# Checking for collinearity between TIME and DURATION
plot(SPEED$rtDUR.c, SPEED$rtTIME.c)
cor.test(SPEED$rtDUR.c, SPEED$rtTIME.c)
s6 <- rma(yi = term_g ~ exp.c + rtDPS.c * rtTIME.c + age.c + rtDUR.c,
          vi = term_Vg, data = SPEED, method = "ML")
s6
# Comparisons between the different SPEED models.
# The fit statistics noted by the author are kept as a comment under each call.
summary(s0)
#   logLik  deviance       AIC       BIC      AICc
# -80.4021  265.5184  164.8041  170.3113  164.9103
summary(s1)
#   logLik  deviance       AIC       BIC      AICc
# -76.6772  258.0687  159.3544  167.6151  159.5686
summary(s2)
#   logLik  deviance       AIC       BIC      AICc
# -52.6510  210.0163  113.3019  124.3163  113.6623
summary(s3)
#   logLik  deviance       AIC       BIC      AICc
# -49.8661  204.4464  109.7321  123.5001  110.2776
summary(s4)
#   logLik  deviance       AIC       BIC      AICc
# -48.0225  200.7593  108.0450  124.5665  108.8156
summary(s5)
#   logLik  deviance       AIC       BIC      AICc
# -44.9879  194.6902  103.9758  123.2510  105.0129
summary(s6)
#   logLik  deviance       AIC       BIC      AICc
# -43.8895  192.4934  103.7791  125.8078  105.1249

# So s5 is our best fitting model based on the AIC and AICc
# but let's double-check our statistical assumptions/diagnostics.

# The distribution of residuals for this model is approximately normal
plot(density(resid(s5)))

# The residuals show some heteroscedasticity across the fitted values...
plot(resid(s5) ~ fitted(s5))

# ...but not a significant relationship between fitted values and residuals.
cor.test(resid(s5), fitted(s5))

# Exporting the SPEED data to a .csv file (left disabled, as in the original)
# write.csv(SPEED, file="data SCOAR SPEED ANALYSIS.csv")
## Figure 3B -------------------------------------------------------------------
# Plots of the SPEED data: terminal effect size against sqrt(time scheduled).
# Point fill is taken from the palette via days_cat; point size scales with
# the square root of the baseline n.
palette(c("#ffffcc", "#a1dab4", "#41b6c4", "#225ea8"))
plot(
  SPEED$term_g ~ SPEED$rtTIME,
  bty = "n", type = "p", pch = 21, bg = SPEED$days_cat,
  col = "black", lwd = 1.0, xlim = c(-1, 15), ylim = c(-0.5, 3.5),
  cex.axis = 1.25, cex.lab = 1.25, cex = sqrt(SPEED$base_n) / 2,
  ylab = "Terminal Gait Speed (g)",
  xlab = "Time Scheduled for Therapy (sqrt(hrs))"
)
legend(
  "top",
  legend = c("<90 Days", "<1 Year", ">1 Year"),
  fill = c("#41b6c4", "#a1dab4", "#225ea8"),
  title = "Days Post Stroke", inset = .02, horiz = TRUE, cex = 0.8
)
## -----------------------------------------------------------------------------
# There is an issue with plotting baseline means against terminal means:
#  - first, the Fuzaro study needs to be removed because it used %Time and
#    not m/s;
#  - second, all of the studies that used cm/s need to be converted to m/s.
# This has already been done in the SCOAR SPEED ANALYSIS.csv file on github
# ... so download it from github or recreate the above steps before graphing!
SPEED_Graph <- read.table(
  "data SCOAR SPEED ANALYSIS.csv",
  header = TRUE, sep = ","
)
head(SPEED_Graph)

# Plotting relationships between the (converted) baseline and terminal means,
# coloured by outcome name, with both axes limited to 0-2
g1 <- ggplot(
  data = SPEED_Graph,
  mapping = aes(x = base_mCON, y = term_mCON,
                color = as.factor(outcome_name))
) +
  geom_point(size = 4, alpha = 0.5) +
  theme(text = element_text(size = 20)) +
  xlim(0, 2) +
  ylim(0, 2) +
  # geom_smooth(method=lm)+
  # scale_color_manual(values=c("#007360","#9b1d36","#369cba")) +
  theme(
    panel.background = element_rect(fill = "white", color = "gray"),
    panel.grid.major = element_line(color = "gray"),
    panel.grid.minor = element_line(color = "gray")
  ) +
  labs(x = "Speed at Baseline (m/s)", y = "Speed at Terminal Assessment (m/s)")
print(g1)