-
Notifications
You must be signed in to change notification settings - Fork 5
PISA 2012 design background questions (ST89) as latent regressors
We examined known group differences against estimated group differences under the following conditions: 2 (IRT models) x 2 (IRT R packages) x 3 (sample sizes).
The generating parameters used in this simulation are saved in the lsasim
package. The package itself contains three datasets to aid in the generation of item responses from the mathematics portion of the PISA 2012 "standard" test booklets. We selected four background varaibles from Attitude towards school: Learning outcomes (ATSCHL) scale (ST88Q01, ST88Q02, ST88Q03, ST88Q04) to identify group differences and the corresponding latent trait (PV1MATH).
Attitude towards school: Learning outcomes (ATSCHL) scale: Thinking about what you have learned at school: to what extent do you agree with the following statements?
-
ST88Q01 a) School has done little to prepare me for adult life when I leave school
-
ST88Q02 b) School has been a waste of time
-
ST88Q03 c) School has helped give me confidence to make decisions
-
ST88Q04 d) School has taught me things which could be useful in a job
- 1 Strongly agree
- 2 Agree
- 3 Disagree
- 4 Strongly disagree
- Two types of IRT models were used: Rasch items and partial credit (PC) items
- Item parameters were drawn from PISA 2012 mathematics items used in standard booklets
- There were 76 Rasch items and 8 PC items
- The total 84 items were administered in the 13 standard test booklets
- Two IRT R packages were evaluated:
TAM
(version 2.4-9) andmirt
(version 1.25). Each package used a unique estimator. These are:- Warm's weighted likelihood estimates (WLE) using
TAM
- Expected-a-posteriori (EAP) using
mirt
- Warm's weighted likelihood estimates (WLE) using
- Three sample sizes were used: 2000, 4000, and 6000
- Simulated samples were based on PISA 2012 data
- One hundred replications were used for each condition for the calibration
- Person trait recovery:
- Known (true) group differences against estimated group differences recovered well when using WLE than EAP
# Load libraries
if(!require(lsasim)){
install.packages("lsasim")
library(lsasim) #version 1.0.1
}
if(!require(mirt)){
install.packages("mirt")
library(mirt) #1.25
}
if(!require(TAM)){
install.packages("TAM")
library(TAM) #2.4-9
}
if(!require(psych)){
install.packages("psych")
library(psych) #1.7.5
}
# Set up conditions
N.cond <- c(2000, 4000, 6000) #number of sample sizes
# Set up number of replications
reps <- 100
# Create space for outputs
results <- NULL
#==============================================================================#
# Background questionnaire selection
#==============================================================================#
# extract items from ST88 scale (ST88Q01, ST88Q02, ST88Q03, ST88Q04) and PV1MATH
pisa2012_cat_prop <- lsasim::pisa2012_q_marginal[c(11:14,19)]
print(pisa2012_cat_prop)
## $ST88Q01
## [1] 0.065 0.311 0.790 1.000
##
## $ST88Q02
## [1] 0.029 0.102 0.548 1.000
##
## $ST88Q03
## [1] 0.197 0.752 0.945 1.000
##
## $ST88Q04
## [1] 0.452 0.895 0.974 1.000
##
## $PV1MATH
## [1] 1
pisa2012_cor_matrix <- lsasim::pisa2012_q_cormat[c(11:14,19), c(11:14,19)]
print(pisa2012_cor_matrix)
## ST88Q01 ST88Q02 ST88Q03 ST88Q04 PV1MATH
## ST88Q01 1.00000000 0.4825506 -0.33800998 -0.38529205 0.08013438
## ST88Q02 0.48255064 1.0000000 -0.31299452 -0.42906406 0.17632929
## ST88Q03 -0.33800998 -0.3129945 1.00000000 0.47072970 0.05816648
## ST88Q04 -0.38529205 -0.4290641 0.47072970 1.00000000 -0.01592925
## PV1MATH 0.08013438 0.1763293 0.05816648 -0.01592925 1.00000000
#==============================================================================#
# START SIMULATION
#==============================================================================#
for (r in 1:reps) { #replication
for (N in N.cond) { #sample size
set.seed(17228)
n_examinees <- N
### -- Background questionnaire generation
# generate background questionnaire data
pisa_background <- questionnaire_gen(n = n_examinees,
cat_prop = pisa2012_cat_prop,
cor_matrix = pisa2012_cor_matrix)
### -- Cognitive assessment generation
# assign items to blocks
pisa2012_math_block_mat <- as.matrix(pisa2012_math_block[, -c(1:2)])
pisa_blocks <- lsasim::block_design(item_parameters = pisa2012_math_item,
item_block_matrix = pisa2012_math_block_mat)
#assign blocks to booklets
pisa2012_math_book_mat <- as.matrix(pisa2012_math_booklet[, -1])
pisa_books <- lsasim::booklet_design(item_block_assignment =
pisa_blocks$block_assignment,
book_design = pisa2012_math_book_mat)
#assign booklets to subjects
subj_booklets <- lsasim::booklet_sample(n_subj = N,
book_item_design = pisa_books)
#subset items in standard booklets
subitems <- sort(unique(subj_booklets$item))
pisa_items <- pisa2012_math_item[subitems, ]
# generate item responses
pisa_ir <- lsasim::response_gen(subject = subj_booklets$subject,
item = subj_booklets$item,
theta = pisa_background$PV1MATH,
item_no = pisa_items$item,
b_par = pisa_items$b,
d_par = list(pisa_items$d1,
pisa_items$d2))
# -- Merge questionnaire data with cognitive assessment data
pisa_data <- merge(pisa_background, pisa_ir, by = "subject")
# extract item responses (excluding "subject" column)
resp <- pisa_ir[, c(1:length(pisa_items$item))]
#------------------------------------------------------------------------------#
# Model estimation
#------------------------------------------------------------------------------#
# model 1: fit Rasch and PC models using mirt package
mirt.mod <- NULL
mirt.mod <- mirt::mirt(resp,1, itemtype = 'Rasch',
technical = list( NCYCLES = 500), verbose = F)
# model 2: fit Rasch and PC models using TAM package
tam.mod <- NULL
tam.mod <- TAM::tam.mml(resp, irtmodel="PCM2")
# model 3: fit Rasch and PC models with latent regressors using TAM package
# latent regressors Y
regressors <- pisa_background[,paste0("ST88Q0",1:4)]
regressors$ST88Q01.new <- data.frame(psych::dummy.code(regressors$ST88Q01))[,1:3]
#category 4 as reference for ST88Q01
regressors$ST88Q02.new <- data.frame(psych::dummy.code(regressors$ST88Q02))[,1:3]
#category 4 as reference for ST88Q02
regressors$ST88Q03.new <- data.frame(psych::dummy.code(regressors$ST88Q03))[,1:3]
#category 4 as reference for ST88Q03
regressors$ST88Q04.new <- data.frame(psych::dummy.code(regressors$ST88Q04))[,1:3]
#category 4 as reference for ST88Q04
Y <- cbind(regressors$ST88Q01.new,
regressors$ST88Q02.new,
regressors$ST88Q03.new,
regressors$ST88Q04.new)
colnames(Y) <- c(paste0("ST88Q01_cat",1:3), paste0("ST88Q02_cat",1:3),
paste0("ST88Q03_cat",1:3), paste0("ST88Q04_cat",1:3))
tam.mod.2 <- NULL
tam.mod.2 <- TAM::tam.mml(resp, irtmodel="PCM2", Y=Y)
#------------------------------------------------------------------------------#
# Person parameter extraction
#------------------------------------------------------------------------------#
# extract thetas
pisa_data$mirt.eap <- c(fscores(mirt.mod, method="EAP"))
pisa_data$tam.wle <- tam.wle(tam.mod)$theta
pisa_data$tam.reg <- tam.wle(tam.mod.2)$theta
# summarize background variables (qs), generalized theta, and estimated thetas
FS <- pisa_data[,c("subject", "ST88Q01", "ST88Q02", "ST88Q03", "ST88Q04",
"PV1MATH", "mirt.eap", "tam.wle", "tam.reg")]
# summarize results
person <- data.frame(matrix(c(N, r), nrow = 1))
colnames(person) <- c("N", "rep")
person <- cbind(person, FS)
# combine results
results <- rbind(results, person)
}
}
Summary:
-
We summarized the group differences based on four background variables (ST88Q01, ST88Q02, ST88Q03, and ST88Q04), which were socred based on 4-point Likert scale. For each variable, we compared the generating theta difference to estimated theta difference between all possible combinations ( 1 vs 2, 1 vs 3, 1 vs 4, 2 vs 3, 2 vs 4, and 3 vs 4).
-
theta
stands for the generating theta value,mirt_EAP
stands for the EAP estimator calibrated bymirt
package,tam_WLE
stands for the WLE estimator calibrated byTAM
package, andtam_REG
stands for the WLE estimator generated using a model with regressors byTAM
package.
pairs <- matrix(c(1,2,1,3,1,4,2,3,2,4,3,4), ncol = 2, byrow = T)
print(pairs)
## [,1] [,2]
## [1,] 1 2
## [2,] 1 3
## [3,] 1 4
## [4,] 2 3
## [5,] 2 4
## [6,] 3 4
- Group differences by ST88Q01
ST88Q01.out <- NULL
ST88Q01.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST88Q01 ,
data=results, mean, na.rm=TRUE)
for (n in c(2000, 4000, 6000)){
subdata <- ST88Q01.agg [ST88Q01.agg$N==n,]
for (p in 1:nrow(pairs)){
ST88Q01 <- NULL
ST88Q01$N <- n
ST88Q01$GP1 <- pairs[p,1]
ST88Q01$GP2 <- pairs[p,1]
ST88Q01$true.theta <- round(subdata$PV1MATH[pairs[p,1]] - subdata$PV1MATH[pairs[p,2]],3)
ST88Q01$mirt.EAP <- round(subdata$mirt.eap[pairs[p,1]] - subdata$mirt.eap[pairs[p,2]],3)
ST88Q01$tam.WLE <- round(subdata$tam.wle[pairs[p,1]] - subdata$tam.wle[pairs[p,2]],3)
ST88Q01$tam.REG<- round(subdata$tam.reg[pairs[p,1]] - subdata$tam.reg[pairs[p,2]],3)
ST88Q01.out <- rbind(ST88Q01.out, ST88Q01)
}
}
print(ST88Q01.out)
## N GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST88Q01 2000 1 1 -0.085 -0.046 -0.061 -0.061
## ST88Q01 2000 1 1 -0.123 -0.072 -0.092 -0.092
## ST88Q01 2000 1 1 -0.247 -0.131 -0.166 -0.166
## ST88Q01 2000 2 2 -0.038 -0.025 -0.031 -0.031
## ST88Q01 2000 2 2 -0.162 -0.085 -0.105 -0.106
## ST88Q01 2000 3 3 -0.124 -0.06 -0.074 -0.074
## ST88Q01 4000 1 1 -0.11 -0.076 -0.097 -0.097
## ST88Q01 4000 1 1 -0.193 -0.169 -0.209 -0.209
## ST88Q01 4000 1 1 -0.304 -0.237 -0.289 -0.289
## ST88Q01 4000 2 2 -0.084 -0.093 -0.113 -0.113
## ST88Q01 4000 2 2 -0.195 -0.162 -0.192 -0.192
## ST88Q01 4000 3 3 -0.111 -0.069 -0.08 -0.08
## ST88Q01 6000 1 1 -0.035 -0.044 -0.055 -0.055
## ST88Q01 6000 1 1 -0.091 -0.09 -0.11 -0.11
## ST88Q01 6000 1 1 -0.245 -0.217 -0.269 -0.269
## ST88Q01 6000 2 2 -0.056 -0.046 -0.055 -0.055
## ST88Q01 6000 2 2 -0.21 -0.173 -0.214 -0.214
## ST88Q01 6000 3 3 -0.154 -0.127 -0.159 -0.159
- Group differences by ST88Q02
ST88Q02.out <- NULL
ST88Q02.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST88Q02 ,
data=results, mean, na.rm=TRUE)
for (n in c(2000, 4000, 6000)){
subdata <- ST88Q02.agg [ST88Q02.agg$N==n,]
for (p in 1:nrow(pairs)){
ST88Q02 <- NULL
ST88Q02$N <- n
ST88Q02$GP1 <- pairs[p,1]
ST88Q02$GP2 <- pairs[p,2]
ST88Q02$true.theta <- round(subdata$PV1MATH[pairs[p,1]] - subdata$PV1MATH[pairs[p,2]],3)
ST88Q02$mirt.EAP <- round(subdata$mirt.eap[pairs[p,1]] - subdata$mirt.eap[pairs[p,2]],3)
ST88Q02$tam.WLE <- round(subdata$tam.wle[pairs[p,1]] - subdata$tam.wle[pairs[p,2]],3)
ST88Q02$tam.REG<- round(subdata$tam.reg[pairs[p,1]] - subdata$tam.reg[pairs[p,2]],3)
ST88Q02.out <- rbind(ST88Q02.out, ST88Q02)
}
}
print(ST88Q02.out)
## N GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST88Q02 2000 1 2 -0.292 -0.238 -0.294 -0.294
## ST88Q02 2000 1 3 -0.373 -0.276 -0.345 -0.345
## ST88Q02 2000 1 4 -0.62 -0.478 -0.594 -0.595
## ST88Q02 2000 2 3 -0.082 -0.039 -0.051 -0.051
## ST88Q02 2000 2 4 -0.329 -0.241 -0.3 -0.3
## ST88Q02 2000 3 4 -0.247 -0.202 -0.249 -0.249
## ST88Q02 4000 1 2 -0.181 -0.1 -0.158 -0.158
## ST88Q02 4000 1 3 -0.51 -0.395 -0.507 -0.507
## ST88Q02 4000 1 4 -0.704 -0.552 -0.701 -0.701
## ST88Q02 4000 2 3 -0.329 -0.295 -0.349 -0.349
## ST88Q02 4000 2 4 -0.523 -0.451 -0.543 -0.543
## ST88Q02 4000 3 4 -0.194 -0.157 -0.194 -0.194
## ST88Q02 6000 1 2 -0.175 -0.108 -0.121 -0.121
## ST88Q02 6000 1 3 -0.395 -0.271 -0.324 -0.324
## ST88Q02 6000 1 4 -0.593 -0.436 -0.528 -0.528
## ST88Q02 6000 2 3 -0.22 -0.163 -0.203 -0.203
## ST88Q02 6000 2 4 -0.418 -0.329 -0.407 -0.407
## ST88Q02 6000 3 4 -0.198 -0.165 -0.204 -0.204
- Group differences by ST88Q03
ST88Q03.out <- NULL
ST88Q03.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST88Q03 ,
data=results, mean, na.rm=TRUE)
for (n in c(2000, 4000, 6000)){
subdata <- ST88Q03.agg [ST88Q03.agg$N==n,]
for (p in 1:nrow(pairs)){
ST88Q03 <- NULL
ST88Q03$N <- n
ST88Q03$GP1 <- pairs[p,1]
ST88Q03$GP2 <- pairs[p,2]
ST88Q03$true.theta <- round(subdata$PV1MATH[pairs[p,1]] - subdata$PV1MATH[pairs[p,2]],3)
ST88Q03$mirt.EAP <- round(subdata$mirt.eap[pairs[p,1]] - subdata$mirt.eap[pairs[p,2]],3)
ST88Q03$tam.WLE <- round(subdata$tam.wle[pairs[p,1]] - subdata$tam.wle[pairs[p,2]],3)
ST88Q03$tam.REG<- round(subdata$tam.reg[pairs[p,1]] - subdata$tam.reg[pairs[p,2]],3)
ST88Q03.out <- rbind(ST88Q03.out, ST88Q03)
}
}
print(ST88Q03.out)
## N GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST88Q03 2000 1 2 -0.126 -0.1 -0.131 -0.131
## ST88Q03 2000 1 3 -0.204 -0.168 -0.203 -0.203
## ST88Q03 2000 1 4 -0.067 -0.042 -0.062 -0.062
## ST88Q03 2000 2 3 -0.078 -0.068 -0.072 -0.072
## ST88Q03 2000 2 4 0.059 0.058 0.069 0.069
## ST88Q03 2000 3 4 0.137 0.126 0.141 0.141
## ST88Q03 4000 1 2 -0.056 -0.034 -0.047 -0.047
## ST88Q03 4000 1 3 -0.069 -0.047 -0.06 -0.06
## ST88Q03 4000 1 4 -0.044 -0.048 -0.049 -0.049
## ST88Q03 4000 2 3 -0.013 -0.012 -0.013 -0.013
## ST88Q03 4000 2 4 0.012 -0.014 -0.003 -0.002
## ST88Q03 4000 3 4 0.024 -0.001 0.011 0.011
## ST88Q03 6000 1 2 -0.064 -0.04 -0.053 -0.053
## ST88Q03 6000 1 3 -0.099 -0.046 -0.058 -0.058
## ST88Q03 6000 1 4 -0.229 -0.158 -0.178 -0.178
## ST88Q03 6000 2 3 -0.035 -0.006 -0.004 -0.004
## ST88Q03 6000 2 4 -0.165 -0.118 -0.124 -0.124
## ST88Q03 6000 3 4 -0.13 -0.112 -0.12 -0.12
- Group differences by ST88Q04
ST88Q04.out <- NULL
ST88Q04.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST88Q04 ,
data=results, mean, na.rm=TRUE)
for (n in c(2000, 4000, 6000)){
subdata <- ST88Q04.agg [ST88Q04.agg$N==n,]
for (p in 1:nrow(pairs)){
ST88Q04 <- NULL
ST88Q04$N <- n
ST88Q04$GP1 <- pairs[p,1]
ST88Q04$GP2 <- pairs[p,2]
ST88Q04$true.theta <- round(subdata$PV1MATH[pairs[p,1]] - subdata$PV1MATH[pairs[p,2]],3)
ST88Q04$mirt.EAP <- round(subdata$mirt.eap[pairs[p,1]] - subdata$mirt.eap[pairs[p,2]],3)
ST88Q04$tam.WLE <- round(subdata$tam.wle[pairs[p,1]] - subdata$tam.wle[pairs[p,2]],3)
ST88Q04$tam.REG<- round(subdata$tam.reg[pairs[p,1]] - subdata$tam.reg[pairs[p,2]],3)
ST88Q04.out <- rbind(ST88Q04.out, ST88Q04)
}
}
print(ST88Q04.out)
## N GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST88Q04 2000 1 2 0.008 0.02 0.027 0.027
## ST88Q04 2000 1 3 0.08 0.023 0.011 0.011
## ST88Q04 2000 1 4 0.139 0.114 0.148 0.148
## ST88Q04 2000 2 3 0.073 0.003 -0.016 -0.016
## ST88Q04 2000 2 4 0.131 0.094 0.121 0.122
## ST88Q04 2000 3 4 0.059 0.091 0.137 0.137
## ST88Q04 4000 1 2 0.013 0.008 0.008 0.008
## ST88Q04 4000 1 3 0.041 0.089 0.105 0.105
## ST88Q04 4000 1 4 0.062 0.034 0.047 0.047
## ST88Q04 4000 2 3 0.028 0.08 0.097 0.097
## ST88Q04 4000 2 4 0.05 0.026 0.039 0.039
## ST88Q04 4000 3 4 0.022 -0.054 -0.058 -0.058
## ST88Q04 6000 1 2 0.047 0.027 0.033 0.033
## ST88Q04 6000 1 3 0.073 0.035 0.048 0.048
## ST88Q04 6000 1 4 -0.044 -0.013 0.011 0.011
## ST88Q04 6000 2 3 0.026 0.008 0.015 0.015
## ST88Q04 6000 2 4 -0.091 -0.039 -0.022 -0.022
## ST88Q04 6000 3 4 -0.117 -0.047 -0.036 -0.036