-
Notifications
You must be signed in to change notification settings - Fork 5
PISA 2012 design background questions (ST89) as latent regressors
We examined known group differences against estimated group differences under the following conditions: 2 (IRT models) x 2 (IRT R packages) x 3 (sample sizes).
The generating parameters used in this simulation are stored in the lsasim
package, which contains three datasets to aid in the generation of item responses from the mathematics portion of the PISA 2012 "standard" test booklets. We selected four background variables from the Attitude towards school: Learning activities (ATTLNACT) scale (ST89Q02, ST89Q03, ST89Q04, ST89Q05) to identify group differences, together with the corresponding latent trait (PV1MATH).
Attitude towards school: Learning activities (ATTLNACT) scale: Thinking about your school: to what extent do you agree with the following statements?
-
ST89Q02 a) Trying hard at school will help me get a good job
-
ST89Q03 b) Trying hard at school will help me get into a good &lt;college&gt;
-
ST89Q04 c) I enjoy receiving good &lt;grades&gt;
-
ST89Q05 d) Trying hard at school is important
- 1 Strongly agree
- 2 Agree
- 3 Disagree
- 4 Strongly disagree
- Two types of IRT models were used: Rasch items and partial credit (PC) items
- Item parameters were drawn from PISA 2012 mathematics items used in standard booklets
- There were 76 Rasch items and 8 PC items
- The total 84 items were administered in the 13 standard test booklets
- Two IRT R packages were evaluated: TAM (version 2.4-9) and mirt (version 1.25). Each package used a unique estimator. These are:
  - Warm's weighted likelihood estimates (WLE) using TAM
  - Expected-a-posteriori (EAP) using mirt
- Three sample sizes were used: 2000, 4000, and 6000
- Simulated samples were based on PISA 2012 data
- One hundred replications were used for each condition for the calibration
- Person trait recovery:
- Known (true) group differences were recovered better by the estimated group differences when using WLE than when using EAP
# Load libraries
if(!require(lsasim)){
install.packages("lsasim")
library(lsasim) #version 1.0.1
}
if(!require(mirt)){
install.packages("mirt")
library(mirt) #1.25
}
if(!require(TAM)){
install.packages("TAM")
library(TAM) #2.4-9
}
if(!require(psych)){
install.packages("psych")
library(psych) #1.7.5
}
# Simulation conditions: the three sample sizes to be crossed with the models
N.cond <- 2000 * 1:3 # 2000, 4000, 6000 examinees
# Number of replications run for each condition
reps <- 100
# Accumulator for the per-person results; starts empty
results <- NULL
#==============================================================================#
# Background questionnaire selection
#==============================================================================#
# Extract the cumulative category proportions for the four ST89 items
# (ST89Q02, ST89Q03, ST89Q04, ST89Q05) and for PV1MATH.
# The entries are selected by name rather than by list position, so the
# selection documents itself and stays correct if the ordering of
# lsasim::pisa2012_q_marginal ever changes.
pisa2012_cat_prop <- lsasim::pisa2012_q_marginal[
  c("ST89Q02", "ST89Q03", "ST89Q04", "ST89Q05", "PV1MATH")
]
print(pisa2012_cat_prop)
## $ST89Q02
## [1] 0.433 0.908 0.988 1.000
##
## $ST89Q03
## [1] 0.544 0.891 0.973 1.000
##
## $ST89Q04
## [1] 0.735 0.981 0.995 1.000
##
## $ST89Q05
## [1] 0.437 0.935 0.988 1.000
##
## $PV1MATH
## [1] 1
# Correlation matrix restricted to the four ST89 items and PV1MATH.
# Rows and columns are picked by name instead of by position, so the matrix
# cannot silently drift out of step with the variables it is meant to describe.
st89_pv_vars <- c("ST89Q02", "ST89Q03", "ST89Q04", "ST89Q05", "PV1MATH")
pisa2012_cor_matrix <- lsasim::pisa2012_q_cormat[st89_pv_vars, st89_pv_vars]
print(pisa2012_cor_matrix)
## ST89Q02 ST89Q03 ST89Q04 ST89Q05 PV1MATH
## ST89Q02 1.0000000 0.5622499 0.4510371 0.63679841 -0.03419140
## ST89Q03 0.5622499 1.0000000 0.4603968 0.47993703 -0.26238202
## ST89Q04 0.4510371 0.4603968 1.0000000 0.62383392 -0.14810388
## ST89Q05 0.6367984 0.4799370 0.6238339 1.00000000 0.04829041
## PV1MATH -0.0341914 -0.2623820 -0.1481039 0.04829041 1.00000000
#==============================================================================#
# START SIMULATION
#==============================================================================#
# Seed the random number generator ONCE, before any replication is run.
# Re-seeding with the same value inside the loops makes every replication of a
# given sample size regenerate exactly the same data set, so the replications
# would carry no additional information. Results printed from earlier runs that
# re-seeded inside the loop will therefore differ from what this code produces.
set.seed(17228)

# The block and booklet designs are built only from the fixed PISA 2012 item
# pool objects, which the loop never modifies, so they are constructed once and
# reused by every replication.
# assign items to blocks
pisa2012_math_block_mat <- as.matrix(pisa2012_math_block[, -c(1:2)])
pisa_blocks <- lsasim::block_design(item_parameters = pisa2012_math_item,
                                    item_block_matrix = pisa2012_math_block_mat)
# assign blocks to booklets
pisa2012_math_book_mat <- as.matrix(pisa2012_math_booklet[, -1])
pisa_books <- lsasim::booklet_design(item_block_assignment =
                                       pisa_blocks$block_assignment,
                                     book_design = pisa2012_math_book_mat)

# Background items used as latent regressors, and the names of their dummy
# columns (categories 1-3; category 4 is the reference category).
st89_items <- paste0("ST89Q0", 2:5)
st89_dummy_names <- unlist(lapply(st89_items,
                                  function(v) paste0(v, "_cat", 1:3)))

# One slot per (replication, sample size) cell; filling a preallocated list
# avoids re-copying the growing result on every iteration.
sim_out <- vector("list", reps * length(N.cond))
cell <- 0L

for (r in seq_len(reps)) { # replication
  for (N in N.cond) { # sample size
    n_examinees <- N

    ### -- Background questionnaire generation
    pisa_background <- lsasim::questionnaire_gen(n = n_examinees,
                                                 cat_prop = pisa2012_cat_prop,
                                                 cor_matrix = pisa2012_cor_matrix)

    ### -- Cognitive assessment generation
    # assign booklets to subjects
    subj_booklets <- lsasim::booklet_sample(n_subj = N,
                                            book_item_design = pisa_books)
    # subset items in standard booklets
    subitems <- sort(unique(subj_booklets$item))
    pisa_items <- pisa2012_math_item[subitems, ]
    # generate item responses, using the generated PV1MATH as the true theta
    pisa_ir <- lsasim::response_gen(subject = subj_booklets$subject,
                                    item = subj_booklets$item,
                                    theta = pisa_background$PV1MATH,
                                    item_no = pisa_items$item,
                                    b_par = pisa_items$b,
                                    d_par = list(pisa_items$d1,
                                                 pisa_items$d2))

    # -- Merge questionnaire data with cognitive assessment data
    pisa_data <- merge(pisa_background, pisa_ir, by = "subject")
    # extract item responses (the first length(pisa_items$item) columns,
    # i.e. excluding the "subject" column)
    resp <- pisa_ir[, seq_along(pisa_items$item)]

    #--------------------------------------------------------------------------#
    # Model estimation
    #--------------------------------------------------------------------------#
    # model 1: fit Rasch and PC models using mirt package
    mirt.mod <- mirt::mirt(resp, 1, itemtype = "Rasch",
                           technical = list(NCYCLES = 500), verbose = FALSE)
    # model 2: fit Rasch and PC models using TAM package
    tam.mod <- TAM::tam.mml(resp, irtmodel = "PCM2")
    # model 3: fit Rasch and PC models with latent regressors using TAM package
    # latent regressors Y: dummy-code each ST89 item and keep the first three
    # indicator columns, so that category 4 is the reference category
    Y <- data.frame(do.call(cbind, lapply(st89_items, function(v) {
      data.frame(psych::dummy.code(pisa_background[[v]]))[, 1:3]
    })))
    colnames(Y) <- st89_dummy_names
    tam.mod.2 <- TAM::tam.mml(resp, irtmodel = "PCM2", Y = Y)

    #--------------------------------------------------------------------------#
    # Person parameter extraction
    #--------------------------------------------------------------------------#
    # NOTE(review): the estimates below are in the row order of `resp`
    # (i.e. of pisa_ir), while pisa_data is the result of merge(); this assumes
    # both are ordered by subject - TODO confirm.
    pisa_data$mirt.eap <- c(mirt::fscores(mirt.mod, method = "EAP"))
    pisa_data$tam.wle <- TAM::tam.wle(tam.mod)$theta
    # NOTE(review): tam.reg is also a WLE; presumably WLEs do not make use of
    # the latent regression, which would explain tam.wle and tam.reg being
    # near-identical - confirm whether a different estimator was intended.
    pisa_data$tam.reg <- TAM::tam.wle(tam.mod.2)$theta

    # keep background variables, generating theta, and estimated thetas
    FS <- pisa_data[, c("subject", "ST89Q02", "ST89Q03", "ST89Q04", "ST89Q05",
                        "PV1MATH", "mirt.eap", "tam.wle", "tam.reg")]

    # tag every person row with its condition (N) and replication number
    cell <- cell + 1L
    sim_out[[cell]] <- cbind(data.frame(N = N, rep = r), FS)
  }
}

# Append all cells to `results` in a single step
results <- rbind(results, do.call(rbind, sim_out))
Summary:
-
We summarized the group differences based on four background variables (ST89Q02, ST89Q03, ST89Q04, and ST89Q05), which were scored on a 4-point Likert scale. For each variable, we compared the generating theta difference to the estimated theta difference for all possible pairs of categories (1 vs 2, 1 vs 3, 1 vs 4, 2 vs 3, 2 vs 4, and 3 vs 4).
-
theta
stands for the generating theta value, mirt_EAP
stands for the EAP estimator calibrated by the mirt
package, tam_WLE
stands for the WLE estimator calibrated by the TAM
package, and tam_REG
stands for the WLE estimator generated using a model with regressors by the TAM
package.
# All six unordered pairs of the four response categories, one pair per row:
# column 1 is the first group (GP1), column 2 the second group (GP2).
# Built row by row so the layout is visible and no reassignable `T` shorthand
# is needed.
pairs <- rbind(c(1, 2),
               c(1, 3),
               c(1, 4),
               c(2, 3),
               c(2, 4),
               c(3, 4))
print(pairs)
## [,1] [,2]
## [1,] 1 2
## [2,] 1 3
## [3,] 1 4
## [4,] 2 3
## [5,] 2 4
## [6,] 3 4
- Group differences by ST89Q02
# Group differences by ST89Q02.
# Mean generating theta (PV1MATH) and mean estimated thetas are aggregated per
# sample size and response category; every pair of categories listed in `pairs`
# is then compared (first group minus second group, rounded to 3 decimals).
ST89Q02.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST89Q02,
                         data = results, FUN = mean, na.rm = TRUE)
ST89Q02.out <- do.call(rbind, lapply(sort(unique(ST89Q02.agg$N)), function(n) {
  subdata <- ST89Q02.agg[ST89Q02.agg$N == n, ]
  # Locate each category by its label instead of its row position, so a
  # category with no respondents gives NA rather than shifting the comparison.
  gp1 <- match(as.character(pairs[, 1]), as.character(subdata$ST89Q02))
  gp2 <- match(as.character(pairs[, 2]), as.character(subdata$ST89Q02))
  # One row per pair, returned as an ordinary data frame with numeric columns
  # (rbind-ing bare lists would give a list-mode matrix instead).
  data.frame(
    N = n,
    GP1 = pairs[, 1],
    GP2 = pairs[, 2],
    true.theta = round(subdata$PV1MATH[gp1] - subdata$PV1MATH[gp2], 3),
    mirt.EAP = round(subdata$mirt.eap[gp1] - subdata$mirt.eap[gp2], 3),
    tam.WLE = round(subdata$tam.wle[gp1] - subdata$tam.wle[gp2], 3),
    tam.REG = round(subdata$tam.reg[gp1] - subdata$tam.reg[gp2], 3)
  )
}))
print(ST89Q02.out)
## N GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST89Q02 2000 1 2 0.057 0.09 0.107 0.108
## ST89Q02 2000 1 3 0.159 0.118 0.13 0.13
## ST89Q02 2000 1 4 0.393 0.421 0.534 0.535
## ST89Q02 2000 2 3 0.102 0.029 0.023 0.022
## ST89Q02 2000 2 4 0.336 0.331 0.427 0.428
## ST89Q02 2000 3 4 0.234 0.303 0.404 0.405
## ST89Q02 4000 1 2 0.011 -0.001 0.002 0.002
## ST89Q02 4000 1 3 0.038 0.058 0.067 0.067
## ST89Q02 4000 1 4 0.196 0.228 0.264 0.265
## ST89Q02 4000 2 3 0.027 0.06 0.066 0.066
## ST89Q02 4000 2 4 0.185 0.229 0.263 0.263
## ST89Q02 4000 3 4 0.158 0.169 0.197 0.198
## ST89Q02 6000 1 2 0.086 0.069 0.083 0.083
## ST89Q02 6000 1 3 0.084 0.091 0.113 0.113
## ST89Q02 6000 1 4 -0.049 0.014 0.006 0.006
## ST89Q02 6000 2 3 -0.002 0.022 0.03 0.03
## ST89Q02 6000 2 4 -0.135 -0.055 -0.077 -0.077
## ST89Q02 6000 3 4 -0.134 -0.077 -0.106 -0.107
- Group differences by ST89Q03
# Group differences by ST89Q03.
# Mean generating theta (PV1MATH) and mean estimated thetas are aggregated per
# sample size and response category; every pair of categories listed in `pairs`
# is then compared (first group minus second group, rounded to 3 decimals).
ST89Q03.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST89Q03,
                         data = results, FUN = mean, na.rm = TRUE)
ST89Q03.out <- do.call(rbind, lapply(sort(unique(ST89Q03.agg$N)), function(n) {
  subdata <- ST89Q03.agg[ST89Q03.agg$N == n, ]
  # Locate each category by its label instead of its row position, so a
  # category with no respondents gives NA rather than shifting the comparison.
  gp1 <- match(as.character(pairs[, 1]), as.character(subdata$ST89Q03))
  gp2 <- match(as.character(pairs[, 2]), as.character(subdata$ST89Q03))
  # One row per pair, returned as an ordinary data frame with numeric columns
  # (rbind-ing bare lists would give a list-mode matrix instead).
  data.frame(
    N = n,
    GP1 = pairs[, 1],
    GP2 = pairs[, 2],
    true.theta = round(subdata$PV1MATH[gp1] - subdata$PV1MATH[gp2], 3),
    mirt.EAP = round(subdata$mirt.eap[gp1] - subdata$mirt.eap[gp2], 3),
    tam.WLE = round(subdata$tam.wle[gp1] - subdata$tam.wle[gp2], 3),
    tam.REG = round(subdata$tam.reg[gp1] - subdata$tam.reg[gp2], 3)
  )
}))
print(ST89Q03.out)
## N GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST89Q03 2000 1 2 0.317 0.238 0.3 0.3
## ST89Q03 2000 1 3 0.571 0.509 0.634 0.634
## ST89Q03 2000 1 4 1.015 0.875 1.057 1.057
## ST89Q03 2000 2 3 0.254 0.271 0.335 0.335
## ST89Q03 2000 2 4 0.698 0.637 0.757 0.758
## ST89Q03 2000 3 4 0.443 0.366 0.423 0.423
## ST89Q03 4000 1 2 0.352 0.26 0.321 0.321
## ST89Q03 4000 1 3 0.605 0.504 0.623 0.623
## ST89Q03 4000 1 4 0.664 0.533 0.675 0.675
## ST89Q03 4000 2 3 0.253 0.244 0.302 0.302
## ST89Q03 4000 2 4 0.312 0.273 0.354 0.354
## ST89Q03 4000 3 4 0.059 0.029 0.052 0.052
## ST89Q03 6000 1 2 0.389 0.307 0.381 0.381
## ST89Q03 6000 1 3 0.59 0.475 0.595 0.595
## ST89Q03 6000 1 4 0.646 0.553 0.689 0.689
## ST89Q03 6000 2 3 0.201 0.168 0.214 0.214
## ST89Q03 6000 2 4 0.257 0.246 0.308 0.308
## ST89Q03 6000 3 4 0.055 0.078 0.093 0.094
- Group differences by ST89Q04
# Group differences by ST89Q04.
# Mean generating theta (PV1MATH) and mean estimated thetas are aggregated per
# sample size and response category; every pair of categories listed in `pairs`
# is then compared (first group minus second group, rounded to 3 decimals).
ST89Q04.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST89Q04,
                         data = results, FUN = mean, na.rm = TRUE)
ST89Q04.out <- do.call(rbind, lapply(sort(unique(ST89Q04.agg$N)), function(n) {
  subdata <- ST89Q04.agg[ST89Q04.agg$N == n, ]
  # Locate each category by its label instead of its row position, so a
  # category with no respondents gives NA rather than shifting the comparison.
  gp1 <- match(as.character(pairs[, 1]), as.character(subdata$ST89Q04))
  gp2 <- match(as.character(pairs[, 2]), as.character(subdata$ST89Q04))
  # One row per pair, returned as an ordinary data frame with numeric columns
  # (rbind-ing bare lists would give a list-mode matrix instead).
  data.frame(
    N = n,
    GP1 = pairs[, 1],
    GP2 = pairs[, 2],
    true.theta = round(subdata$PV1MATH[gp1] - subdata$PV1MATH[gp2], 3),
    mirt.EAP = round(subdata$mirt.eap[gp1] - subdata$mirt.eap[gp2], 3),
    tam.WLE = round(subdata$tam.wle[gp1] - subdata$tam.wle[gp2], 3),
    tam.REG = round(subdata$tam.reg[gp1] - subdata$tam.reg[gp2], 3)
  )
}))
print(ST89Q04.out)
## N GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST89Q04 2000 1 2 0.225 0.203 0.255 0.255
## ST89Q04 2000 1 3 0.562 0.472 0.548 0.549
## ST89Q04 2000 1 4 0.556 0.354 0.41 0.41
## ST89Q04 2000 2 3 0.337 0.269 0.293 0.294
## ST89Q04 2000 2 4 0.331 0.151 0.155 0.155
## ST89Q04 2000 3 4 -0.006 -0.118 -0.138 -0.139
## ST89Q04 4000 1 2 0.282 0.202 0.25 0.25
## ST89Q04 4000 1 3 0.306 0.218 0.282 0.283
## ST89Q04 4000 1 4 0.607 0.303 0.373 0.373
## ST89Q04 4000 2 3 0.024 0.016 0.032 0.032
## ST89Q04 4000 2 4 0.325 0.101 0.123 0.123
## ST89Q04 4000 3 4 0.301 0.085 0.09 0.09
## ST89Q04 6000 1 2 0.207 0.189 0.236 0.236
## ST89Q04 6000 1 3 0.293 0.325 0.392 0.392
## ST89Q04 6000 1 4 0.513 0.277 0.389 0.389
## ST89Q04 6000 2 3 0.086 0.136 0.156 0.156
## ST89Q04 6000 2 4 0.306 0.087 0.153 0.153
## ST89Q04 6000 3 4 0.22 -0.048 -0.003 -0.003
- Group differences by ST89Q05
# Group differences by ST89Q05.
# Mean generating theta (PV1MATH) and mean estimated thetas are aggregated per
# sample size and response category; every pair of categories listed in `pairs`
# is then compared (first group minus second group, rounded to 3 decimals).
ST89Q05.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST89Q05,
                         data = results, FUN = mean, na.rm = TRUE)
ST89Q05.out <- do.call(rbind, lapply(sort(unique(ST89Q05.agg$N)), function(n) {
  subdata <- ST89Q05.agg[ST89Q05.agg$N == n, ]
  # Locate each category by its label instead of its row position, so a
  # category with no respondents gives NA rather than shifting the comparison.
  gp1 <- match(as.character(pairs[, 1]), as.character(subdata$ST89Q05))
  gp2 <- match(as.character(pairs[, 2]), as.character(subdata$ST89Q05))
  # One row per pair, returned as an ordinary data frame with numeric columns
  # (rbind-ing bare lists would give a list-mode matrix instead).
  data.frame(
    N = n,
    GP1 = pairs[, 1],
    GP2 = pairs[, 2],
    true.theta = round(subdata$PV1MATH[gp1] - subdata$PV1MATH[gp2], 3),
    mirt.EAP = round(subdata$mirt.eap[gp1] - subdata$mirt.eap[gp2], 3),
    tam.WLE = round(subdata$tam.wle[gp1] - subdata$tam.wle[gp2], 3),
    tam.REG = round(subdata$tam.reg[gp1] - subdata$tam.reg[gp2], 3)
  )
}))
print(ST89Q05.out)
## N GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST89Q05 2000 1 2 -0.135 -0.082 -0.108 -0.108
## ST89Q05 2000 1 3 -0.103 -0.02 -0.055 -0.055
## ST89Q05 2000 1 4 0.28 0.26 0.344 0.344
## ST89Q05 2000 2 3 0.032 0.062 0.053 0.053
## ST89Q05 2000 2 4 0.415 0.342 0.452 0.453
## ST89Q05 2000 3 4 0.383 0.28 0.399 0.399
## ST89Q05 4000 1 2 -0.098 -0.064 -0.08 -0.08
## ST89Q05 4000 1 3 -0.094 -0.101 -0.126 -0.126
## ST89Q05 4000 1 4 0.024 -0.001 -0.022 -0.022
## ST89Q05 4000 2 3 0.004 -0.037 -0.046 -0.046
## ST89Q05 4000 2 4 0.122 0.063 0.058 0.058
## ST89Q05 4000 3 4 0.118 0.1 0.104 0.104
## ST89Q05 6000 1 2 -0.087 -0.082 -0.104 -0.104
## ST89Q05 6000 1 3 -0.075 -0.068 -0.08 -0.08
## ST89Q05 6000 1 4 -0.254 -0.213 -0.26 -0.26
## ST89Q05 6000 2 3 0.013 0.014 0.024 0.024
## ST89Q05 6000 2 4 -0.167 -0.131 -0.156 -0.156
## ST89Q05 6000 3 4 -0.179 -0.145 -0.18 -0.18