PISA 2012 design background questions (ST89) as latent regressors

We examined known group differences against estimated group differences under the following conditions: 2 (IRT models) x 2 (IRT R packages) x 3 (sample sizes).

The generating parameters used in this simulation are saved in the lsasim package. The package itself contains three datasets to aid in the generation of item responses from the mathematics portion of the PISA 2012 "standard" test booklets. We selected four background varaibles from Attitude towards school: Learning outcomes (ATSCHL) scale (ST88Q01, ST88Q02, ST88Q03, ST88Q04) to identify group differences and the corresponding latent trait (PV1MATH).

Attitude towards school: Learning outcomes (ATSCHL) scale: Thinking about what you have learned at school: to what extent do you agree with the following statements?

ST88Q01 a) School has done little to prepare me for adult life when I leave school
ST88Q02 b) School has been a waste of time
ST88Q03 c) School has helped give me confidence to make decisions
ST88Q04 d) School has taught me things which could be useful in a job
- 1 Strongly agree
- 2 Agree
- 3 Disagree
- 4 Strongly disagree

Two types of IRT models were used: Rasch items and partial credit (PC) items
- Item parameters were drawn from PISA 2012 mathematics items used in standard booklets
- There were 76 Rasch items and 8 PC items
- The total 84 items were administered in the 13 standard test booklets
Two IRT R packages were evaluated: TAM (version 2.4-9) and mirt (version 1.25). Each package used a unique estimator. These are:
- Warm's weighted likelihood estimates (WLE) using TAM
- Expected-a-posteriori (EAP) using mirt
Three sample sizes were used: 2000, 4000, and 6000
- Simulated samples were based on PISA 2012 data

One hundred replications were used for each condition for the calibration

Person trait recovery:
- Known (true) group differences against estimated group differences recovered well when using WLE than EAP

# Load libraries
if(!require(lsasim)){  
  install.packages("lsasim")
  library(lsasim) #version 1.0.1
}

if(!require(mirt)){
  install.packages("mirt")
  library(mirt) #1.25
}

if(!require(TAM)){
  install.packages("TAM")
  library(TAM) #2.4-9
}

if(!require(psych)){
  install.packages("psych")
  library(psych) #1.7.5
}

# Set up conditions
N.cond <- c(2000, 4000, 6000) #number of sample sizes

# Set up number of replications
reps <- 100

# Create space for outputs
results <- NULL

#==============================================================================#
# Background questionnaire selection
#==============================================================================#
# extract items from ST88 scale (ST88Q01, ST88Q02, ST88Q03, ST88Q04) and PV1MATH
pisa2012_cat_prop <- lsasim::pisa2012_q_marginal[c(11:14,19)]
print(pisa2012_cat_prop)

## $ST88Q01
## [1] 0.065 0.311 0.790 1.000
## 
## $ST88Q02
## [1] 0.029 0.102 0.548 1.000
## 
## $ST88Q03
## [1] 0.197 0.752 0.945 1.000
## 
## $ST88Q04
## [1] 0.452 0.895 0.974 1.000
## 
## $PV1MATH
## [1] 1

pisa2012_cor_matrix <- lsasim::pisa2012_q_cormat[c(11:14,19), c(11:14,19)]
print(pisa2012_cor_matrix)

##             ST88Q01    ST88Q02     ST88Q03     ST88Q04     PV1MATH
## ST88Q01  1.00000000  0.4825506 -0.33800998 -0.38529205  0.08013438
## ST88Q02  0.48255064  1.0000000 -0.31299452 -0.42906406  0.17632929
## ST88Q03 -0.33800998 -0.3129945  1.00000000  0.47072970  0.05816648
## ST88Q04 -0.38529205 -0.4290641  0.47072970  1.00000000 -0.01592925
## PV1MATH  0.08013438  0.1763293  0.05816648 -0.01592925  1.00000000

#==============================================================================#
# START SIMULATION
#==============================================================================#
for (r in 1:reps) { #replication
  
  for (N in N.cond) { #sample size
    
    set.seed(17228)
    
    n_examinees <- N
    
    ### -- Background questionnaire generation
    # generate background questionnaire data
    pisa_background <- questionnaire_gen(n = n_examinees,
                                         cat_prop = pisa2012_cat_prop, 
                                         cor_matrix = pisa2012_cor_matrix)
    
    ### -- Cognitive assessment generation
    # assign items to blocks
    pisa2012_math_block_mat <- as.matrix(pisa2012_math_block[, -c(1:2)])
    pisa_blocks <- lsasim::block_design(item_parameters = pisa2012_math_item,
                                        item_block_matrix = pisa2012_math_block_mat)
    
    #assign blocks to booklets
    pisa2012_math_book_mat <- as.matrix(pisa2012_math_booklet[, -1])
    pisa_books <- lsasim::booklet_design(item_block_assignment =
                                           pisa_blocks$block_assignment,
                                         book_design = pisa2012_math_book_mat)
    
    #assign booklets to subjects 
    subj_booklets <- lsasim::booklet_sample(n_subj = N,
                                            book_item_design = pisa_books)
    
    #subset items in standard booklets
    subitems <- sort(unique(subj_booklets$item))
    pisa_items <- pisa2012_math_item[subitems, ]
    
    # generate item responses 
    pisa_ir <- lsasim::response_gen(subject = subj_booklets$subject,
                                    item = subj_booklets$item, 
                                    theta = pisa_background$PV1MATH,
                                    item_no = pisa_items$item, 
                                    b_par = pisa_items$b,
                                    d_par = list(pisa_items$d1, 
                                                 pisa_items$d2))
    
    # -- Merge questionnaire data  with cognitive assessment data
    pisa_data <- merge(pisa_background, pisa_ir, by = "subject")
    
    # extract item responses (excluding "subject" column)
    resp <- pisa_ir[, c(1:length(pisa_items$item))]
    
    #------------------------------------------------------------------------------#
    # Model estimation
    #------------------------------------------------------------------------------#
    
    # model 1: fit Rasch and PC models using mirt package
    mirt.mod <- NULL
    mirt.mod <- mirt::mirt(resp,1, itemtype = 'Rasch', 
                           technical = list( NCYCLES = 500), verbose = F)
    
    # model 2: fit Rasch and PC models using TAM package
    tam.mod <- NULL
    tam.mod <- TAM::tam.mml(resp, irtmodel="PCM2")
    
    # model 3: fit Rasch and PC models with latent regressors using TAM package
    
    # latent regressors Y
    regressors <- pisa_background[,paste0("ST88Q0",1:4)]
    regressors$ST88Q01.new <- data.frame(psych::dummy.code(regressors$ST88Q01))[,1:3] 
         #category 4 as reference for ST88Q01 
    regressors$ST88Q02.new <- data.frame(psych::dummy.code(regressors$ST88Q02))[,1:3] 
         #category 4 as reference for ST88Q02
    regressors$ST88Q03.new <- data.frame(psych::dummy.code(regressors$ST88Q03))[,1:3] 
         #category 4 as reference for ST88Q03 
    regressors$ST88Q04.new <- data.frame(psych::dummy.code(regressors$ST88Q04))[,1:3]
         #category 4 as reference for ST88Q04
    
    Y <- cbind(regressors$ST88Q01.new, 
               regressors$ST88Q02.new, 
               regressors$ST88Q03.new, 
               regressors$ST88Q04.new)
    
    colnames(Y) <- c(paste0("ST88Q01_cat",1:3), paste0("ST88Q02_cat",1:3),
                     paste0("ST88Q03_cat",1:3), paste0("ST88Q04_cat",1:3))
    
    tam.mod.2 <- NULL
    tam.mod.2 <- TAM::tam.mml(resp, irtmodel="PCM2", Y=Y)
    
    #------------------------------------------------------------------------------#
    # Person parameter extraction
    #------------------------------------------------------------------------------#
    
    # extract thetas
    pisa_data$mirt.eap <- c(fscores(mirt.mod, method="EAP")) 
    pisa_data$tam.wle <- tam.wle(tam.mod)$theta  
    pisa_data$tam.reg <- tam.wle(tam.mod.2)$theta  
    
    # summarize background variables (qs), generalized theta, and estimated thetas 
    FS <- pisa_data[,c("subject", "ST88Q01", "ST88Q02", "ST88Q03", "ST88Q04", 
                       "PV1MATH", "mirt.eap", "tam.wle", "tam.reg")]
    
    # summarize results
    person <- data.frame(matrix(c(N, r), nrow = 1))
    colnames(person) <- c("N", "rep")
    person <- cbind(person, FS)
    
    # combine results
    results <- rbind(results, person)
    
  }
}

Summary:

We summarized the group differences based on four background variables (ST88Q01, ST88Q02, ST88Q03, and ST88Q04), which were socred based on 4-point Likert scale. For each variable, we compared the generating theta difference to estimated theta difference between all possible combinations ( 1 vs 2, 1 vs 3, 1 vs 4, 2 vs 3, 2 vs 4, and 3 vs 4).
theta stands for the generating theta value, mirt_EAP stands for the EAP estimator calibrated by mirt package, tam_WLE stands for the WLE estimator calibrated by TAM package, and tam_REG stands for the WLE estimator generated using a model with regressors by TAM package.

pairs <- matrix(c(1,2,1,3,1,4,2,3,2,4,3,4), ncol = 2, byrow = T)
print(pairs)

##      [,1] [,2]
## [1,]    1    2
## [2,]    1    3
## [3,]    1    4
## [4,]    2    3
## [5,]    2    4
## [6,]    3    4

Group differences by ST88Q01

ST88Q01.out <- NULL
ST88Q01.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST88Q01 , 
                data=results, mean, na.rm=TRUE)

for (n in c(2000, 4000, 6000)){
  
  subdata <- ST88Q01.agg [ST88Q01.agg$N==n,]
  
  for (p in 1:nrow(pairs)){
    ST88Q01 <- NULL
    ST88Q01$N <- n
    ST88Q01$GP1 <- pairs[p,1]
    ST88Q01$GP2 <- pairs[p,1]
    ST88Q01$true.theta <- round(subdata$PV1MATH[pairs[p,1]] - subdata$PV1MATH[pairs[p,2]],3)
    ST88Q01$mirt.EAP <- round(subdata$mirt.eap[pairs[p,1]] - subdata$mirt.eap[pairs[p,2]],3)
    ST88Q01$tam.WLE <- round(subdata$tam.wle[pairs[p,1]] - subdata$tam.wle[pairs[p,2]],3)
    ST88Q01$tam.REG<- round(subdata$tam.reg[pairs[p,1]] - subdata$tam.reg[pairs[p,2]],3)
    ST88Q01.out <- rbind(ST88Q01.out, ST88Q01)
  }
}
print(ST88Q01.out)

##         N    GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST88Q01 2000 1   1   -0.085     -0.046   -0.061  -0.061 
## ST88Q01 2000 1   1   -0.123     -0.072   -0.092  -0.092 
## ST88Q01 2000 1   1   -0.247     -0.131   -0.166  -0.166 
## ST88Q01 2000 2   2   -0.038     -0.025   -0.031  -0.031 
## ST88Q01 2000 2   2   -0.162     -0.085   -0.105  -0.106 
## ST88Q01 2000 3   3   -0.124     -0.06    -0.074  -0.074 
## ST88Q01 4000 1   1   -0.11      -0.076   -0.097  -0.097 
## ST88Q01 4000 1   1   -0.193     -0.169   -0.209  -0.209 
## ST88Q01 4000 1   1   -0.304     -0.237   -0.289  -0.289 
## ST88Q01 4000 2   2   -0.084     -0.093   -0.113  -0.113 
## ST88Q01 4000 2   2   -0.195     -0.162   -0.192  -0.192 
## ST88Q01 4000 3   3   -0.111     -0.069   -0.08   -0.08  
## ST88Q01 6000 1   1   -0.035     -0.044   -0.055  -0.055 
## ST88Q01 6000 1   1   -0.091     -0.09    -0.11   -0.11  
## ST88Q01 6000 1   1   -0.245     -0.217   -0.269  -0.269 
## ST88Q01 6000 2   2   -0.056     -0.046   -0.055  -0.055 
## ST88Q01 6000 2   2   -0.21      -0.173   -0.214  -0.214 
## ST88Q01 6000 3   3   -0.154     -0.127   -0.159  -0.159

Group differences by ST88Q02

ST88Q02.out <- NULL
ST88Q02.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST88Q02 , 
                data=results, mean, na.rm=TRUE)

for (n in c(2000, 4000, 6000)){
  
  subdata <- ST88Q02.agg [ST88Q02.agg$N==n,]
  
  for (p in 1:nrow(pairs)){
    ST88Q02 <- NULL
    ST88Q02$N <- n
    ST88Q02$GP1 <- pairs[p,1]
    ST88Q02$GP2 <- pairs[p,2]
    ST88Q02$true.theta <- round(subdata$PV1MATH[pairs[p,1]] - subdata$PV1MATH[pairs[p,2]],3)
    ST88Q02$mirt.EAP <- round(subdata$mirt.eap[pairs[p,1]] - subdata$mirt.eap[pairs[p,2]],3)
    ST88Q02$tam.WLE <- round(subdata$tam.wle[pairs[p,1]] - subdata$tam.wle[pairs[p,2]],3)
    ST88Q02$tam.REG<- round(subdata$tam.reg[pairs[p,1]] - subdata$tam.reg[pairs[p,2]],3)
    ST88Q02.out <- rbind(ST88Q02.out, ST88Q02)
  }
}
print(ST88Q02.out)

##         N    GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST88Q02 2000 1   2   -0.292     -0.238   -0.294  -0.294 
## ST88Q02 2000 1   3   -0.373     -0.276   -0.345  -0.345 
## ST88Q02 2000 1   4   -0.62      -0.478   -0.594  -0.595 
## ST88Q02 2000 2   3   -0.082     -0.039   -0.051  -0.051 
## ST88Q02 2000 2   4   -0.329     -0.241   -0.3    -0.3   
## ST88Q02 2000 3   4   -0.247     -0.202   -0.249  -0.249 
## ST88Q02 4000 1   2   -0.181     -0.1     -0.158  -0.158 
## ST88Q02 4000 1   3   -0.51      -0.395   -0.507  -0.507 
## ST88Q02 4000 1   4   -0.704     -0.552   -0.701  -0.701 
## ST88Q02 4000 2   3   -0.329     -0.295   -0.349  -0.349 
## ST88Q02 4000 2   4   -0.523     -0.451   -0.543  -0.543 
## ST88Q02 4000 3   4   -0.194     -0.157   -0.194  -0.194 
## ST88Q02 6000 1   2   -0.175     -0.108   -0.121  -0.121 
## ST88Q02 6000 1   3   -0.395     -0.271   -0.324  -0.324 
## ST88Q02 6000 1   4   -0.593     -0.436   -0.528  -0.528 
## ST88Q02 6000 2   3   -0.22      -0.163   -0.203  -0.203 
## ST88Q02 6000 2   4   -0.418     -0.329   -0.407  -0.407 
## ST88Q02 6000 3   4   -0.198     -0.165   -0.204  -0.204

Group differences by ST88Q03

ST88Q03.out <- NULL
ST88Q03.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST88Q03 , 
                data=results, mean, na.rm=TRUE)

for (n in c(2000, 4000, 6000)){
  
  subdata <- ST88Q03.agg [ST88Q03.agg$N==n,]
  
  for (p in 1:nrow(pairs)){
    ST88Q03 <- NULL
    ST88Q03$N <- n
    ST88Q03$GP1 <- pairs[p,1]
    ST88Q03$GP2 <- pairs[p,2]
    ST88Q03$true.theta <- round(subdata$PV1MATH[pairs[p,1]] - subdata$PV1MATH[pairs[p,2]],3)
    ST88Q03$mirt.EAP <- round(subdata$mirt.eap[pairs[p,1]] - subdata$mirt.eap[pairs[p,2]],3)
    ST88Q03$tam.WLE <- round(subdata$tam.wle[pairs[p,1]] - subdata$tam.wle[pairs[p,2]],3)
    ST88Q03$tam.REG<- round(subdata$tam.reg[pairs[p,1]] - subdata$tam.reg[pairs[p,2]],3)
    ST88Q03.out <- rbind(ST88Q03.out, ST88Q03)
  }
}
print(ST88Q03.out)

##         N    GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST88Q03 2000 1   2   -0.126     -0.1     -0.131  -0.131 
## ST88Q03 2000 1   3   -0.204     -0.168   -0.203  -0.203 
## ST88Q03 2000 1   4   -0.067     -0.042   -0.062  -0.062 
## ST88Q03 2000 2   3   -0.078     -0.068   -0.072  -0.072 
## ST88Q03 2000 2   4   0.059      0.058    0.069   0.069  
## ST88Q03 2000 3   4   0.137      0.126    0.141   0.141  
## ST88Q03 4000 1   2   -0.056     -0.034   -0.047  -0.047 
## ST88Q03 4000 1   3   -0.069     -0.047   -0.06   -0.06  
## ST88Q03 4000 1   4   -0.044     -0.048   -0.049  -0.049 
## ST88Q03 4000 2   3   -0.013     -0.012   -0.013  -0.013 
## ST88Q03 4000 2   4   0.012      -0.014   -0.003  -0.002 
## ST88Q03 4000 3   4   0.024      -0.001   0.011   0.011  
## ST88Q03 6000 1   2   -0.064     -0.04    -0.053  -0.053 
## ST88Q03 6000 1   3   -0.099     -0.046   -0.058  -0.058 
## ST88Q03 6000 1   4   -0.229     -0.158   -0.178  -0.178 
## ST88Q03 6000 2   3   -0.035     -0.006   -0.004  -0.004 
## ST88Q03 6000 2   4   -0.165     -0.118   -0.124  -0.124 
## ST88Q03 6000 3   4   -0.13      -0.112   -0.12   -0.12

Group differences by ST88Q04

ST88Q04.out <- NULL
ST88Q04.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST88Q04 , 
                data=results, mean, na.rm=TRUE)

for (n in c(2000, 4000, 6000)){
  
  subdata <- ST88Q04.agg [ST88Q04.agg$N==n,]
  
  for (p in 1:nrow(pairs)){
    ST88Q04 <- NULL
    ST88Q04$N <- n
    ST88Q04$GP1 <- pairs[p,1]
    ST88Q04$GP2 <- pairs[p,2]
    ST88Q04$true.theta <- round(subdata$PV1MATH[pairs[p,1]] - subdata$PV1MATH[pairs[p,2]],3)
    ST88Q04$mirt.EAP <- round(subdata$mirt.eap[pairs[p,1]] - subdata$mirt.eap[pairs[p,2]],3)
    ST88Q04$tam.WLE <- round(subdata$tam.wle[pairs[p,1]] - subdata$tam.wle[pairs[p,2]],3)
    ST88Q04$tam.REG<- round(subdata$tam.reg[pairs[p,1]] - subdata$tam.reg[pairs[p,2]],3)
    ST88Q04.out <- rbind(ST88Q04.out, ST88Q04)
  }
}
print(ST88Q04.out)

##         N    GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST88Q04 2000 1   2   0.008      0.02     0.027   0.027  
## ST88Q04 2000 1   3   0.08       0.023    0.011   0.011  
## ST88Q04 2000 1   4   0.139      0.114    0.148   0.148  
## ST88Q04 2000 2   3   0.073      0.003    -0.016  -0.016 
## ST88Q04 2000 2   4   0.131      0.094    0.121   0.122  
## ST88Q04 2000 3   4   0.059      0.091    0.137   0.137  
## ST88Q04 4000 1   2   0.013      0.008    0.008   0.008  
## ST88Q04 4000 1   3   0.041      0.089    0.105   0.105  
## ST88Q04 4000 1   4   0.062      0.034    0.047   0.047  
## ST88Q04 4000 2   3   0.028      0.08     0.097   0.097  
## ST88Q04 4000 2   4   0.05       0.026    0.039   0.039  
## ST88Q04 4000 3   4   0.022      -0.054   -0.058  -0.058 
## ST88Q04 6000 1   2   0.047      0.027    0.033   0.033  
## ST88Q04 6000 1   3   0.073      0.035    0.048   0.048  
## ST88Q04 6000 1   4   -0.044     -0.013   0.011   0.011  
## ST88Q04 6000 2   3   0.026      0.008    0.015   0.015  
## ST88Q04 6000 2   4   -0.091     -0.039   -0.022  -0.022 
## ST88Q04 6000 3   4   -0.117     -0.047   -0.036  -0.036

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

PISA 2012 design background questions (ST89) as latent regressors

Clone this wiki locally