PISA 2012 design background questions (ST89) as latent regressors

We examined known group differences against estimated group differences under the following conditions: 2 (IRT models) x 2 (IRT R packages) x 3 (sample sizes).

The generating parameters used in this simulation are stored in the lsasim package, which contains three datasets to aid in the generation of item responses from the mathematics portion of the PISA 2012 "standard" test booklets. We selected four background variables from the Attitude towards school: Learning activities (ATTLNACT) scale (ST89Q02, ST89Q03, ST89Q04, ST89Q05) to identify group differences, together with the corresponding latent trait (PV1MATH).

Attitude towards school: Learning activities (ATTLNACT) scale: Thinking about your school: to what extent do you agree with the following statements?

ST89Q02 a) Trying hard at school will help me get a good job
ST89Q03 b) Trying hard at school will help me get into a good college
ST89Q04 c) I enjoy receiving good grades
ST89Q05 d) Trying hard at school is important
- 1 Strongly agree
- 2 Agree
- 3 Disagree
- 4 Strongly disagree

Two types of IRT models were used: Rasch items and partial credit (PC) items
- Item parameters were drawn from PISA 2012 mathematics items used in standard booklets
- There were 76 Rasch items and 8 PC items
- The total 84 items were administered in the 13 standard test booklets
Two IRT R packages were evaluated: TAM (version 2.4-9) and mirt (version 1.25). Each package used a unique estimator. These are:
- Warm's weighted likelihood estimates (WLE) using TAM
- Expected-a-posteriori (EAP) using mirt
Three sample sizes were used: 2000, 4000, and 6000
- Simulated samples were based on PISA 2012 data

One hundred replications were used for each condition for the calibration

Person trait recovery:
- Estimated group differences recovered the known (true) group differences better when using WLE than when using EAP

# Load libraries
if(!require(lsasim)){  
  install.packages("lsasim")
  library(lsasim) #version 1.0.1
}

if(!require(mirt)){
  install.packages("mirt")
  library(mirt) #1.25
}

if(!require(TAM)){
  install.packages("TAM")
  library(TAM) #2.4-9
}

if(!require(psych)){
  install.packages("psych")
  library(psych) #1.7.5
}

# Simulation design ----
# Examinee sample sizes; each value is one simulation condition
N.cond <- seq(from = 2000, to = 6000, by = 2000)

# Replications run for every sample-size condition
reps <- 100

# Accumulator for the per-examinee output of all runs; starts out empty
results <- NULL

#==============================================================================#
# Background questionnaire selection
#==============================================================================#
# Cumulative category proportions for the four ST89 items (list positions
# 15-18: ST89Q02, ST89Q03, ST89Q04, ST89Q05) plus PV1MATH (position 19)
pisa2012_cat_prop <- lsasim::pisa2012_q_marginal[15:19]
print(pisa2012_cat_prop)

## $ST89Q02
## [1] 0.433 0.908 0.988 1.000
## 
## $ST89Q03
## [1] 0.544 0.891 0.973 1.000
## 
## $ST89Q04
## [1] 0.735 0.981 0.995 1.000
## 
## $ST89Q05
## [1] 0.437 0.935 0.988 1.000
## 
## $PV1MATH
## [1] 1

# Correlations among the same five variables (rows/columns 15-19 of the full
# questionnaire correlation matrix)
pisa2012_cor_matrix <- lsasim::pisa2012_q_cormat[15:19, 15:19]
print(pisa2012_cor_matrix)

##            ST89Q02    ST89Q03    ST89Q04    ST89Q05     PV1MATH
## ST89Q02  1.0000000  0.5622499  0.4510371 0.63679841 -0.03419140
## ST89Q03  0.5622499  1.0000000  0.4603968 0.47993703 -0.26238202
## ST89Q04  0.4510371  0.4603968  1.0000000 0.62383392 -0.14810388
## ST89Q05  0.6367984  0.4799370  0.6238339 1.00000000  0.04829041
## PV1MATH -0.0341914 -0.2623820 -0.1481039 0.04829041  1.00000000

#==============================================================================#
# START SIMULATION
#==============================================================================#
# Run the simulation: for every replication and every sample size, generate
# background and item-response data, fit the three models, extract the person
# estimates, and collect one row per examinee in `results`
# (columns: N, rep, subject, ST89Q02-ST89Q05, PV1MATH, mirt.eap, tam.wle,
# tam.reg).

# Seed the RNG once, before any data are generated. Re-seeding inside the loops
# would make every replication of a given sample size draw exactly the same
# data, so the replications would just be copies of a single run.
set.seed(17228)

### -- Test design (inputs do not change across replications or sample sizes)
# assign items to blocks
pisa2012_math_block_mat <- as.matrix(pisa2012_math_block[, -c(1:2)])
pisa_blocks <- lsasim::block_design(item_parameters = pisa2012_math_item,
                                    item_block_matrix = pisa2012_math_block_mat)

# assign blocks to booklets
pisa2012_math_book_mat <- as.matrix(pisa2012_math_booklet[, -1])
pisa_books <- lsasim::booklet_design(item_block_assignment =
                                       pisa_blocks$block_assignment,
                                     book_design = pisa2012_math_book_mat)

# background items that enter the latent regression
st89_items <- paste0("ST89Q0", 2:5)

# one slot per (replication, sample size) run; bound together after the loops
# instead of growing `results` with rbind() on every iteration
sim_runs <- vector("list", reps * length(N.cond))
run_id <- 0L

for (r in seq_len(reps)) { #replication
  
  for (N in N.cond) { #sample size
    
    n_examinees <- N
    
    ### -- Background questionnaire generation
    # generate background questionnaire data
    pisa_background <- questionnaire_gen(n = n_examinees,
                                         cat_prop = pisa2012_cat_prop, 
                                         cor_matrix = pisa2012_cor_matrix)
    
    ### -- Cognitive assessment generation
    # assign booklets to subjects 
    subj_booklets <- lsasim::booklet_sample(n_subj = N,
                                            book_item_design = pisa_books)
    
    # subset items in standard booklets
    subitems <- sort(unique(subj_booklets$item))
    pisa_items <- pisa2012_math_item[subitems, ]
    
    # generate item responses; the generating theta is the simulated PV1MATH
    pisa_ir <- lsasim::response_gen(subject = subj_booklets$subject,
                                    item = subj_booklets$item, 
                                    theta = pisa_background$PV1MATH,
                                    item_no = pisa_items$item, 
                                    b_par = pisa_items$b,
                                    d_par = list(pisa_items$d1, 
                                                 pisa_items$d2))
    
    # -- Merge questionnaire data with cognitive assessment data
    # NOTE(review): the estimates added to pisa_data below are in pisa_ir row
    # order; this assumes merge() leaves the subjects in that same order.
    pisa_data <- merge(pisa_background, pisa_ir, by = "subject")
    
    # extract item responses: the first length(pisa_items$item) columns
    # (presumably every column except "subject" -- confirm)
    resp <- pisa_ir[, seq_along(pisa_items$item)]
    
    #------------------------------------------------------------------------------#
    # Model estimation
    #------------------------------------------------------------------------------#
    
    # model 1: fit Rasch and PC models using mirt package
    mirt.mod <- mirt::mirt(resp, 1, itemtype = "Rasch",
                           technical = list(NCYCLES = 500), verbose = FALSE)
    
    # model 2: fit Rasch and PC models using TAM package
    tam.mod <- TAM::tam.mml(resp, irtmodel = "PCM2")
    
    # model 3: fit Rasch and PC models with latent regressors using TAM package
    
    # latent regressors Y: for each background item keep the first three
    # dummy-code columns (intended: categories 1-3, category 4 as reference)
    dummy_blocks <- lapply(st89_items, function(item) {
      data.frame(psych::dummy.code(pisa_background[[item]]))[, 1:3]
    })
    Y <- do.call(cbind, dummy_blocks)
    colnames(Y) <- paste0(rep(st89_items, each = 3), "_cat", 1:3)
    
    tam.mod.2 <- TAM::tam.mml(resp, irtmodel = "PCM2", Y = Y)
    
    #------------------------------------------------------------------------------#
    # Person parameter extraction
    #------------------------------------------------------------------------------#
    
    # extract thetas
    pisa_data$mirt.eap <- c(mirt::fscores(mirt.mod, method = "EAP"))
    pisa_data$tam.wle <- TAM::tam.wle(tam.mod)$theta
    # NOTE(review): WLEs are likelihood-based person estimates, so the latent
    # regression presumably influences them only through the item parameters;
    # confirm whether EAPs or plausible values were intended here.
    pisa_data$tam.reg <- TAM::tam.wle(tam.mod.2)$theta
    
    # keep background variables, generating theta, and estimated thetas
    FS <- pisa_data[, c("subject", st89_items,
                        "PV1MATH", "mirt.eap", "tam.wle", "tam.reg")]
    
    # tag every examinee row with its sample size and replication number
    run_id <- run_id + 1L
    sim_runs[[run_id]] <- data.frame(N = N, rep = r, FS)
    
  }
}

# combine results (appended to whatever `results` already holds)
results <- rbind(results, do.call(rbind, sim_runs))

Summary:

We summarized the group differences based on four background variables (ST89Q02, ST89Q03, ST89Q04, and ST89Q05), which were scored on a 4-point Likert scale. For each variable, we compared the generating theta difference to the estimated theta difference for all possible pairs of categories (1 vs 2, 1 vs 3, 1 vs 4, 2 vs 3, 2 vs 4, and 3 vs 4).
true.theta stands for the generating theta value, mirt.EAP stands for the EAP estimator calibrated by the mirt package, tam.WLE stands for the WLE estimator calibrated by the TAM package, and tam.REG stands for the WLE estimator generated using a model with regressors by the TAM package.

# All unordered pairs of the four response categories, one pair per row
# (column 1 = first group, column 2 = second group). combn() enumerates them
# in the order 1-2, 1-3, 1-4, 2-3, 2-4, 3-4, so nothing is typed by hand.
pairs <- t(utils::combn(4, 2))
print(pairs)

##      [,1] [,2]
## [1,]    1    2
## [2,]    1    3
## [3,]    1    4
## [4,]    2    3
## [5,]    2    4
## [6,]    3    4

Group differences by ST89Q02

# Pairwise differences in mean theta between ST89Q02 response categories,
# computed separately for each sample size.
#
# ST89Q02.agg: mean generating theta (PV1MATH) and mean of each estimate
#              (mirt.eap, tam.wle, tam.reg) per N x category cell.
# ST89Q02.out: one row per (N, category pair) holding "GP1 minus GP2" for each
#              of the four means, rounded to 3 decimals.
ST89Q02.out <- NULL
ST89Q02.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST89Q02,
                         data = results, FUN = mean, na.rm = TRUE)

# loop over the sample sizes actually present instead of a hard-coded list
for (n in sort(unique(ST89Q02.agg$N))) {
  
  subdata <- ST89Q02.agg[ST89Q02.agg$N == n, ]
  # category label of each row (assumes labels "1".."4", as used in `pairs`)
  categories <- as.character(subdata$ST89Q02)
  
  for (p in seq_len(nrow(pairs))) {
    # Find the two categories by value rather than by row position, so a
    # category with no rows at this N gives NA instead of silently comparing
    # the wrong groups.
    g1 <- match(as.character(pairs[p, 1]), categories)
    g2 <- match(as.character(pairs[p, 2]), categories)
    
    ST89Q02 <- list(
      N = n,
      GP1 = pairs[p, 1],
      GP2 = pairs[p, 2],
      true.theta = round(subdata$PV1MATH[g1] - subdata$PV1MATH[g2], 3),
      mirt.EAP = round(subdata$mirt.eap[g1] - subdata$mirt.eap[g2], 3),
      tam.WLE = round(subdata$tam.wle[g1] - subdata$tam.wle[g2], 3),
      tam.REG = round(subdata$tam.reg[g1] - subdata$tam.reg[g2], 3)
    )
    # rbind() labels each new row with the variable name ("ST89Q02")
    ST89Q02.out <- rbind(ST89Q02.out, ST89Q02)
  }
}
print(ST89Q02.out)

##         N    GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST89Q02 2000 1   2   0.057      0.09     0.107   0.108  
## ST89Q02 2000 1   3   0.159      0.118    0.13    0.13   
## ST89Q02 2000 1   4   0.393      0.421    0.534   0.535  
## ST89Q02 2000 2   3   0.102      0.029    0.023   0.022  
## ST89Q02 2000 2   4   0.336      0.331    0.427   0.428  
## ST89Q02 2000 3   4   0.234      0.303    0.404   0.405  
## ST89Q02 4000 1   2   0.011      -0.001   0.002   0.002  
## ST89Q02 4000 1   3   0.038      0.058    0.067   0.067  
## ST89Q02 4000 1   4   0.196      0.228    0.264   0.265  
## ST89Q02 4000 2   3   0.027      0.06     0.066   0.066  
## ST89Q02 4000 2   4   0.185      0.229    0.263   0.263  
## ST89Q02 4000 3   4   0.158      0.169    0.197   0.198  
## ST89Q02 6000 1   2   0.086      0.069    0.083   0.083  
## ST89Q02 6000 1   3   0.084      0.091    0.113   0.113  
## ST89Q02 6000 1   4   -0.049     0.014    0.006   0.006  
## ST89Q02 6000 2   3   -0.002     0.022    0.03    0.03   
## ST89Q02 6000 2   4   -0.135     -0.055   -0.077  -0.077 
## ST89Q02 6000 3   4   -0.134     -0.077   -0.106  -0.107

Group differences by ST89Q03

# Pairwise differences in mean theta between ST89Q03 response categories,
# computed separately for each sample size.
#
# ST89Q03.agg: mean generating theta (PV1MATH) and mean of each estimate
#              (mirt.eap, tam.wle, tam.reg) per N x category cell.
# ST89Q03.out: one row per (N, category pair) holding "GP1 minus GP2" for each
#              of the four means, rounded to 3 decimals.
ST89Q03.out <- NULL
ST89Q03.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST89Q03,
                         data = results, FUN = mean, na.rm = TRUE)

# loop over the sample sizes actually present instead of a hard-coded list
for (n in sort(unique(ST89Q03.agg$N))) {
  
  subdata <- ST89Q03.agg[ST89Q03.agg$N == n, ]
  # category label of each row (assumes labels "1".."4", as used in `pairs`)
  categories <- as.character(subdata$ST89Q03)
  
  for (p in seq_len(nrow(pairs))) {
    # Find the two categories by value rather than by row position, so a
    # category with no rows at this N gives NA instead of silently comparing
    # the wrong groups.
    g1 <- match(as.character(pairs[p, 1]), categories)
    g2 <- match(as.character(pairs[p, 2]), categories)
    
    ST89Q03 <- list(
      N = n,
      GP1 = pairs[p, 1],
      GP2 = pairs[p, 2],
      true.theta = round(subdata$PV1MATH[g1] - subdata$PV1MATH[g2], 3),
      mirt.EAP = round(subdata$mirt.eap[g1] - subdata$mirt.eap[g2], 3),
      tam.WLE = round(subdata$tam.wle[g1] - subdata$tam.wle[g2], 3),
      tam.REG = round(subdata$tam.reg[g1] - subdata$tam.reg[g2], 3)
    )
    # rbind() labels each new row with the variable name ("ST89Q03")
    ST89Q03.out <- rbind(ST89Q03.out, ST89Q03)
  }
}
print(ST89Q03.out)

##         N    GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST89Q03 2000 1   2   0.317      0.238    0.3     0.3    
## ST89Q03 2000 1   3   0.571      0.509    0.634   0.634  
## ST89Q03 2000 1   4   1.015      0.875    1.057   1.057  
## ST89Q03 2000 2   3   0.254      0.271    0.335   0.335  
## ST89Q03 2000 2   4   0.698      0.637    0.757   0.758  
## ST89Q03 2000 3   4   0.443      0.366    0.423   0.423  
## ST89Q03 4000 1   2   0.352      0.26     0.321   0.321  
## ST89Q03 4000 1   3   0.605      0.504    0.623   0.623  
## ST89Q03 4000 1   4   0.664      0.533    0.675   0.675  
## ST89Q03 4000 2   3   0.253      0.244    0.302   0.302  
## ST89Q03 4000 2   4   0.312      0.273    0.354   0.354  
## ST89Q03 4000 3   4   0.059      0.029    0.052   0.052  
## ST89Q03 6000 1   2   0.389      0.307    0.381   0.381  
## ST89Q03 6000 1   3   0.59       0.475    0.595   0.595  
## ST89Q03 6000 1   4   0.646      0.553    0.689   0.689  
## ST89Q03 6000 2   3   0.201      0.168    0.214   0.214  
## ST89Q03 6000 2   4   0.257      0.246    0.308   0.308  
## ST89Q03 6000 3   4   0.055      0.078    0.093   0.094

Group differences by ST89Q04

# Pairwise differences in mean theta between ST89Q04 response categories,
# computed separately for each sample size.
#
# ST89Q04.agg: mean generating theta (PV1MATH) and mean of each estimate
#              (mirt.eap, tam.wle, tam.reg) per N x category cell.
# ST89Q04.out: one row per (N, category pair) holding "GP1 minus GP2" for each
#              of the four means, rounded to 3 decimals.
ST89Q04.out <- NULL
ST89Q04.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST89Q04,
                         data = results, FUN = mean, na.rm = TRUE)

# loop over the sample sizes actually present instead of a hard-coded list
for (n in sort(unique(ST89Q04.agg$N))) {
  
  subdata <- ST89Q04.agg[ST89Q04.agg$N == n, ]
  # category label of each row (assumes labels "1".."4", as used in `pairs`)
  categories <- as.character(subdata$ST89Q04)
  
  for (p in seq_len(nrow(pairs))) {
    # Find the two categories by value rather than by row position, so a
    # category with no rows at this N gives NA instead of silently comparing
    # the wrong groups.
    g1 <- match(as.character(pairs[p, 1]), categories)
    g2 <- match(as.character(pairs[p, 2]), categories)
    
    ST89Q04 <- list(
      N = n,
      GP1 = pairs[p, 1],
      GP2 = pairs[p, 2],
      true.theta = round(subdata$PV1MATH[g1] - subdata$PV1MATH[g2], 3),
      mirt.EAP = round(subdata$mirt.eap[g1] - subdata$mirt.eap[g2], 3),
      tam.WLE = round(subdata$tam.wle[g1] - subdata$tam.wle[g2], 3),
      tam.REG = round(subdata$tam.reg[g1] - subdata$tam.reg[g2], 3)
    )
    # rbind() labels each new row with the variable name ("ST89Q04")
    ST89Q04.out <- rbind(ST89Q04.out, ST89Q04)
  }
}
print(ST89Q04.out)

##         N    GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST89Q04 2000 1   2   0.225      0.203    0.255   0.255  
## ST89Q04 2000 1   3   0.562      0.472    0.548   0.549  
## ST89Q04 2000 1   4   0.556      0.354    0.41    0.41   
## ST89Q04 2000 2   3   0.337      0.269    0.293   0.294  
## ST89Q04 2000 2   4   0.331      0.151    0.155   0.155  
## ST89Q04 2000 3   4   -0.006     -0.118   -0.138  -0.139 
## ST89Q04 4000 1   2   0.282      0.202    0.25    0.25   
## ST89Q04 4000 1   3   0.306      0.218    0.282   0.283  
## ST89Q04 4000 1   4   0.607      0.303    0.373   0.373  
## ST89Q04 4000 2   3   0.024      0.016    0.032   0.032  
## ST89Q04 4000 2   4   0.325      0.101    0.123   0.123  
## ST89Q04 4000 3   4   0.301      0.085    0.09    0.09   
## ST89Q04 6000 1   2   0.207      0.189    0.236   0.236  
## ST89Q04 6000 1   3   0.293      0.325    0.392   0.392  
## ST89Q04 6000 1   4   0.513      0.277    0.389   0.389  
## ST89Q04 6000 2   3   0.086      0.136    0.156   0.156  
## ST89Q04 6000 2   4   0.306      0.087    0.153   0.153  
## ST89Q04 6000 3   4   0.22       -0.048   -0.003  -0.003

Group differences by ST89Q05

# Pairwise differences in mean theta between ST89Q05 response categories,
# computed separately for each sample size.
#
# ST89Q05.agg: mean generating theta (PV1MATH) and mean of each estimate
#              (mirt.eap, tam.wle, tam.reg) per N x category cell.
# ST89Q05.out: one row per (N, category pair) holding "GP1 minus GP2" for each
#              of the four means, rounded to 3 decimals.
ST89Q05.out <- NULL
ST89Q05.agg <- aggregate(cbind(PV1MATH, mirt.eap, tam.wle, tam.reg) ~ N + ST89Q05,
                         data = results, FUN = mean, na.rm = TRUE)

# loop over the sample sizes actually present instead of a hard-coded list
for (n in sort(unique(ST89Q05.agg$N))) {
  
  subdata <- ST89Q05.agg[ST89Q05.agg$N == n, ]
  # category label of each row (assumes labels "1".."4", as used in `pairs`)
  categories <- as.character(subdata$ST89Q05)
  
  for (p in seq_len(nrow(pairs))) {
    # Find the two categories by value rather than by row position, so a
    # category with no rows at this N gives NA instead of silently comparing
    # the wrong groups.
    g1 <- match(as.character(pairs[p, 1]), categories)
    g2 <- match(as.character(pairs[p, 2]), categories)
    
    ST89Q05 <- list(
      N = n,
      GP1 = pairs[p, 1],
      GP2 = pairs[p, 2],
      true.theta = round(subdata$PV1MATH[g1] - subdata$PV1MATH[g2], 3),
      mirt.EAP = round(subdata$mirt.eap[g1] - subdata$mirt.eap[g2], 3),
      tam.WLE = round(subdata$tam.wle[g1] - subdata$tam.wle[g2], 3),
      tam.REG = round(subdata$tam.reg[g1] - subdata$tam.reg[g2], 3)
    )
    # rbind() labels each new row with the variable name ("ST89Q05")
    ST89Q05.out <- rbind(ST89Q05.out, ST89Q05)
  }
}
print(ST89Q05.out)

##         N    GP1 GP2 true.theta mirt.EAP tam.WLE tam.REG
## ST89Q05 2000 1   2   -0.135     -0.082   -0.108  -0.108 
## ST89Q05 2000 1   3   -0.103     -0.02    -0.055  -0.055 
## ST89Q05 2000 1   4   0.28       0.26     0.344   0.344  
## ST89Q05 2000 2   3   0.032      0.062    0.053   0.053  
## ST89Q05 2000 2   4   0.415      0.342    0.452   0.453  
## ST89Q05 2000 3   4   0.383      0.28     0.399   0.399  
## ST89Q05 4000 1   2   -0.098     -0.064   -0.08   -0.08  
## ST89Q05 4000 1   3   -0.094     -0.101   -0.126  -0.126 
## ST89Q05 4000 1   4   0.024      -0.001   -0.022  -0.022 
## ST89Q05 4000 2   3   0.004      -0.037   -0.046  -0.046 
## ST89Q05 4000 2   4   0.122      0.063    0.058   0.058  
## ST89Q05 4000 3   4   0.118      0.1      0.104   0.104  
## ST89Q05 6000 1   2   -0.087     -0.082   -0.104  -0.104 
## ST89Q05 6000 1   3   -0.075     -0.068   -0.08   -0.08  
## ST89Q05 6000 1   4   -0.254     -0.213   -0.26   -0.26  
## ST89Q05 6000 2   3   0.013      0.014    0.024   0.024  
## ST89Q05 6000 2   4   -0.167     -0.131   -0.156  -0.156 
## ST89Q05 6000 3   4   -0.179     -0.145   -0.18   -0.18

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

PISA 2012 design background questions (ST89) as latent regressors

Clone this wiki locally