Skip to content

Commit

Permalink
fixes some bugs with incomes <= 0 on svygei, svyatk and svyjdiv
Browse files Browse the repository at this point in the history
fixes misnamed statistic attribute in svyatk
  • Loading branch information
guilhermejacob committed Nov 10, 2023
1 parent 6c3c10f commit 7e677d8
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 53 deletions.
68 changes: 36 additions & 32 deletions R/svyatk.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ svyatk.survey.design <-
lin <-
matrix(lin ,
nrow = length(lin) ,
dimnames = list(names(lin) , strsplit(as.character(formula)[[2]] , ' \\+ ')[[1]]))
dimnames = list( rownames( design$variables ) , strsplit(as.character(formula)[[2]] , ' \\+ ')[[1]]))

# build result object
rval <- estimate
Expand All @@ -221,11 +221,11 @@ svyatk.survey.design <-
attr(rval, "var") <- variance
attr(rval, "statistic") <- "atkinson"
attr(rval, "epsilon") <- epsilon
if (linearized)
attr(rval, "linearized") <- lin
if (influence)
attr(rval , "influence") <-
sweep(lin , 1 , design$prob , "/")
if (linearized)
attr(rval, "linearized") <- lin
if (linearized |
influence)
attr(rval , "index") <- as.numeric(rownames(lin))
Expand Down Expand Up @@ -324,7 +324,7 @@ svyatk.svyrep.design <-
lin <-
matrix(lin ,
nrow = length(lin) ,
dimnames = list(names(lin) , strsplit(as.character(formula)[[2]] , ' \\+ ')[[1]]))
dimnames = list( rownames( design$variables ) , strsplit(as.character(formula)[[2]] , ' \\+ ')[[1]]))

}

Expand All @@ -333,10 +333,10 @@ svyatk.svyrep.design <-
names(rval) <-
strsplit(as.character(formula)[[2]] , ' \\+ ')[[1]]
attr(rval, "var") <- variance
attr(rval, "statistic") <- "gini"
if (linearized)
attr(rval , "linearized") <- lin
attr(rval, "statistic") <- "atkinson"
if (linearized)
attr(rval, "linearized") <- lin
if (linearized )
attr(rval , "index") <- as.numeric(rownames(lin))

# keep replicates
Expand Down Expand Up @@ -379,52 +379,56 @@ svyatk.DBIsvydesign <-

CalcAtkinson <-
function(yk , wk , epsilon) {

# filter observations
yk <- yk[wk != 0]
wk <- wk[wk != 0]
wk <- ifelse( yk > 0 & wk != 0 , wk , 0 )
yk <- ifelse( wk !=0 , yk , 1 )

# compute intermediate statistics
U0 <- sum(wk)
U1 <- sum(wk * yk)
T0 <- sum(wk * log(yk))
T1 <- sum(wk * yk * log(yk))
U0 <- sum( wk )
U1 <- sum( wk * yk )
T0 <- sum( wk * log( yk ) )
T1 <- sum( wk * yk * log( yk ) )

# compute values
if (epsilon == 1) {
res <- 1 - (U0 / U1) * exp(T0 / U0)
res <- 1 - ( U0 / U1 ) * exp( T0 / U0 )
} else {
U.neps <- sum(wk * yk ^ (1 - epsilon))
afac <- epsilon / (1 - epsilon)
bfac <- 1 / (1 - epsilon)
res <- 1 - (U.neps ^ bfac) / (U0 ^ afac * U1)
U.neps <- sum( wk * yk^( 1 - epsilon ) )
afac <- epsilon / ( 1 - epsilon )
bfac <- 1 / ( 1 - epsilon )
res <- 1 - ( U.neps^bfac ) / ( U0^afac * U1)
}
res

}

CalcAtkinson_IF <-
function(yk , wk , epsilon) {

# filter observations
wk <- ifelse( yk > 0 & wk != 0 , wk , 0 )
yk <- ifelse( wk !=0 , yk , 1 )

U0 <- sum( wk )
U1 <- sum( ifelse( wk != 0 , wk * yk , 0 ) )
T0 <- sum( ifelse( wk != 0 , wk * log( yk ) , 0 ) )
T1 <- sum( ifelse( wk != 0 , wk * yk * log( yk ) , 0 ) )
U1 <- sum( wk * yk )
T0 <- sum( wk * log( yk ) )
T1 <- sum( wk * yk * log( yk ) )
if (epsilon == 1) {
A1 <- CalcAtkinson(yk , wk , 1)
L1 <- ( A1 - 1 ) * (1 - T0 / U0) / U0
L2 <- yk * (1 - A1) / U1
L3 <- ifelse( wk != 0 , log( yk ) * ( A1 - 1 ) / U0 , 0 )
lin <- rowSums(cbind(L1 , L2 , L3))
A1 <- CalcAtkinson( yk , wk , 1 )
L1 <- ( A1 - 1 ) * (1 - T0 / U0 ) / U0
L2 <- yk * ( 1 - A1 ) / U1
L3 <- log( yk ) * ( A1 - 1 ) / U0
lin <- rowSums( cbind( L1 , L2 , L3 ) )
} else {
U.neps <- sum( ifelse( wk != 0 , wk * yk^( 1 - epsilon ) , 0 ) )
U.neps <- sum( wk * yk^( 1 - epsilon ) )
afac <- epsilon / ( 1 - epsilon )
bfac <- 1 / ( 1 - epsilon )
L1 <- afac * ( U.neps^bfac ) / ( U1 * U0^bfac )
L2 <- yk * ( U.neps^bfac ) / ( U0^afac * U1^2 )
L3 <-
- ifelse( wk != 0 ,
bfac * ( yk^( 1 - epsilon ) ) * (U.neps ^ afac) / ( U0^ afac * U1)
, 0 )
L3 <- - bfac * ( yk^( 1 - epsilon ) ) * ( U.neps^afac ) / ( U0^afac * U1 )
lin <- rowSums( cbind( L1 , L2 , L3 ) )
}
lin <- ifelse( wk != 0 , lin , 0)
lin <- ifelse( wk != 0 , lin , 0 )
lin
}
27 changes: 14 additions & 13 deletions R/svygei.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ svygei.survey.design <-
w <- 1 / design$prob

# check for strictly positive incomes
if (any(incvar[w > 0] <= 0 , na.rm = TRUE))
if (any(incvar[w != 0] <= 0 , na.rm = TRUE))
stop(
"The GEI indices are defined for strictly positive variables only.\nNegative and zero values not allowed."
)
Expand Down Expand Up @@ -205,7 +205,7 @@ svygei.survey.design <-
lin <-
matrix(lin ,
nrow = length(lin) ,
dimnames = list(names(w) , strsplit(as.character(formula)[[2]] , ' \\+ ')[[1]]))
dimnames = list( rownames( design$variables ) , strsplit(as.character(formula)[[2]] , ' \\+ ')[[1]]))

# build result object
rval <- estimate
Expand Down Expand Up @@ -256,7 +256,7 @@ svygei.svyrep.design <-
ws <- weights(design, "sampling")

# check for strictly positive incomes
if (any(incvar[ws > 0] == 0, na.rm = TRUE))
if (any(incvar[ws != 0] <= 0, na.rm = TRUE))
stop(
"The GEI indices are defined for strictly positive variables only.\nNegative and zero values not allowed."
)
Expand Down Expand Up @@ -319,7 +319,7 @@ svygei.svyrep.design <-
lin <-
matrix(lin ,
nrow = length(lin) ,
dimnames = list(names(lin) , strsplit(as.character(formula)[[2]] , ' \\+ ')[[1]]))
dimnames = list( rownames( design$variables ) , strsplit(as.character(formula)[[2]] , ' \\+ ')[[1]]))

}

Expand Down Expand Up @@ -377,12 +377,12 @@ CalcGEI <-
function(y , w, epsilon) {

# filter observations
y <- y[w > 0]
w <- w[w > 0]
w <- ifelse( y > 0 & w != 0 , w , 0 )
y <- ifelse( w!=0 , y , 1 )

# intermediate stats
N <- sum(w)
Ytot <- sum(w * y)
N <- sum( w )
Ytot <- sum( w * y )
Ybar <- Ytot / N

# branch on epsilon
Expand All @@ -394,7 +394,7 @@ CalcGEI <-
} else {
gscore <- (uscore ^ epsilon - 1) / (epsilon ^ 2 - epsilon)
}
gei <- sum(w * gscore) / N
gei <- sum( w * gscore ) / N

# return estimate
return(gei)
Expand All @@ -406,13 +406,14 @@ CalcGEI_IF <-
function(y , w , epsilon) {

# filter observations
w <- ifelse( y > 0 & w != 0 , w , 0 )
y <- ifelse( w!=0 , y , 1 )

# compute intermediate
N <- sum(w)
Ytot <- sum(w * y)
N <- sum( w )
Ytot <- sum( w * y )
Ybar <- Ytot / N
gei <- CalcGEI(y, w, epsilon)
gei <- CalcGEI( y , w , epsilon )

# income-inequality score
uscore <- y / Ybar
Expand All @@ -439,7 +440,7 @@ CalcGEI_IF <-

# linearized
ll <- l1 + l2
ll <- ifelse(w > 0 , ll , 0)
ll <- ifelse( w != 0 , ll , 0 )
ll

}
19 changes: 11 additions & 8 deletions R/svyjdiv.R
Original file line number Diff line number Diff line change
Expand Up @@ -363,29 +363,32 @@ svyjdiv.DBIsvydesign <-

# point estimate function
CalcJDiv <- function(y , w) {

# filter observations
y <- y[w > 0]
w <- w[w > 0]
w <- ifelse( y > 0 & w != 0 , w , 0 )
y <- ifelse( w!=0 , y , 1 )

# compute point esitmate
N <- sum(w)
mu <- sum(y * w) / N
jdiv <- ((y - mu) / mu) * log(y / mu)
N <- sum( w )
mu <- sum( y * w ) / N
jdiv <- ( ( y - mu ) / mu ) * log( y / mu )

# compute point estimate
sum(jdiv * w) / N
jdiv <- ifelse( w != 0 , jdiv , 0 )
sum( jdiv * w ) / N

}

# function to compute linearized function
CalcJDiv_IF <- function(y , w) {

# filter observations
w <- ifelse( y > 0 & w != 0 , w , 0 )
y <- ifelse( w!=0 , y , 1 )

# compute intermediate statistics
Ntot <- sum(w)
Ytot <- sum(y * w)
Ntot <- sum( w )
Ytot <- sum( y * w )
Ybar <- Ytot / Ntot
jdiv <-
sum(ifelse(w > 0 , w * ((y / Ybar) - 1) * log(y / Ybar) , 0)) / Ntot
Expand Down

0 comments on commit 7e677d8

Please sign in to comment.