From 196b2f35d9b7cbd3fcd6572b3031237b25266191 Mon Sep 17 00:00:00 2001 From: evaaepelde Date: Wed, 18 Sep 2024 18:09:24 +0200 Subject: [PATCH] minor updates --- NAMESPACE | 1 + R/main.R | 3 ++ R/utils.R | 14 ++++++++ README.md | 86 +++++++++++++++++++++++++++++++----------------- docs/index.html | 4 ++- docs/pkgdown.yml | 2 +- man/calc_ep.Rd | 2 +- man/calc_tp.Rd | 2 +- paper/paper.md | 28 ++++++++-------- 9 files changed, 94 insertions(+), 48 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7167149..889c598 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,3 +25,4 @@ export(rename_values) export(standardize) export(weighted.median) export(weighted.quantile) +importFrom(dplyr,"%>%") diff --git a/R/main.R b/R/main.R index b9c9129..d9aadea 100644 --- a/R/main.R +++ b/R/main.R @@ -50,6 +50,7 @@ ex_shocks <- function(){ #' distributional impacts, if save TRUE. By default "DI_impacts". #' @param fig if TRUE (by default) create and save the figures of the distributional #' impacts calculated by the function. If FALSE do not create neither save. +#' @importFrom dplyr %>% #' @return a list containing the generated datasets summarising the basic or/and the #' intersectional distributional impacts per selected variable or set of variables. #' @export @@ -176,6 +177,7 @@ ex_var_intersec <- function(){ #' @param index energy poverty index or indices to be calculated. Possible #' options: 10%, 2M, LIHC, HEP, HEP_LI. If "all" (by default) calculates #' all the indices for the selected year/s. +#' @importFrom dplyr %>% #' @return a dataframe with the selected energy poverty indices #' @export calc_ep <- function(year, index = "all"){ @@ -256,6 +258,7 @@ calc_ep <- function(year, index = "all"){ #' @param index transport poverty index or indices to be calculated. Possible #' options: 10%, 2M, LIHC, VTU. If "all" (by default) calculates all the indices #' for the selected year/s. 
+#' @importFrom dplyr %>% #' @return a dataframe with the selected transport poverty indices #' @export calc_tp <- function(year, index = "all"){ diff --git a/R/utils.R b/R/utils.R index 733daee..7f3a37e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,6 +9,7 @@ options(dplyr.summarise.inform = FALSE) #' values detailed in the mapping included in the package. #' @param data dataset to be standardized. #' @param current_var column name to be standardized. +#' @importFrom dplyr %>% #' @return a dataset with labels renamed based in the mapping included in the package. #' @export rename_values = function(data, current_var) { @@ -35,6 +36,7 @@ rename_values = function(data, current_var) { #' #' Function to standarize data names. #' @param data dataset to be standardized. +#' @importFrom dplyr %>% #' @return a dataset with the variables and labels renamed based in the mapping included in the package. #' @export standardize <- function(data) { @@ -138,6 +140,7 @@ weighted.median <- function(x, w, na.rm=TRUE, type=2, collapse=TRUE) { #' #' Function to identify energy poor households from 2016 #' @param data dataset with the data from the HBS. +#' @importFrom dplyr %>% #' @return a dataset with HBS data where energy poor households are identified. #' @export id_ep1 <- function(data){ @@ -178,6 +181,7 @@ id_ep1 <- function(data){ #' #' Function to identify energy poor households before 2016 (included) #' @param data dataset with the data from the HBS. +#' @importFrom dplyr %>% #' @return a dataset with HBS data where energy poor households are identified. #' @export id_ep2 <- function(data){ @@ -218,6 +222,7 @@ id_ep2 <- function(data){ #' Function to identify transport poor households before 2015 (included) #' @param data dataset with the data from the HBS. #' @param year year of the HBS data. +#' @importFrom dplyr %>% #' @return a dataset with HBS data where transport poor households are identified. 
#' @export id_tp <- function(data, year){ @@ -286,6 +291,7 @@ id_tp <- function(data, year){ #' @param year year of the HBS you want to load. #' @param path Local path to the folder where the HBS's are stored. Not included in the package. #' @param path_outputs path to save the results (RData) +#' @importFrom dplyr %>% #' @return a list with the 3 files of the HBS. #' @export load_rawhbs <- function(year, path, path_outputs) { @@ -462,6 +468,7 @@ load_rawhbs <- function(year, path, path_outputs) { #' (HBS) according to the aggregation (coicop_year) specified in the package. #' @param data dataset with the data from the HBS. #' @param year year of the HBS to be modified according to the aggregation specified in the package. +#' @importFrom dplyr %>% #' @return a dataset with HBS data where COICOP categories are aggregated according to the classification specified in the package. #' @export add_coicop <- function(data, year) { @@ -508,6 +515,7 @@ add_coicop <- function(data, year) { #' @param year year of the HBS you want to elevate to national accounting. #' @param country country of the HBS you want to elevate. By default "ES" #' (for the moment it only works for Spain, so DO NOT TOUCH). +#' @importFrom dplyr %>% #' @return a dataset with the HBS data where expenses are elevated to national accounting. #' @export elevate_hbs <- function(data, year, country = "ES") { @@ -661,6 +669,7 @@ elevate_hbs <- function(data, year, country = "ES") { #' the package, if you are not going to aggregate the COICOP variables you have to #' replace the column labels by the COICOP variables that appear in your dataset. #' @param year base year for the simulation. It must be the same as the year of the HBS. +#' @importFrom dplyr %>% #' @return a dataset with the HBS data and the new expenses for COICOP categories #' after the application of the price shock. 
#' @export @@ -778,6 +787,7 @@ adjust_wh_is <- function(data, var_w, var_h) { #' Function to order the labels of the socioeconomic and demographic variables #' @param data dataset in which we want to order the labels of the socioeconomic and demographic variables #' @param g variable for which we want to sort the labels +#' @importFrom dplyr %>% #' @return a dataset in which the labels are ordered for the selected socioeconomic or demographic variable #' @export order_var <- function(data, g){ @@ -813,6 +823,7 @@ order_var <- function(data, g){ #' impacts for each of the variables specified in the package. If not, you can #' indicate a variable or a vector of variables to crate the graph.If you want to #' see the variables for which the function is available run `available_var_impact()`. +#' @importFrom dplyr %>% #' @return a graph per selected variable/s summarizing distributional impacts. #' @export basic_graph <- function(data, var = categories$categories){ @@ -874,6 +885,7 @@ basic_graph <- function(data, var = categories$categories){ #' @param fig generates and saves a graph that summarises the distributional impacts. #' By default it is TRUE, for the graph/s not to be generated and saved indicate FALSE. #' @param shocks_scenario_names vector of the names of the considered scenario shocks +#' @importFrom dplyr %>% #' @return a list containing the generated datasets (.RData) summarising the #' distributional impacts per selected variable. #' @export @@ -935,6 +947,7 @@ impact <- function(data, var = categories$categories, save = T, file_name = "D_i #' variables for the calculation, it must follow the same format as the output of #' `available_var_intersec()`, i.e. a table whose columns have category_a and #' category_b as their titles. +#' @importFrom dplyr %>% #' @return a graph per selected set of variables summarizing the distributional impacts. 
#' @export intersectional_graph <- function(data, pairs = is_categories){ @@ -1024,6 +1037,7 @@ intersectional_graph <- function(data, pairs = is_categories){ #' @param fig generates and saves a graph that summarises the intersectional distributional #' impacts. By default it is TRUE, for the graph/s not to be generated and saved indicate FALSE. #' @param shocks_scenario_names vector of the names of the considered scenario shocks +#' @importFrom dplyr %>% #' @return a list containing the generated datasets (.RData) summarising the intersectional #' distributional impacts per selected set of variables. #' @export diff --git a/README.md b/README.md index 464bd16..cd044c4 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +--- +editor_options: + markdown: + wrap: 72 +--- + # MEDUSA - Modelling Equity and DistribUtional impacts for Socioeconomic Analysis [![docs](https://github.com/bc3LC/medusa/actions/workflows/docs.yaml/badge.svg)](https://github.com/bc3LC/medusa/actions/workflows/docs.yaml) @@ -7,7 +13,7 @@ [![build](https://github.com/bc3LC/medusa/actions/workflows/build.yaml/badge.svg)](https://github.com/bc3LC/medusa/actions/workflows/build.yaml) [![draft-pdf](https://github.com/bc3LC/medusa/actions/workflows/draft-pdf.yml/badge.svg)](https://github.com/bc3LC/medusa/actions/workflows/draft-pdf.yml) -## Contents +## Contents {#contents} @@ -15,7 +21,7 @@ - [Introduction](#introduction) - - [The package](#pkg) + - [The package](#pkg) - [The microsimulation model](#ms-model) @@ -27,7 +33,7 @@ -## Introduction +## Introduction {#introduction} @@ -37,16 +43,29 @@ ### The package -MEDUSA is an R package that allows the development of distributional analyses in isolation or in connection with other models (soft links). 
The extensive database in which the microsimulation model is based allows for highly disaggregated results, taking into account numerous socioeconomic and demographic characteristics of households, such as income level, place of residence, type of family or the feminization degree of the household. Additionally, the package combines these with the calculation of energy and transport poverty indices. +MEDUSA is an R package that allows the development of distributional +analyses in isolation or in connection with other models (soft links). +The extensive database on which the microsimulation model is based +allows for highly disaggregated results, taking into account numerous +socioeconomic and demographic characteristics of households, such as +income level, place of residence, type of family or the feminization +degree of the household. Additionally, the package combines these with +the calculation of energy and transport poverty indices. -:exclamation::exclamation: **Note**: At the moment, `medusa` package works for Spain, but in the short term the idea is to extend it to all EU countries. Moreover, the package could be extended to all countries that are able to provide the raw data of the model. +:exclamation::exclamation: **Note**: At the moment, `medusa` package +works for Spain, but in the short term the idea is to extend it to all +EU countries. Moreover, the package could be extended to all countries +that are able to provide the raw data of the model. ### The microsimulation model -MEDUSA allows to develop distributional analyses based in an overnight effect microsimulation model. The model is built up with the microdata from the Household Budget Survey. For more details on the model in which `medusa` is based click [here](https://bc3lc.github.io/medusa/articles/TheModel.html). - +MEDUSA allows users to develop distributional analyses based on an overnight +effect microsimulation model. The model is built up with the microdata +from the Household Budget Survey. 
For more details on the model in which +`medusa` is based click +[here](https://bc3lc.github.io/medusa/articles/TheModel.html). -## Installation Guide +## Installation Guide {#installation-guide} @@ -60,17 +79,21 @@ To use `medusa` package first you need to follow this steps: - To download R click [here](https://www.r-project.org/) - - To download Rstudio click [here](https://www.rstudio.com/) + - To download Rstudio click [here](https://www.rstudio.com/) 2. Load `medusa` package. For this, open Rstudio and load the library: - + ``` r install.packages('remotes') + options(timeout=400) # to prevent errors if the connection is slow remotes::install_github("bc3LC/medusa") library(medusa) ``` -
-Now the package is fully loaded and you can start using its functions. + +:exclamation::exclamation: This may take a few minutes because the package contains a lot of data. + +
+Now the package is fully loaded and you can start using its functions.
## Getting Started @@ -81,21 +104,24 @@ Now the package is fully loaded and you can start using its functions. [Back to Contents](#contents) -The `medusa` package contains 3 main blocks of functions: - -1. Module 1. Functions to calculate distributional impacts - - Main function: `calc_di`, `ex_shocks`, `available_var_impact` , `available_var_intersec` and `ex_var_intersec`. - - Auxiliary functions: `load_rawhbs`, `rename_values`, `standardize`, `add_coicop`, `elevate_hbs`, `price_shock`, `impact`, `impact_intersectional`, `basic_graph`, `intersectional_graph`, `order_var`, `adjust_wh` and `adjust_wh_is`. - - -2. Module 2. Functions to calculate energy poverty indices - - Main function: `calc_ep` - - Auxiliary functions: `id_ep1`, `id_ep2`, `weighted.median` and `weighted.quantile`. - - -3. Module 3. Functions to calculate transport poverty indices - - Main function: `calc_tp` - - Auxiliary functions: `id_tp`, `weighted.median` and `weighted.quantile`. - - -In addition, the package includes some default input files (.Rda), that are read by the different functions. +The `medusa` package contains 3 main blocks of functions: + +1. Module 1. Functions to calculate distributional impacts + - Main function: `calc_di`, `ex_shocks`, `available_var_impact` , + `available_var_intersec` and `ex_var_intersec`. + - Auxiliary functions: `load_rawhbs`, `rename_values`, + `standardize`, `add_coicop`, `elevate_hbs`, `price_shock`, + `impact`, `impact_intersectional`, `basic_graph`, + `intersectional_graph`, `order_var`, `adjust_wh` and + `adjust_wh_is`. +2. Module 2. Functions to calculate energy poverty indices + - Main function: `calc_ep` + - Auxiliary functions: `id_ep1`, `id_ep2`, `weighted.median` and + `weighted.quantile`. +3. Module 3. Functions to calculate transport poverty indices + - Main function: `calc_tp` + - Auxiliary functions: `id_tp`, `weighted.median` and + `weighted.quantile`. 
+ +In addition, the package includes some default input files (.Rda), that +are read by the different functions. diff --git a/docs/index.html b/docs/index.html index a78881f..eedf493 100644 --- a/docs/index.html +++ b/docs/index.html @@ -200,11 +200,13 @@

Load medusa package. For this, open Rstudio and load the library:

 install.packages('remotes')
+options(timeout=400)                             # to prevent errors if the connection is slow
 remotes::install_github("bc3LC/medusa")
 library(medusa)
-


Now the package is fully loaded and you can start using its functions.

+

This may take a few minutes because the package contains a lot of data.

+


Now the package is fully loaded and you can start using its functions.

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 3ba57a2..3a88aeb 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -10,4 +10,4 @@ articles: Tutorials: Tutorials.html TutorialsEnergyPoverty: TutorialsEnergyPoverty.html TutorialsTransportPoverty: TutorialsTransportPoverty.html -last_built: 2024-09-13T08:27Z +last_built: 2024-09-18T14:47Z diff --git a/man/calc_ep.Rd b/man/calc_ep.Rd index 7c03260..cc69947 100644 --- a/man/calc_ep.Rd +++ b/man/calc_ep.Rd @@ -9,7 +9,7 @@ calc_ep(year, index = "all") \arguments{ \item{year}{year/s for energy poverty indices calculation} -\item{index}{energy poverty index or indices you want to calculate. Possible +\item{index}{energy poverty index or indices to be calculated. Possible options: 10%, 2M, LIHC, HEP, HEP_LI. If "all" (by default) calculates all the indices for the selected year/s.} } diff --git a/man/calc_tp.Rd b/man/calc_tp.Rd index 161584b..90ce82d 100644 --- a/man/calc_tp.Rd +++ b/man/calc_tp.Rd @@ -9,7 +9,7 @@ calc_tp(year, index = "all") \arguments{ \item{year}{year/s for transport poverty indices calculation} -\item{index}{transport poverty index or indices you want to calculate. Possible +\item{index}{transport poverty index or indices to be calculated. Possible options: 10%, 2M, LIHC, VTU. If "all" (by default) calculates all the indices for the selected year/s.} } diff --git a/paper/paper.md b/paper/paper.md index 5ca3eac..b4b2381 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -22,18 +22,18 @@ bibliography: paper.bib # Summary -Addressing 21$^{st}$-century challenges, such as climate change, demands policies that not only advance social justice but also prevent the exacerbation of existing inequalities. While Integrated Assessment Models (IAMs) have been a fundamental tool to carry out impact analyses of policies from a holistic perspective, micro-simulation models are crucial for identifying heterogeneous socioeconomic impacts and ensuring fairer and more targeted policies. 
+Addressing 21$^{st}$-century challenges, such as climate change, demands policies that not only advance social justice but also prevent the exacerbation of existing inequalities. While Integrated Assessment Models (IAMs) are a fundamental tool to carry out impact analyses of policies from a holistic perspective, micro-simulation models are crucial for identifying heterogeneous socioeconomic impacts and ensuring fairer and more targeted policies. `medusa` is an R package designed for conducting distributional analyses, either independently or in conjunction with other models, including IAMs. The extensive database in which the microsimulation model is based allows highly disaggregated results by considering a wide range of socioeconomic and demographic characteristics of households, such as income level, place of residence, type of family, and feminization degree. The package combines this detailed household data with the calculation of energy and transport poverty indices. The structure of the `medusa` package is summarized in Figure 1. ![Structure of the `medusa` package](figure1.png) -The `medusa` package is available online through the public domain at . Below is a simplified code example demonstrating how to execute the package. For a comprehensive introduction to `medusa`, a detailed step-by-step tutorial is provided in the form of an R vignette, accessible [here](https://bc3lc.github.io/medusa/). +The `medusa` package is available online through the public domain . Below is a simplified code example demonstrating how to execute the package. For a comprehensive introduction to `medusa`, a detailed step-by-step tutorial is provided in the form of an R vignette, accessible [here](https://bc3lc.github.io/medusa/). 
``` r install.packages("remotes") library(remotes) -remotes::install_github("bc3LC/medusa") +install_github("bc3LC/medusa") library(medusa) # Download the example file to enter price shocks @@ -41,22 +41,22 @@ ex_shocks() # After introducing the price shocks in the csv file, upload the edited file file_name <- read.csv(file, # File name or full file path - header = TRUE, # Read the header (TRUE) - sep = ",", # Value separator - dec = ".", # Decimal point - ...) # Additional arguments + header = TRUE, # Header indicator (set to TRUE) + sep = ",", # Value separator used in the file + dec = ".", # Decimal point format used + ...) # Additional arguments to be passed # Calculate distributional impacts calc_di( year, # Base year for the simulation - elevate = F, # Do not elevate the raw data - shocks = file_name, # Indicate the name of the uploaded file - var_impact = "DECILE", # Indicate the socioeconomic variable - ...) # Additional arguments + elevate = F, # Elevation of raw data (set to FALSE) + shocks = file_name, # Name of the uploaded file with shocks + var_impact = "DECILE", # Socioeconomic variable to be used + ...) # Additional arguments to be passed ``` # Statement of need -Addressing critical challenges like climate change requires ambitious policies that promote social justice without worsening existing inequalities, such as income or gender disparities [@alonso-epelde2024]. To ensure this, it is essential to conduct policy impact assessments that not only consider the economy, energy, land, and water systems holistically but also analyze the distributional impacts across different population groups [@bazoli2022; @walker2010]. While Integrated Assessment Models (IAMs) have been invaluable in policy evaluation [@van2020], they often lack the granularity needed to assess socio-economic disparities. 
Micro-simulation models for distributional analysis fill this gap by providing detailed, heterogeneous results, enabling policymakers to identify vulnerable populations and implement targeted compensatory measures [@tomas2023]. This ensures that policies are equitable and socially just. +Addressing critical challenges like climate change requires ambitious policies that promote social justice without worsening existing inequalities, such as income or gender disparities [@alonso-epelde2024]. To ensure this, it is essential to conduct policy impact assessments that not only consider the economy, energy, land, and water systems holistically but also analyze the distributional impacts across different population groups [@bazoli2022; @walker2010]. While Integrated Assessment Models (IAMs) are invaluable in policy evaluation [@van2020], they often lack the granularity needed to assess socio-economic disparities. Micro-simulation models for distributional analysis fill this gap by providing detailed, heterogeneous results, enabling policymakers to identify vulnerable populations and implement targeted compensatory measures [@tomas2023]. This ensures that policies are equitable and socially just. `medusa` facilitates distributional impact analyses through an overnight-effect microsimulation model, leveraging microdata from the Household Budget Survey (HBS), a standardized and comprehensive dataset available across EU countries [@eurostat2003]. The HBS offers detailed insights into household consumption patterns and socioeconomic characteristics at both household and individual levels, allowing for highly granular analysis. This enables the integration of an intersectional approach[^1] considering factors such as class, gender, and race, and provides more robust and nuanced results for assessing policy impacts on diverse population groups. 
@@ -68,7 +68,7 @@ The results derived from the model are presented as the relative impact ($\%$) o $$\Delta e_h^s = \frac{ \sum_c e_{c,h} (1+\Delta p_c^s) - \sum_c e_{c,h} }{ \sum_c e_{c,h} } \times 100$$ -here $e_{c,h}$ refers to the total spending on each consumption category, $c$, consumed by each of the household, $h$, in the baseline scenario and $\Delta p_c^s$ is the increase in prices by consumption category and scenario obtained with the price model. +where $e_{c,h}$ refers to the total spending on each consumption category $c$ consumed by each household $h$ in the baseline scenario and $\Delta p_c^s$ is the increase in prices by consumption category and scenario obtained with the price model. # Functionality @@ -82,7 +82,7 @@ The `medusa` package includes several functions that have been classified in 3 m The package includes default input files (.Rda), which are required for running the various functions, simplifying the process for users. -Output files are generated in both comma-separated values (CSV) and Portable Network Graphics (PNG) formats, with user control over file creation. When the `save` parameter is set to TRUE, the function saves a CSV file containing the selected results in the defined directory. Additionally, if the `fig` parameter is set to TRUE, the function produces and saves a bar plot to visualize the corresponding output. +Output files are generated in both comma-separated values (CSV) and portable network graphics (PNG) formats, with user control over file creation. When the `save` parameter is set to TRUE, the function saves a CSV file containing the selected results in the defined directory. Additionally, if the `fig` parameter is set to TRUE, the function produces and saves a bar plot to visualize the corresponding output. The package is actively evolving to meet research and policy needs, with several new features planned for future releases. 
For instance, an upcoming update will extend simulation capabilities to all EU countries, as the initial release currently covers only Spain. Additionally, we are developing a user interface designed to enable individuals without R programming expertise to perform socioeconomic analyses effectively.