Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Research about fatality rates #10

Open
kenarab opened this issue May 24, 2020 · 0 comments
Open

Research about fatality rates #10

kenarab opened this issue May 24, 2020 · 0 comments
Assignees
Labels

Comments

@kenarab
Copy link
Collaborator

kenarab commented May 24, 2020

#install.packages("devtools")
#devtools::install_github("ROpenStats/COVID19analytics")

library(COVID19analytics)
#> Warning: replacing previous import 'ggplot2::Layout' by 'lgr::Layout' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::intersect' by 'lubridate::intersect'
#> when loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::union' by 'lubridate::union' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::setdiff' by 'lubridate::setdiff' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'readr::col_factor' by 'scales::col_factor'
#> when loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::equals' by 'testthat::equals' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::not' by 'testthat::not' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::is_less_than' by
#> 'testthat::is_less_than' when loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::matches' by 'testthat::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'testthat::matches' by 'tidyr::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
#> loading 'COVID19analytics'
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
# Generate daily plots
processor <- COVID19DataProcessor$new(provider.id = "JohnsHopkingsUniversity", missing.values.model.id = "imputation")
dummy <- processor$setupData()
#> INFO  [17:21:29.236]  {stage: processor-setup}
#> INFO  [17:21:29.263] Checking required downloaded  {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE}
#> INFO  [17:21:29.353] Checking required downloaded  {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE}
#> INFO  [17:21:29.374] Checking required downloaded  {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE}
#> INFO  [17:21:29.417]  {stage: data loaded}
#> INFO  [17:21:29.419]  {stage: data-setup}
dummy <- processor$transform()
#> INFO  [17:21:29.421] Executing transform 
#> INFO  [17:21:29.421] Executing consolidate 
#> INFO  [17:21:30.758]  {stage: consolidated}
#> INFO  [17:21:30.759] Executing standarize 
#> INFO  [17:21:30.814] gathering DataModel 
#> INFO  [17:21:30.815]  {stage: datamodel-setup}
dummy <- processor$curate()
#> INFO  [17:21:30.818]  {stage: loading-aggregated-data-model}
#> Warning in countrycode(x, origin = "country.name", destination = "continent"): Some values were not matched unambiguously: MS Zaandam
#> INFO  [17:21:32.584]  {stage: calculating-rates}
#> INFO  [17:21:32.727]  {stage: making-data-comparison}
#> INFO  [17:21:33.832]  {stage: applying-missing-values-method}
#> INFO  [17:21:33.833]  {stage: Starting first imputation}
#> INFO  [17:21:33.837]  {stage: calculating-rates}
#> INFO  [17:21:34.066]  {stage: making-data-comparison-2}
#> INFO  [17:21:35.068]  {stage: calculating-top-countries}
#> INFO  [17:21:35.084]  {stage: processed}


data.significative <- processor$data.agg %>% filter(confirmed >= 1000)
data.country.avg <- data.significative %>%
 group_by(country) %>%
 summarize(confirmed = max(confirmed),
           fatality.rate.min.mean = mean(fatality.rate.min),
           fatality.rate.min.cv   = sd(fatality.rate.min)/fatality.rate.min.mean,
           fatality.rate.max.mean =mean(fatality.rate.max),
           fatality.rate.max.cv   = sd(fatality.rate.max)/fatality.rate.max.mean) %>%
 arrange(fatality.rate.min.mean)
data.country.avg
#> # A tibble: 108 x 6
#>    country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m…
#>    <chr>       <int>            <dbl>            <dbl>            <dbl>
#>  1 Qatar       48947          0.00118           0.646           0.00223
#>  2 Singap…     32876          0.00170           0.750           0.00304
#>  3 Bahrain      9692          0.00261           0.439           0.00403
#>  4 Djibou…      2697          0.00309           0.451           0.00439
#>  5 Maldiv…      1457          0.00338           0.0925          0.00646
#>  6 Uzbeki…      3369          0.00408           0.115           0.00569
#>  7 Oman         8373          0.00480           0.0814          0.00840
#>  8 Iceland      1805          0.00488           0.236           0.00575
#>  9 Guinea…      1195          0.00545           0.110           0.0107 
#> 10 Guinea       3275          0.00563           0.104           0.00908
#> # … with 98 more rows, and 1 more variable: fatality.rate.max.cv <dbl>

ggplot(data.country.avg) + geom_histogram(aes(x = fatality.rate.min.mean), bins = 60)

least.letality <- data.country.avg %>%
                    arrange(fatality.rate.min.mean) %>%
                    filter(fatality.rate.min.mean <= 0.05 & confirmed >= 30000) %>%
                    arrange(desc(confirmed))
least.letality
#> # A tibble: 17 x 6
#>    country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m…
#>    <chr>       <int>            <dbl>            <dbl>            <dbl>
#>  1 US        1699176          0.0452            0.358           0.0828 
#>  2 Russia     370680          0.00868           0.164           0.0160 
#>  3 Germany    181524          0.0262            0.654           0.0328 
#>  4 Turkey     159797          0.0241            0.154           0.0387 
#>  5 India      158086          0.0314            0.0748          0.0543 
#>  6 Peru       135905          0.0285            0.134           0.0463 
#>  7 China       84106          0.0409            0.288           0.0488 
#>  8 Chile       82289          0.0103            0.323           0.0163 
#>  9 Saudi …     78541          0.00831           0.397           0.0146 
#> 10 Pakist…     59151          0.0187            0.238           0.0328 
#> 11 Qatar       48947          0.00118           0.646           0.00223
#> 12 Belarus     38956          0.00704           0.264           0.0127 
#> 13 Bangla…     38292          0.0220            0.425           0.0415 
#> 14 Singap…     32876          0.00170           0.750           0.00304
#> 15 United…     31969          0.00774           0.236           0.0134 
#> 16 Portug…     31292          0.0331            0.309           0.0612 
#> 17 Switze…     30776          0.0431            0.454           0.0541 
#> # … with 1 more variable: fatality.rate.max.cv <dbl>

most.letality <- data.country.avg %>%
  arrange(fatality.rate.min.mean) %>%
  filter(fatality.rate.min.mean > 0.05 & confirmed >= 30000) %>%
  arrange(desc(confirmed))
most.letality
#> # A tibble: 11 x 6
#>    country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m…
#>    <chr>       <int>            <dbl>            <dbl>            <dbl>
#>  1 Brazil     411821           0.0561            0.289           0.0903
#>  2 United…    268619           0.124             0.311           0.231 
#>  3 Spain      236259           0.0941            0.315           0.137 
#>  4 Italy      231139           0.112             0.313           0.175 
#>  5 France     183067           0.109             0.465           0.173 
#>  6 Iran       141591           0.0590            0.195           0.0806
#>  7 Mexico      78023           0.0842            0.294           0.117 
#>  8 Belgium     57592           0.117             0.474           0.190 
#>  9 Nether…     45970           0.102             0.325           0.192 
#> 10 Ecuador     38103           0.0532            0.376           0.0971
#> 11 Sweden      35088           0.0874            0.491           0.157 
#> # … with 1 more variable: fatality.rate.max.cv <dbl>


compared.countries <- unique(c(least.letality$country,
                               "Argentina", "Brazil", "Chile", "US", "Japan", "Korea, South", "Germany", "Japan"))
compared.countries
#>  [1] "US"                   "Russia"               "Germany"             
#>  [4] "Turkey"               "India"                "Peru"                
#>  [7] "China"                "Chile"                "Saudi Arabia"        
#> [10] "Pakistan"             "Qatar"                "Belarus"             
#> [13] "Bangladesh"           "Singapore"            "United Arab Emirates"
#> [16] "Portugal"             "Switzerland"          "Argentina"           
#> [19] "Brazil"               "Japan"                "Korea, South"

rg <- ReportGeneratorEnhanced$new(data.processor = processor)

ggplot <- rg$ggplotCountriesLines(included.countries = compared.countries,
                                  min.confirmed = 100,
                                  field.description  = "Death Rates min",
                                  field = "fatality.rate.min", countries.text = "Compared Countries",
                                  log.scale = FALSE)
ggplot

ggplot <- rg$ggplotCrossSection(included.countries = compared.countries,
                                field.x = "confirmed",
                                field.y = "fatality.rate.min",
                                plot.description  = "Cross section Confirmed vs  Death rate min",
                                log.scale.x = TRUE,
                                log.scale.y = FALSE)
ggplot

ggplot <- rg$ggplotCrossSection(included.countries = most.letality$country,
                                field.x = "confirmed",
                                field.y = "fatality.rate.min",
                                plot.description  = "Cross section Confirmed vs  Death rate min",
                                log.scale.x = TRUE,
                                log.scale.y = FALSE)
ggplot

Created on 2020-05-28 by the reprex package (v0.3.0)

@kenarab kenarab self-assigned this May 24, 2020
@kenarab kenarab added the RR label May 28, 2020
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

1 participant