Skip to content
This repository has been archived by the owner on Jun 2, 2021. It is now read-only.

Dev jebyrnes #5

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions R/refresh_coronavirus.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#' Refresh the 2019 Novel Coronavirus COVID-19 (2019-nCoV) Dataset in the Covid19R Project Format
#'
#' Downloads the daily summary of the Coronavirus (COVID-19) cases by
#' state/province from the coronavirus package's GitHub repository.
#'
#' @return A data.frame object (the result of `utils::read.csv()`, with
#'   strings kept as character) with the following columns:
#' * date - The date in YYYY-MM-DD form
#' * location - The name of the location as provided by the data source.
#' * location_type - The type of location using the covid19R controlled vocabulary.
#' * location_code - A standardized location code using a national or international standard. Drawn from \href{https://github.com/olahol/iso-3166-2.js/}{iso-3166-2.js}'s version
#' * location_code_type - The type of standardized location code being used according to the covid19R controlled vocabulary. Here we use `iso_3166_2`
#' * data_type - the type of data in that given row using the covid19R controlled vocabulary. Includes cases_new, deaths_new, recovered_new.
#' * value - number of cases of each data type
#' @export
#' @source coronavirus - Johns Hopkins University Center for Systems Science and Engineering (JHU CCSE) Coronavirus \href{https://systems.jhu.edu/research/public-health/ncov/}{website}
#'
#' @examples
#' \dontrun{
#' # update the data
#' jhu_covid19_dat <- refresh_coronavirus_jhu()
#' }
#'
refresh_coronavirus_jhu <- function() {
  # Location of the pre-formatted covid19R CSV published with the package.
  covid19r_csv_url <- "https://raw.githubusercontent.com/RamiKrispin/coronavirus/master/csv/coronavirus_covid19format.csv"

  # Text columns are kept as character vectors rather than factors.
  covid19r_data <- utils::read.csv(covid19r_csv_url, stringsAsFactors = FALSE)
  covid19r_data
}



#' Get information about the datasets provided by the coronavirus package
#'
#' @description Returns information about the datasets in this package for covid19R harvesting
#'
#' @return A one-row data.frame describing the dataset, with character
#'   columns `data_set_name`, `package_name`, `function_to_get_data`,
#'   `data_details`, `data_url`, `license_url`, `data_types`,
#'   `location_types` and `spatial_extent`, and the logical column
#'   `has_geospatial_info`.
#' @export get_info_coronavirus
#'
#' @examples
#' \dontrun{
#'
#' # get the dataset info from this package
#' get_info_coronavirus()
#' }
#'
get_info_coronavirus <- function() {
  data.frame(
    data_set_name = "coronavirus_jhu",
    package_name = "coronavirus",
    function_to_get_data = "refresh_coronavirus_jhu*",
    data_details = "The 2019 Novel Coronavirus COVID-19 (2019-nCoV) Dataset from the Johns Hopkins University Center for Systems Science and Engineering",
    data_url = "https://systems.jhu.edu/research/public-health/ncov/",
    license_url = "https://github.com/CSSEGISandData/COVID-19/",
    data_types = "cases_new, recovered_new, deaths_new",
    location_types = "country, state",
    spatial_extent = "global",
    # Previously an unnamed `TRUE`, which data.frame() turned into a column
    # with a mangled name; name the flag explicitly.
    has_geospatial_info = TRUE,
    # Keep text fields as character on every R version (factors were the
    # default before R 4.0).
    stringsAsFactors = FALSE
  )
}
61 changes: 61 additions & 0 deletions data_raw/data_covid19R.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#----------------------------------------------------
# Creating a covid19R compliant JHU coronavirus data set
# using coronavirus data
# https://github.com/CSSEGISandData/COVID-19
#
# Steps: normalise location names, attach ISO 3166-2 codes from
# get_code_table(), recode the data types to the covid19R vocabulary and
# keep the covid19R columns (plus lat/long).

`%>%` <- magrittr::`%>%`

# Run from the project root so the relative paths below resolve.
setwd(here::here())

# Load get_code_table(). The previous argument ("data-raw/dplyr::left_join")
# was not a file path; the helper lives next to this script in data_raw/.
source("data_raw/get_code_table.R")


# the initial data
# NOTE(review): `git_df` is not created in this script while the lines below
# stay commented out - it is assumed to already exist in the environment
# (columns used here: date, province, country, lat, long, type, cases).
# git_df <- read.csv("https://raw.githubusercontent.com/RamiKrispin/coronavirus/master/csv/coronavirus.csv",
# stringsAsFactors = FALSE)

# create valid locations
git_df_long_location <- git_df %>%
  dplyr::mutate(
    country = ifelse(country == "Korea, South", "South Korea", country),
    province = ifelse(
      province == "Bonaire, Sint Eustatius and Saba",
      "Bonaire and Sint Eustatius and Saba",
      province
    )
  ) %>%
  # "province, country" becomes the single covid19R `location` field
  tidyr::unite(location, province, country, sep = ", ") %>%
  dplyr::rename(
    data_type = type,
    value = cases
  ) %>%
  # fix some bad location names
  dplyr::mutate(
    # an empty province leaves a leading ", " after unite(); strip it
    location = gsub("^\\, ", "", location),
    # anything that still contains a comma is "province, country"
    location_type = ifelse(grepl("\\,", location), "state", "country")
  )

code_table <- get_code_table()

# add codes; `location` is the join key (stated explicitly so the join does
# not silently pick up any other shared column names)
coronavirus_covid19 <- dplyr::left_join(
  git_df_long_location,
  code_table,
  by = "location"
)

# fix data types (values outside these three become NA)
coronavirus_covid19 <- coronavirus_covid19 %>%
  dplyr::mutate(data_type = dplyr::case_when(
    data_type == "confirmed" ~ "cases_new",
    data_type == "recovered" ~ "recovered_new",
    data_type == "death" ~ "deaths_new"
  ))

coronavirus_covid19 <- coronavirus_covid19 %>%
  dplyr::select(
    date, location, location_type,
    location_code, location_code_type,
    data_type, value, lat, long
  )

# data checks
sum(is.na(coronavirus_covid19$location_code)) # make sure codes combine a-ok - will be >0 due to cruise ships
nrow(coronavirus_covid19) - nrow(git_df_long_location) # should be 0, or there was a one to many match

# write out
# NOTE(review): the line below writes `coronavirus`, not the
# `coronavirus_covid19` object built above - confirm before re-enabling.
# write.csv(coronavirus, "csv/coronavirus_covid19format.csv", row.names = FALSE)
print("covid19R compliant data done...")
100 changes: 100 additions & 0 deletions data_raw/get_code_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Build the lookup table that maps location names (as they appear in the JHU
# data after cleaning) to ISO 3166-2 codes.
#
# Downloads the iso-3166-2.js CSV, derives one code per country and one per
# "subdivision, country" pair, appends a few hand-maintained entries, and
# renames locations whose spelling differs from the JHU spelling.
#
# Returns a data frame with the columns `location`, `location_code` and
# `location_code_type` (always "iso_3166_2").
get_code_table <- function() {
  # Bind the pipe locally so the function works without relying on the
  # sourcing script having defined `%>%` globally.
  `%>%` <- magrittr::`%>%`

  # get iso 3166 2 codes
  # NOTE(review): read.csv() defaults to header = TRUE, so the first line of
  # the file is consumed as a header and renamed via col.names - confirm the
  # upstream file has a header row, otherwise its first record is dropped.
  iso_codes <-
    utils::read.csv(
      "https://github.com/olahol/iso-3166-2.js/raw/master/data.csv",
      col.names = c(
        "Country",
        "iso_3166_2",
        "name",
        "type",
        "Country_iso_3166_2"
      ),
      # Replaces the default "NA" marker, so a literal "NA" code is kept as
      # text (presumably intended for country codes such as "NA").
      na.strings = ".",
      # The ifelse() calls below need character columns; with factors
      # (the default before R 4.0) they would return integer codes.
      stringsAsFactors = FALSE
    ) %>%
    dplyr::mutate(
      name = ifelse(
        iso_3166_2 == "NL-BQ1",
        "Bonaire and Sint Eustatius and Saba",
        name
      ),
      Country = ifelse(iso_3166_2 == "VG-VG", "British Virgin Islands", Country),
      Country = ifelse(
        Country_iso_3166_2 == "CD",
        "The Democratic Republic Of The Congo",
        Country
      ),
      Country = ifelse(Country_iso_3166_2 == "CZ", "Czechia", Country)
    )

  # One row per country: the first country code seen for each country name.
  country_code <- iso_codes %>%
    dplyr::mutate(location = Country) %>%
    dplyr::group_by(location) %>%
    dplyr::summarize(location_code = Country_iso_3166_2[1]) %>%
    dplyr::bind_rows(
      data.frame(
        location = c("Cabo Verde", "Greenland, Denmark", "Cote d'Ivoire"),
        location_code = c("CV", "GL", "CI"),
        stringsAsFactors = FALSE
      )
    ) # do not know why these were missing

  # One row per "subdivision, country": the first subdivision code seen.
  province_code <- iso_codes %>%
    dplyr::mutate(type = ifelse(iso_3166_2 == "NL-AW", "Province", type)) %>% # problem with Aruba
    dplyr::filter(type != "Country") %>%
    dplyr::mutate(location = paste(name, Country, sep = ", ")) %>%
    dplyr::group_by(location) %>%
    dplyr::summarize(location_code = iso_3166_2[1]) %>%
    dplyr::bind_rows(
      data.frame(
        location = c(
          "Channel Islands, United Kingdom",
          "Tibet, China",
          "Inner Mongolia, China"
        ),
        location_code = c("GB-CHA", "CN-XZ", "CN-NM"),
        stringsAsFactors = FALSE
      )
    )

  # Combine both levels and rename locations to the spelling used in the
  # JHU data; anything not listed keeps its name.
  code_table <- dplyr::bind_rows(country_code, province_code) %>%
    dplyr::mutate(
      location_code_type = "iso_3166_2",
      location = dplyr::case_when(
        location == "Cayman Islands" ~ "Cayman Islands, United Kingdom",
        location == "Anguilla" ~ "Anguilla, United Kingdom",
        location == "Kinshasa, The Democratic Republic Of The Congo" ~ "Congo (Kinshasa)",
        location == "Brazzaville, Congo" ~ "Congo (Brazzaville)",
        location == "Brunei Darussalam" ~ "Brunei",
        location == "Myanmar" ~ "Burma",
        location == "Falkland Islands" ~ "Falkland Islands (Malvinas), United Kingdom",
        location == "Swaziland" ~ "Eswatini",
        location == "Bermuda" ~ "Bermuda, United Kingdom",
        location == "Curaçao" ~ "Curacao, Netherlands",
        location == "French Polynesia" ~ "French Polynesia, France",
        location == "British Virgin Islands" ~ "British Virgin Islands, United Kingdom",
        location == "Faroe Islands" ~ "Faroe Islands, Denmark",
        location == "French Guiana" ~ "French Guiana, France",
        location == "Gibraltar" ~ "Gibraltar, United Kingdom",
        location == "Vatican City" ~ "Holy See",
        location == "Isle of Man" ~ "Isle of Man, United Kingdom",
        location == "Kosovo-Metohija, Serbia" ~ "Kosovo",
        location == "Macau" ~ "Macau, China",
        location == "Montserrat" ~ "Montserrat, United Kingdom",
        location == "New Caledonia" ~ "New Caledonia, France",
        location == "Macedonia, the Former Yugoslav Republic Of" ~ "North Macedonia",
        location == "Reunion" ~ "Reunion, France",
        location == "Saint-Barthélemy, France" ~ "Saint Barthelemy, France",
        location == "Saint Kitts And Nevis" ~ "Saint Kitts and Nevis",
        location == "Saint-Pierre-et-Miquelon, France" ~ "Saint Pierre and Miquelon, France",
        location == "Saint Vincent And The Grenadines" ~ "Saint Vincent and the Grenadines",
        location == "St. Maarten" ~ "Sint Maarten, Netherlands",
        location == "Korea, Republic of" ~ "South Korea",
        location == "Saint-Martin, France" ~ "St Martin, France",
        location == "Taiwan" ~ "Taiwan*",
        location == "East Timor" ~ "Timor-Leste",
        location == "Turks & Caicos Islands" ~ "Turks and Caicos Islands, United Kingdom",
        location == "United States" ~ "US",
        location == "Viet Nam" ~ "Vietnam",
        location == "Gaza, Palestine" ~ "West Bank and Gaza",
        TRUE ~ location
      )
    )

  code_table
}