From 9c33fbffbcbe1f5a7517b3d75b6a53e6ffac3992 Mon Sep 17 00:00:00 2001 From: Diffform Date: Tue, 19 Mar 2019 14:27:31 +0100 Subject: [PATCH 1/8] Create function to create history coin lists as can be found on CMC, e.g. https://coinmarketcap.com/historical/20150505/ --- R/scraper_hist.R | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 R/scraper_hist.R diff --git a/R/scraper_hist.R b/R/scraper_hist.R new file mode 100644 index 0000000..92608d8 --- /dev/null +++ b/R/scraper_hist.R @@ -0,0 +1,58 @@ +#' Historical table scraper +#' +#' This web scrapes the historic price tables from CoinMarketCap +#' and provides back a dataframe for the coin provided as an input. +#' This function is a dependency of getCoins and is used +#' as part of a loop to retrieve all crypto currencies. +#' +#' @param attributes URL generated from \code{listCoins()} +#' @param slug Unique identifier required for merging +#' @param sleep Duration to sleep to resolve rate limiter +#' +#' @return Raw OHLC market data in a dataframe: +#' \item{slug}{Coin url slug} +#' \item{symbol}{Coin symbol} +#' \item{name}{Coin name} +#' \item{date}{Market date} +#' \item{open}{Market open} +#' \item{high}{Market high} +#' \item{low}{Market low} +#' \item{close}{Market close} +#' \item{volume}{Volume 24 hours} +#' \item{market}{USD Market cap} +#' +#' @importFrom dplyr "%>%" "mutate" "select" +#' @importFrom tibble "as_tibble" +#' @importFrom tidyr "separate" +#' @importFrom rvest "html_nodes" "html_table" +#' @importFrom xml2 "read_html" +#' @importFrom curl "new_handle" +#' +scraper_hist <- function(attributes, sleep = NULL) { + . <- "." 
+ history_url <- as.character(attributes) + if (!is.null(sleep)) Sys.sleep(sleep) + + page <- tryCatch( + xml2::read_html(history_url, + handle = curl::new_handle("useragent" = "Mozilla/5.0")), + error = function(e) e) + + if (inherits(page, "error")) { + closeAllConnections() + message("\n") + message(cli::cat_bullet("Rate limit hit. Sleeping for 60 seconds.", bullet = "warning", bullet_col = "red"), appendLF = TRUE) + Sys.sleep(65) + page <- xml2::read_html(history_url, + handle = curl::new_handle("useragent" = "Mozilla/5.0")) + } + + table <- rvest::html_nodes(page, css = "table") %>% .[1] %>% + rvest::html_table(fill = TRUE) %>% + replace(!nzchar(.), NA) + + scraper <- table[[1]][,2:3] %>% tibble::as_tibble() %>% + tidyr::separate(Name,sep = "\n",into=c("symbol","name")) %>% mutate(slug=sub("[^[:alnum:]]","-",sub("[[:punct:]]","-",tolower(name)))) %>% + dplyr::select(-Symbol) + return(scraper) +} From 771efe650942984f6a6f5aaa3e2f4df501eb9d57 Mon Sep 17 00:00:00 2001 From: Diffform Date: Tue, 19 Mar 2019 14:28:08 +0100 Subject: [PATCH 2/8] Add the possibility to scrape historical crypto_lists. --- R/crypto_list.R | 62 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/R/crypto_list.R b/R/crypto_list.R index e92a94c..edc7cc5 100644 --- a/R/crypto_list.R +++ b/R/crypto_list.R @@ -1,14 +1,18 @@ -#' Retrieves name, symbol, slug and rank for all tokens +#' Retrieves name, symbol, slug and rank for all tokens at specific historic date #' -#' List all of the crypto currencies that have existed on CoinMarketCap -#' and use this to populate the URL base for scraping historical market +#' List all of the crypto currencies that have existed on CoinMarketCap on specific date. +#' This can be used to add "dead" coins to the list of coins retrieved by `crypto_list()`. +#' Use this to populate/add to the URL base for scraping historical market #' data. 
It retrieves name, slug, symbol and rank of crypto currencies from #' CoinMarketCap and creates URLS for \code{scraper()} to use. #' #' @param coin Name, symbol or slug of crypto currency #' @param start_date Start date to retrieve data from, format yyyymmdd -#' @param end_date Start date to retrieve data from, format yyyymmdd +#' @param end_date End date to retrieve data from, format yyyymmdd, if not provided, today will be assumed +#' @param start_date_hist Start date to retrieve coin history from, format yyyymmdd +#' @param end_date_hist End date to retrieve coin history from, format yyyymmdd, if not provided, today will be assumed #' @param coin_list 'api', 'static' or NULL +#' @param date_gap 'months' #' #' @return Crypto currency historic OHLC market data in a dataframe: #' \item{symbol}{Coin symbol (not-unique)} @@ -21,7 +25,8 @@ #' Required dependency that is used in function call \code{getCoins()}. #' @importFrom tibble tibble #' @importFrom jsonlite fromJSON -#' @importFrom lubridate today +#' @importFrom lubridate today ymd +#' #' #' @examples #' \dontrun{ @@ -30,6 +35,10 @@ #' #' # return all coins #' coin_list <- crypto_list() +#' +#' # return all coins listed in 2015 +#' coins_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") +#' #' } #' #' @name crypto_list @@ -39,36 +48,52 @@ crypto_list <- function(coin = NULL, start_date = NULL, end_date = NULL, - coin_list = NULL) { + start_date_hist = NULL, + end_date_hist = NULL, + coin_list = NULL, + date_gap ="months") { + # get current coins if (is.null(coin_list)) { - json <- "https://s2.coinmarketcap.com/generated/search/quick_search.json" - coins <- jsonlite::fromJSON(json) + out_list <- out_list_recent <- NULL + if (!is.null(start_date_hist)){ + # create dates + if (is.null(end_date_hist)) end_date_hist <- lubridate::today() + dates <- as.Date(seq(ymd(start_date_hist),ymd(end_date_hist),date_gap)) + for (i in 1:length(dates)){ + attributes <- 
paste0("https://coinmarketcap.com/historical/",format(dates[i], "%Y%m%d"),"/") + out_list <- rbind(out_list,scraper_hist(attributes, sleep = NULL) %>% dplyr::mutate(hist_date=dates[i])) + } + } + if (is.null(end_dates_hist)){ + json <- "https://s2.coinmarketcap.com/generated/search/quick_search.json" + out_list_recent <- jsonlite::fromJSON(json) + } + coins <- rbind(out_list,out_list_recent %>% select(name,symbol,slug) %>% dplyr::mutate(hist_date=lubridate::today())) } else { ifelse(coin_list == "api", coins <- get_coinlist_api(), coins <- get_coinlist_static()) } - + # get historic coins if (!is.null(coin)) { - name <- coins$name - slug <- coins$slug - symbol <- coins$symbol + name <- coins$name %>% unique() + slug <- coins$slug %>% unique() + symbol <- coins$symbol %>% unique() c1 <- subset(coins, toupper(name) %in% toupper(coin)) c2 <- subset(coins, symbol %in% toupper(coin)) c3 <- subset(coins, slug %in% tolower(coin)) coins <- tibble::tibble() - if (nrow(c1) > 0) { coins <- rbind(coins, c1) } - if (nrow(c2) > 0) { coins <- rbind(coins, c2) } - if (nrow(c3) > 0) { coins <- rbind(coins, c3) } + if (nrow(c1) > 0) { coins <- rbind(coins, c1 %>% select(-hist_date)) } + if (nrow(c2) > 0) { coins <- rbind(coins, c2 %>% select(-hist_date)) } + if (nrow(c3) > 0) { coins <- rbind(coins, c3 %>% select(-hist_date)) } if (nrow(coins) > 1L) { coins <- unique(coins) } } coins <- tibble::tibble( symbol = coins$symbol, name = coins$name, - slug = coins$slug, - rank = coins$rank - ) + slug = coins$slug + ) %>% unique if (is.null(start_date)) { start_date <- "20130428" } if (is.null(end_date)) { end_date <- gsub("-", "", lubridate::today()) } exchangeurl <- paste0("https://coinmarketcap.com/currencies/", coins$slug, "/#markets") @@ -88,6 +113,5 @@ coins$slug <- as.character(coins$slug) coins$exchange_url <- as.character(exchange_url) coins$history_url <- as.character(history_url) - coins$rank <- as.numeric(coins$rank) return(coins) } From 92ca2a9a14c92256badc610791d7c3067d4dd1eb 
Mon Sep 17 00:00:00 2001 From: Diffform Date: Tue, 19 Mar 2019 14:44:48 +0100 Subject: [PATCH 3/8] Add possibility to get lists of historical coins via the historical snapshot facility of CMC. Needs scraper_hist() --- R/crypto_list.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/crypto_list.R b/R/crypto_list.R index edc7cc5..13c4c80 100644 --- a/R/crypto_list.R +++ b/R/crypto_list.R @@ -64,11 +64,12 @@ out_list <- rbind(out_list,scraper_hist(attributes, sleep = NULL) %>% dplyr::mutate(hist_date=dates[i])) } } - if (is.null(end_dates_hist)){ + coins <- out_list + if (is.null(end_date_hist)){ json <- "https://s2.coinmarketcap.com/generated/search/quick_search.json" out_list_recent <- jsonlite::fromJSON(json) + coins <- rbind(out_list,out_list_recent %>% select(name,symbol,slug) %>% dplyr::mutate(hist_date=lubridate::today())) } - coins <- rbind(out_list,out_list_recent %>% select(name,symbol,slug) %>% dplyr::mutate(hist_date=lubridate::today())) } else { ifelse(coin_list == "api", coins <- get_coinlist_api(), From 0a415e2942d67fc200239afac389c9b17d4b8283 Mon Sep 17 00:00:00 2001 From: Diffform Date: Tue, 19 Mar 2019 15:13:44 +0100 Subject: [PATCH 4/8] Prevent crypto from re-downloading the list if a list is provided. 
--- NAMESPACE | 4 ++++ R/crypto_history.R | 4 ++-- R/crypto_list.R | 1 - man/crypto_list.Rd | 22 +++++++++++++++++----- man/scraper_hist.Rd | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 man/scraper_hist.Rd diff --git a/NAMESPACE b/NAMESPACE index bd9687c..d763d9f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ importFrom(dplyr,"first") importFrom(dplyr,"group_by") importFrom(dplyr,"last") importFrom(dplyr,"mutate") +importFrom(dplyr,"select") importFrom(dplyr,"summarise") importFrom(dplyr,'%>%') importFrom(dplyr,'arrange') @@ -39,13 +40,16 @@ importFrom(jsonlite,read_json) importFrom(lubridate,"round_date") importFrom(lubridate,'mdy') importFrom(lubridate,today) +importFrom(lubridate,ymd) importFrom(rstudioapi,askForSecret) importFrom(rvest,"html_nodes") importFrom(rvest,"html_table") importFrom(tibble,"as.tibble") +importFrom(tibble,"as_tibble") importFrom(tibble,'as.tibble') importFrom(tibble,'tibble') importFrom(tibble,tibble) +importFrom(tidyr,"separate") importFrom(tidyr,'replace_na') importFrom(xml2,"read_html") importFrom(xts,"xts") diff --git a/R/crypto_history.R b/R/crypto_history.R index cd4bc78..11e9dc8 100644 --- a/R/crypto_history.R +++ b/R/crypto_history.R @@ -70,8 +70,8 @@ crypto_history <- function(coin = NULL, limit = NULL, start_date = NULL, end_dat message("ERC-20: 0x375923Bf82F0b728d23A5704261a6e16341fd860", appendLF = TRUE) message("XRP: rK59semLsuJZEWftxBFhWuNE6uhznjz2bK", appendLF = TRUE) message("\n") - - coins <- crypto_list(coin, start_date, end_date, coin_list) + # only if no coin_list is provided + if (is.null(coin_list)) coins <- crypto_list(coin, start_date, end_date, coin_list) if (!is.null(limit)) coins <- coins[1:limit, ] diff --git a/R/crypto_list.R b/R/crypto_list.R index 13c4c80..dafb8fd 100644 --- a/R/crypto_list.R +++ b/R/crypto_list.R @@ -18,7 +18,6 @@ #' \item{symbol}{Coin symbol (not-unique)} #' \item{name}{Coin name} #' \item{slug}{Coin URL slug 
(unique)} -#' \item{rank}{Current rank by market cap} #' \item{exchange_url}{Exchange market tables urls for scraping} #' \item{history_url}{Historical market tables urls for scraping} #' diff --git a/man/crypto_list.Rd b/man/crypto_list.Rd index 00cfd11..3aef444 100644 --- a/man/crypto_list.Rd +++ b/man/crypto_list.Rd @@ -2,19 +2,26 @@ % Please edit documentation in R/crypto_list.R \name{crypto_list} \alias{crypto_list} -\title{Retrieves name, symbol, slug and rank for all tokens} +\title{Retrieves name, symbol, slug and rank for all tokens at specific historic date} \usage{ crypto_list(coin = NULL, start_date = NULL, end_date = NULL, - coin_list = NULL) + start_date_hist = NULL, end_date_hist = NULL, coin_list = NULL, + date_gap = "months") } \arguments{ \item{coin}{Name, symbol or slug of crypto currency} \item{start_date}{Start date to retrieve data from, format yyyymmdd} -\item{end_date}{Start date to retrieve data from, format yyyymmdd} +\item{end_date}{End date to retrieve data from, format yyyymmdd, if not provided, today will be assumed} + +\item{start_date_hist}{Start date to retrieve coin history from, format yyyymmdd} + +\item{end_date_hist}{End date to retrieve coin history from, format yyyymmdd, if not provided, today will be assumed} \item{coin_list}{'api', 'static' or NULL} + +\item{date_gap}{'months'} } \value{ Crypto currency historic OHLC market data in a dataframe: @@ -28,8 +35,9 @@ Crypto currency historic OHLC market data in a dataframe: Required dependency that is used in function call \code{getCoins()}. } \description{ -List all of the crypto currencies that have existed on CoinMarketCap -and use this to populate the URL base for scraping historical market +List all of the crypto currencies that have existed on CoinMarketCap on specific date. +This can be used to add "dead" coins to the list of coins retrieved by `crypto_list()`. +Use this to populate/add to the URL base for scraping historical market data. 
It retrieves name, slug, symbol and rank of crypto currencies from CoinMarketCap and creates URLS for \code{scraper()} to use. } @@ -40,6 +48,10 @@ coins <- crypto_list(coin) # return all coins coin_list <- crypto_list() + +# return all coins listed in 2015 +coins_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") + } } diff --git a/man/scraper_hist.Rd b/man/scraper_hist.Rd new file mode 100644 index 0000000..893fe0c --- /dev/null +++ b/man/scraper_hist.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scraper_hist.R +\name{scraper_hist} +\alias{scraper_hist} +\title{Historical table scraper} +\usage{ +scraper_hist(attributes, sleep = NULL) +} +\arguments{ +\item{attributes}{URL generated from \code{listCoins()}} + +\item{sleep}{Duration to sleep to resolve rate limiter} + +\item{slug}{Unique identifier required for merging} +} +\value{ +Raw OHLC market data in a dataframe: + \item{slug}{Coin url slug} + \item{symbol}{Coin symbol} + \item{name}{Coin name} + \item{date}{Market date} + \item{open}{Market open} + \item{high}{Market high} + \item{low}{Market low} + \item{close}{Market close} + \item{volume}{Volume 24 hours} + \item{market}{USD Market cap} +} +\description{ +This web scrapes the historic price tables from CoinMarketCap +and provides back a dataframe for the coin provided as an input. +This function is a dependency of getCoins and is used +as part of a loop to retrieve all crypto currencies. +} From 35645c528d023e96e760f7d2dd4e380eb7ff08fb Mon Sep 17 00:00:00 2001 From: Diffform Date: Wed, 20 Mar 2019 23:38:59 +0100 Subject: [PATCH 5/8] Adapt crypto_history() function to accept modified lists as input for the coin retrieval e.g. gathered using a custom version of crypto_list(). 
--- NAMESPACE | 1 + R/crypto_history.R | 19 +++++++++++-------- R/crypto_list.R | 2 +- man/crypto_history.Rd | 10 +++++++--- man/crypto_list.Rd | 3 +-- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d763d9f..5df7202 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -47,6 +47,7 @@ importFrom(rvest,"html_table") importFrom(tibble,"as.tibble") importFrom(tibble,"as_tibble") importFrom(tibble,'as.tibble') +importFrom(tibble,'as_tibble') importFrom(tibble,'tibble') importFrom(tibble,tibble) importFrom(tidyr,"separate") diff --git a/R/crypto_history.R b/R/crypto_history.R index 11e9dc8..b6d744f 100644 --- a/R/crypto_history.R +++ b/R/crypto_history.R @@ -6,7 +6,8 @@ #' analysis on the crypto financial markets or to attempt #' to predict future market movements or trends. #' -#' @param coin string Name, symbol or slug of crypto currency, default is all tokens +#' @param coins string if NULL retrieve all currently existing coins (crypto_list()), +#' or provide list of crypto currencies in the crypto_list() format (e.g. 
current and dead coins since 2015) #' @param limit integer Return the top n records, default is all tokens #' @param start_date string Start date to retrieve data from, format 'yyyymmdd' #' @param end_date string End date to retrieve data from, format 'yyyymmdd' @@ -18,7 +19,6 @@ #' \item{symbol}{Coin symbol} #' \item{name}{Coin name} #' \item{date}{Market date} -#' \item{ranknow}{Current Rank} #' \item{open}{Market open} #' \item{high}{Market high} #' \item{low}{Market low} @@ -35,7 +35,7 @@ #' @importFrom tidyr 'replace_na' #' @importFrom crayon 'make_style' #' @importFrom grDevices 'rgb' -#' @importFrom tibble 'tibble' 'as.tibble' +#' @importFrom tibble 'tibble' 'as_tibble' #' @importFrom cli 'cat_bullet' #' @importFrom lubridate 'mdy' #' @@ -50,12 +50,16 @@ #' #' # Retrieving this years market history for ALL crypto currencies #' all_coins <- crypto_history(start_date = '20180101') +#' +#' # Retrieve 2015 history for all 2015 crypto currencies +#' coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") +#' 2015_coins <- crypto_history(coins = coin_list_2015, start_date = "20150101", end_date="20151231") #' } #' @name crypto_history #' #' @export #' -crypto_history <- function(coin = NULL, limit = NULL, start_date = NULL, end_date = NULL, +crypto_history <- function(coins = NULL, limit = NULL, start_date = NULL, end_date = NULL, coin_list = NULL, sleep = NULL) { pink <- crayon::make_style(grDevices::rgb(0.93, 0.19, 0.65)) options(scipen = 999) @@ -70,14 +74,13 @@ crypto_history <- function(coin = NULL, limit = NULL, start_date = NULL, end_dat message("ERC-20: 0x375923Bf82F0b728d23A5704261a6e16341fd860", appendLF = TRUE) message("XRP: rK59semLsuJZEWftxBFhWuNE6uhznjz2bK", appendLF = TRUE) message("\n") - # only if no coin_list is provided - if (is.null(coin_list)) coins <- crypto_list(coin, start_date, end_date, coin_list) + # only if no coins are provided + if (is.null(coins)) coins <- crypto_list(coin, start_date, 
end_date, coin_list) if (!is.null(limit)) coins <- coins[1:limit, ] - coin_names <- tibble::tibble(symbol = coins$symbol, name = coins$name, rank = coins$rank, - slug = coins$slug) + coin_names <- tibble::tibble(symbol = coins$symbol, name = coins$name,slug = coins$slug) to_scrape <- tibble::tibble(attributes = coins$history_url, slug = coins$slug) loop_data <- vector("list", nrow(to_scrape)) diff --git a/R/crypto_list.R b/R/crypto_list.R index dafb8fd..2c7384a 100644 --- a/R/crypto_list.R +++ b/R/crypto_list.R @@ -36,7 +36,7 @@ #' coin_list <- crypto_list() #' #' # return all coins listed in 2015 -#' coins_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") +#' coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") #' #' } #' diff --git a/man/crypto_history.Rd b/man/crypto_history.Rd index 2fe4e51..6357331 100644 --- a/man/crypto_history.Rd +++ b/man/crypto_history.Rd @@ -4,11 +4,12 @@ \alias{crypto_history} \title{Get historic crypto currency market data} \usage{ -crypto_history(coin = NULL, limit = NULL, start_date = NULL, +crypto_history(coins = NULL, limit = NULL, start_date = NULL, end_date = NULL, coin_list = NULL, sleep = NULL) } \arguments{ -\item{coin}{string Name, symbol or slug of crypto currency, default is all tokens} +\item{coins}{string if NULL retrieve all currently existing coins (crypto_list()), +or provide list of crypto currencies in the crypto_list() format (e.g. 
current and dead coins since 2015)} \item{limit}{integer Return the top n records, default is all tokens} @@ -26,7 +27,6 @@ Crypto currency historic OHLC market data in a dataframe: \item{symbol}{Coin symbol} \item{name}{Coin name} \item{date}{Market date} - \item{ranknow}{Current Rank} \item{open}{Market open} \item{high}{Market high} \item{low}{Market low} @@ -54,5 +54,9 @@ all_coins <- crypto_history(limit = 1) # Retrieving this years market history for ALL crypto currencies all_coins <- crypto_history(start_date = '20180101') + +# Retrieve 2015 history for all 2015 crypto currencies +coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") +2015_coins <- crypto_history(coins = coin_list_2015, start_date = "20150101", end_date="20151231") } } diff --git a/man/crypto_list.Rd b/man/crypto_list.Rd index 3aef444..74a4fed 100644 --- a/man/crypto_list.Rd +++ b/man/crypto_list.Rd @@ -28,7 +28,6 @@ Crypto currency historic OHLC market data in a dataframe: \item{symbol}{Coin symbol (not-unique)} \item{name}{Coin name} \item{slug}{Coin URL slug (unique)} - \item{rank}{Current rank by market cap} \item{exchange_url}{Exchange market tables urls for scraping} \item{history_url}{Historical market tables urls for scraping} @@ -50,7 +49,7 @@ coins <- crypto_list(coin) coin_list <- crypto_list() # return all coins listed in 2015 -coins_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") +coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") } From b96315ba109d7ca108f3d0ec7f3bd35bafa0e246 Mon Sep 17 00:00:00 2001 From: Diffform Date: Thu, 21 Mar 2019 23:07:54 +0100 Subject: [PATCH 6/8] Adapt crypto_history to new coinlist setting, create fake "ranknow", adapt much more tidyverse-notation. 
--- NAMESPACE | 2 ++ R/crypto_history.R | 51 +++++++++++++++++++++---------------------- R/crypto_list.R | 10 ++++++--- man/crypto_history.Rd | 2 +- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5df7202..233b35b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,8 @@ importFrom(dplyr,"summarise") importFrom(dplyr,'%>%') importFrom(dplyr,'arrange') importFrom(dplyr,'mutate') +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(grDevices,'rgb') importFrom(httr,"GET") diff --git a/R/crypto_history.R b/R/crypto_history.R index b6d744f..6cd85dc 100644 --- a/R/crypto_history.R +++ b/R/crypto_history.R @@ -31,11 +31,11 @@ #' This is the main function of the crypto package. If you want to retrieve #' ALL coins then do not pass a argument to crypto_history(), or pass the coin name. #' -#' @importFrom dplyr '%>%' 'mutate' 'arrange' +#' @importFrom dplyr '%>%' 'mutate' 'arrange' 'left_join' "group" "ungroup" "slice" #' @importFrom tidyr 'replace_na' #' @importFrom crayon 'make_style' #' @importFrom grDevices 'rgb' -#' @importFrom tibble 'tibble' 'as_tibble' +#' @importFrom tibble 'tibble' 'as_tibble' 'rowid_to_column' #' @importFrom cli 'cat_bullet' #' @importFrom lubridate 'mdy' #' @@ -53,7 +53,7 @@ #' #' # Retrieve 2015 history for all 2015 crypto currencies #' coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") -#' 2015_coins <- crypto_history(coins = coin_list_2015, start_date = "20150101", end_date="20151231") +#' coins_2015 <- crypto_history(coins = coin_list_2015, start_date = "20150101", end_date="20151231") #' } #' @name crypto_history #' @@ -75,7 +75,7 @@ crypto_history <- function(coins = NULL, limit = NULL, start_date = NULL, end_da message("XRP: rK59semLsuJZEWftxBFhWuNE6uhznjz2bK", appendLF = TRUE) message("\n") # only if no coins are provided - if (is.null(coins)) coins <- crypto_list(coin, start_date, end_date, coin_list) + if 
(is.null(coins)) coins <- crypto_list(coin=NULL, start_date, end_date, coin_list) if (!is.null(limit)) coins <- coins[1:limit, ] @@ -94,32 +94,31 @@ crypto_history <- function(coins = NULL, limit = NULL, start_date = NULL, end_da loop_data[[i]] <- scraper(to_scrape$attributes[i], to_scrape$slug[i], sleep) } - results <- do.call(rbind, loop_data) %>% tibble::as.tibble() + results <- do.call(rbind, loop_data) %>% tibble::as_tibble() if (length(results) == 0L) stop("No data currently exists for this crypto currency.", call. = FALSE) - market_data <- merge(results, coin_names, by = "slug") - colnames(market_data) <- c("slug", "date", "open", "high", "low", "close", "volume", - "market", "symbol", "name", "ranknow") - market_data <- market_data[c("slug", "symbol", "name", "date", "ranknow", "open", - "high", "low", "close", "volume", "market")] - market_data$date <- lubridate::mdy(market_data$date, locale = platform_locale()) + market_data <- results %>% left_join(coin_names, by = "slug") + colnames(market_data) <- c("date", "open", "high", "low", "close", "volume", + "market", "slug", "symbol", "name") + # calculate fake ranknow based on markt cap on the last date in the dataset + market_data %>% dplyr::group_by(symbol) %>% dplyr::arrange(desc(date)) %>% dplyr::slice(1) %>% dplyr::ungroup() %>% + tibble::rowid_to_column("ranknow") %>% dplyr::select(slug,ranknow) - market_data[, 5:11] <- apply(market_data[, 5:11], 2, function(x) gsub(",", "", - x)) - market_data[, 7:11] <- apply(market_data[, 7:11], 2, function(x) gsub("-", "0", - x)) - market_data$volume <- market_data$volume %>% tidyr::replace_na(0) %>% as.numeric() - market_data$market <- market_data$market %>% tidyr::replace_na(0) %>% as.numeric() - market_data[, 5:11] <- apply(market_data[, 5:11], 2, function(x) as.numeric(x)) - market_data <- na.omit(market_data) - - market_data <- market_data %>% dplyr::mutate(close_ratio = (close - low)/(high - - low) %>% round(4) %>% as.numeric(), spread = (high - low) %>% 
round(2) %>% - as.numeric()) - - market_data$close_ratio <- market_data$close_ratio %>% tidyr::replace_na(0) - history_results <- market_data %>% dplyr::arrange(ranknow, date) + history_results <- market_data %>% + # create fake ranknow + dplyr::left_join(market_data %>% dplyr::group_by(symbol) %>% dplyr::arrange(desc(date)) %>% dplyr::slice(1) %>% dplyr::ungroup() %>% + tibble::rowid_to_column("ranknow") %>% dplyr::select(slug,ranknow), by="slug") %>% + dplyr::select(slug,symbol,name,date,ranknow,open,high,low,close,volume,market) %>% + dplyr::mutate(date=lubridate::mdy(date, locale = platform_locale())) %>% + dplyr::mutate_at(vars(open,high,low,close,volume,market),~gsub(",","",.)) %>% + dplyr::mutate_at(vars(high,low,close,volume,market),~gsub("-","0",.)) %>% + dplyr::mutate_at(vars(open,high,low,close,volume,market),~as.numeric(tidyr::replace_na(.,0))) %>% + dplyr::mutate(close_ratio = (close - low)/(high - low) %>% round(4) %>% as.numeric(), + spread = (high - low) %>% round(2) %>% as.numeric()) %>% + dplyr::mutate_at(vars(close_ratio),~as.numeric(tidyr::replace_na(.,0))) %>% + dplyr::group_by(symbol) %>% + dplyr::arrange(ranknow,desc(date)) return(history_results) } diff --git a/R/crypto_list.R b/R/crypto_list.R index 2c7384a..8b9c59e 100644 --- a/R/crypto_list.R +++ b/R/crypto_list.R @@ -25,7 +25,7 @@ #' @importFrom tibble tibble #' @importFrom jsonlite fromJSON #' @importFrom lubridate today ymd -#' +#' @importFrom dplyr left_join mutate #' #' @examples #' \dontrun{ @@ -64,9 +64,13 @@ } } coins <- out_list + # always get list for data validation + json <- "https://s2.coinmarketcap.com/generated/search/quick_search.json" + out_list_recent <- jsonlite::fromJSON(json) + # validate name & slug via symbol from recent list + coins <- coins %>% dplyr::left_join(out_list_recent %>% select(symbol,name,slug) %>% rename(slug_main=slug, name_main=name),by="symbol") %>% + mutate(name=ifelse(is.na(name_main),name,name_main),slug=ifelse(is.na(slug_main),slug,slug_main)) 
%>% select(symbol, name, slug, hist_date) if (is.null(end_date_hist)){ - json <- "https://s2.coinmarketcap.com/generated/search/quick_search.json" - out_list_recent <- jsonlite::fromJSON(json) coins <- rbind(out_list,out_list_recent %>% select(name,symbol,slug) %>% dplyr::mutate(hist_date=lubridate::today())) } } else { diff --git a/man/crypto_history.Rd b/man/crypto_history.Rd index 6357331..046cc6a 100644 --- a/man/crypto_history.Rd +++ b/man/crypto_history.Rd @@ -57,6 +57,6 @@ all_coins <- crypto_history(start_date = '20180101') # Retrieve 2015 history for all 2015 crypto currencies coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") -2015_coins <- crypto_history(coins = coin_list_2015, start_date = "20150101", end_date="20151231") +coins_2015 <- crypto_history(coins = "coin_list_2015", start_date = "20150101", end_date="20151231") } } From 4fe6b9b23a166a1d0d99aa910bbb668382717f1b Mon Sep 17 00:00:00 2001 From: Diffform Date: Thu, 21 Mar 2019 23:07:54 +0100 Subject: [PATCH 7/8] Adapt crypto_history to new coinlist setting, create fake "ranknow", adapt much more tidyverse-notation. --- NAMESPACE | 2 ++ R/crypto_history.R | 48 ++++++++++++++++++++----------------------- R/crypto_list.R | 10 ++++++--- man/crypto_history.Rd | 2 +- 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5df7202..233b35b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,8 @@ importFrom(dplyr,"summarise") importFrom(dplyr,'%>%') importFrom(dplyr,'arrange') importFrom(dplyr,'mutate') +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(grDevices,'rgb') importFrom(httr,"GET") diff --git a/R/crypto_history.R b/R/crypto_history.R index b6d744f..1e148ce 100644 --- a/R/crypto_history.R +++ b/R/crypto_history.R @@ -31,11 +31,11 @@ #' This is the main function of the crypto package. 
If you want to retrieve #' ALL coins then do not pass a argument to crypto_history(), or pass the coin name. #' -#' @importFrom dplyr '%>%' 'mutate' 'arrange' +#' @importFrom dplyr '%>%' 'mutate' 'arrange' 'left_join' "group" "ungroup" "slice" #' @importFrom tidyr 'replace_na' #' @importFrom crayon 'make_style' #' @importFrom grDevices 'rgb' -#' @importFrom tibble 'tibble' 'as_tibble' +#' @importFrom tibble 'tibble' 'as_tibble' 'rowid_to_column' #' @importFrom cli 'cat_bullet' #' @importFrom lubridate 'mdy' #' @@ -53,7 +53,7 @@ #' #' # Retrieve 2015 history for all 2015 crypto currencies #' coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") -#' 2015_coins <- crypto_history(coins = coin_list_2015, start_date = "20150101", end_date="20151231") +#' coins_2015 <- crypto_history(coins = coin_list_2015, start_date = "20150101", end_date="20151231") #' } #' @name crypto_history #' @@ -75,7 +75,7 @@ crypto_history <- function(coins = NULL, limit = NULL, start_date = NULL, end_da message("XRP: rK59semLsuJZEWftxBFhWuNE6uhznjz2bK", appendLF = TRUE) message("\n") # only if no coins are provided - if (is.null(coins)) coins <- crypto_list(coin, start_date, end_date, coin_list) + if (is.null(coins)) coins <- crypto_list(coin=NULL, start_date, end_date, coin_list) if (!is.null(limit)) coins <- coins[1:limit, ] @@ -94,32 +94,28 @@ crypto_history <- function(coins = NULL, limit = NULL, start_date = NULL, end_da loop_data[[i]] <- scraper(to_scrape$attributes[i], to_scrape$slug[i], sleep) } - results <- do.call(rbind, loop_data) %>% tibble::as.tibble() + results <- do.call(rbind, loop_data) %>% tibble::as_tibble() if (length(results) == 0L) stop("No data currently exists for this crypto currency.", call. 
= FALSE) - market_data <- merge(results, coin_names, by = "slug") - colnames(market_data) <- c("slug", "date", "open", "high", "low", "close", "volume", - "market", "symbol", "name", "ranknow") - market_data <- market_data[c("slug", "symbol", "name", "date", "ranknow", "open", - "high", "low", "close", "volume", "market")] - market_data$date <- lubridate::mdy(market_data$date, locale = platform_locale()) + market_data <- results %>% left_join(coin_names, by = "slug") + colnames(market_data) <- c("date", "open", "high", "low", "close", "volume", + "market", "slug", "symbol", "name") - market_data[, 5:11] <- apply(market_data[, 5:11], 2, function(x) gsub(",", "", - x)) - market_data[, 7:11] <- apply(market_data[, 7:11], 2, function(x) gsub("-", "0", - x)) - market_data$volume <- market_data$volume %>% tidyr::replace_na(0) %>% as.numeric() - market_data$market <- market_data$market %>% tidyr::replace_na(0) %>% as.numeric() - market_data[, 5:11] <- apply(market_data[, 5:11], 2, function(x) as.numeric(x)) - market_data <- na.omit(market_data) - - market_data <- market_data %>% dplyr::mutate(close_ratio = (close - low)/(high - - low) %>% round(4) %>% as.numeric(), spread = (high - low) %>% round(2) %>% - as.numeric()) - - market_data$close_ratio <- market_data$close_ratio %>% tidyr::replace_na(0) - history_results <- market_data %>% dplyr::arrange(ranknow, date) + history_results <- market_data %>% + # create fake ranknow + dplyr::left_join(market_data %>% dplyr::group_by(symbol) %>% dplyr::arrange(desc(date)) %>% dplyr::slice(1) %>% dplyr::ungroup() %>% + tibble::rowid_to_column("ranknow") %>% dplyr::select(slug,ranknow), by="slug") %>% + dplyr::select(slug,symbol,name,date,ranknow,open,high,low,close,volume,market) %>% + dplyr::mutate(date=lubridate::mdy(date, locale = platform_locale())) %>% + dplyr::mutate_at(vars(open,high,low,close,volume,market),~gsub(",","",.)) %>% + dplyr::mutate_at(vars(high,low,close,volume,market),~gsub("-","0",.)) %>% + 
dplyr::mutate_at(vars(open,high,low,close,volume,market),~as.numeric(tidyr::replace_na(.,0))) %>% + dplyr::mutate(close_ratio = (close - low)/(high - low) %>% round(4) %>% as.numeric(), + spread = (high - low) %>% round(2) %>% as.numeric()) %>% + dplyr::mutate_at(vars(close_ratio),~as.numeric(tidyr::replace_na(.,0))) %>% + dplyr::group_by(symbol) %>% + dplyr::arrange(ranknow,desc(date)) return(history_results) } diff --git a/R/crypto_list.R b/R/crypto_list.R index 2c7384a..8b9c59e 100644 --- a/R/crypto_list.R +++ b/R/crypto_list.R @@ -25,7 +25,7 @@ #' @importFrom tibble tibble #' @importFrom jsonlite fromJSON #' @importFrom lubridate today ymd -#' +#' @importFrom dplyr left_join mutate #' #' @examples #' \dontrun{ @@ -64,9 +64,13 @@ } } coins <- out_list + # always get list for data validation + json <- "https://s2.coinmarketcap.com/generated/search/quick_search.json" + out_list_recent <- jsonlite::fromJSON(json) + # validate name & slug via symbol from recent list + coins <- coins %>% dplyr::left_join(out_list_recent %>% select(symbol,name,slug) %>% rename(slug_main=slug, name_main=name),by="symbol") %>% + mutate(name=ifelse(is.na(name_main),name,name_main),slug=ifelse(is.na(slug_main),slug,slug_main)) %>% select(symbol, name, slug, hist_date) if (is.null(end_date_hist)){ - json <- "https://s2.coinmarketcap.com/generated/search/quick_search.json" - out_list_recent <- jsonlite::fromJSON(json) coins <- rbind(out_list,out_list_recent %>% select(name,symbol,slug) %>% dplyr::mutate(hist_date=lubridate::today())) } } else { diff --git a/man/crypto_history.Rd b/man/crypto_history.Rd index 6357331..046cc6a 100644 --- a/man/crypto_history.Rd +++ b/man/crypto_history.Rd @@ -57,6 +57,6 @@ all_coins <- crypto_history(start_date = '20180101') # Retrieve 2015 history for all 2015 crypto currencies coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months") -2015_coins <- crypto_history(coins = coin_list_2015, start_date = "20150101", 
end_date="20151231") +coins_2015 <- crypto_history(coins = "coin_list_2015", start_date = "20150101", end_date="20151231") } } From b034fe8d7d87e7c1797e5ba627e192dd81ba283b Mon Sep 17 00:00:00 2001 From: Diffform Date: Fri, 22 Mar 2019 20:56:12 +0100 Subject: [PATCH 8/8] Correct small error that made code fail all the time --- R/scraper.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/scraper.R b/R/scraper.R index 3aa6beb..b20d74b 100644 --- a/R/scraper.R +++ b/R/scraper.R @@ -39,7 +39,7 @@ scraper <- function(attributes, slug, sleep = NULL) { error = function(e) e) if (inherits(page, "error")) { - if (grep("404",page$message)){cat("No coin data found for",coin_slug,"!\n")} else { + if (grepl("404",page$message)){cat("No coin data found for",coin_slug," ",foferror,"/",page$messsage,"!\n")} else { closeAllConnections() message("\n") message(cli::cat_bullet("Rate limit hit. Sleeping for 60 seconds.", bullet = "warning", bullet_col = "red"), appendLF = TRUE)