Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Historical crypto_list based on web_snapshots at cmc #40

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,17 @@ importFrom(dplyr,"first")
importFrom(dplyr,"group_by")
importFrom(dplyr,"last")
importFrom(dplyr,"mutate")
importFrom(dplyr,"select")
importFrom(dplyr,"summarise")
importFrom(dplyr,'%>%')
importFrom(dplyr,'arrange')
importFrom(dplyr,'group_by')
importFrom(dplyr,'left_join')
importFrom(dplyr,'mutate')
importFrom(dplyr,'slice')
importFrom(dplyr,'ungroup')
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(grDevices,'rgb')
importFrom(httr,"GET")
Expand All @@ -39,13 +46,18 @@ importFrom(jsonlite,read_json)
importFrom(lubridate,"round_date")
importFrom(lubridate,'mdy')
importFrom(lubridate,today)
importFrom(lubridate,ymd)
importFrom(rstudioapi,askForSecret)
importFrom(rvest,"html_nodes")
importFrom(rvest,"html_table")
importFrom(tibble,"as.tibble")
importFrom(tibble,"as_tibble")
importFrom(tibble,'as.tibble')
importFrom(tibble,'as_tibble')
importFrom(tibble,'rowid_to_column')
importFrom(tibble,'tibble')
importFrom(tibble,tibble)
importFrom(tidyr,"separate")
importFrom(tidyr,'replace_na')
importFrom(xml2,"read_html")
importFrom(xts,"xts")
76 changes: 42 additions & 34 deletions R/crypto_history.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@
#' analysis on the crypto financial markets or to attempt
#' to predict future market movements or trends.
#'
#' @param coin string Name, symbol or slug of crypto currency, default is all tokens
#' @param coins string if NULL retrieve all currently existing coins (crypto_list()),
#' or provide list of crypto currencies in the crypto_list() format (e.g. current and dead coins since 2015)
#' @param limit integer Return the top n records, default is all tokens
#' @param start_date string Start date to retrieve data from, format 'yyyymmdd'
#' @param end_date string End date to retrieve data from, format 'yyyymmdd'
#' @param coin_list string Valid values are 'api', 'static' or NULL
#' @param sleep integer Seconds to sleep for between API requests
#'
#' @return Crypto currency historic OHLC market data in a dataframe:
#' @return Crypto currency historic OHLC market data in a dataframe and additional information via attribute "info":
#' \item{slug}{Coin url slug}
#' \item{symbol}{Coin symbol}
#' \item{name}{Coin name}
#' \item{date}{Market date}
#' \item{ranknow}{Current Rank}
#' \item{open}{Market open}
#' \item{high}{Market high}
#' \item{low}{Market low}
Expand All @@ -27,15 +27,18 @@
#' \item{market}{USD Market cap}
#' \item{close_ratio}{Close rate, min-maxed with the high and low values that day}
#' \item{spread}{Volatility premium, high minus low for that day}
#' \item{start_date}{in info: Beginning of historic data}
#' \item{end_date}{in info: End of historic data}
#' \item{message}{in info: Either "Success" when data was available or error message from scraper}
#'
#' This is the main function of the crypto package. If you want to retrieve
#' ALL coins then do not pass an argument to crypto_history(), or pass the coin name.
#'
#' @importFrom dplyr '%>%' 'mutate' 'arrange'
#' @importFrom dplyr '%>%' 'mutate' 'arrange' 'left_join' 'group_by' 'ungroup' 'slice'
#' @importFrom tidyr 'replace_na'
#' @importFrom crayon 'make_style'
#' @importFrom grDevices 'rgb'
#' @importFrom tibble 'tibble' 'as.tibble'
#' @importFrom tibble 'tibble' 'as_tibble' 'rowid_to_column'
#' @importFrom cli 'cat_bullet'
#' @importFrom lubridate 'mdy'
#'
Expand All @@ -50,12 +53,16 @@
#'
#' # Retrieving this years market history for ALL crypto currencies
#' all_coins <- crypto_history(start_date = '20180101')
#'
#' # Retrieve 2015 history for all 2015 crypto currencies
#' coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months")
#' coins_2015 <- crypto_history(coins = coin_list_2015, start_date = "20150101", end_date="20151231")
#' }
#' @name crypto_history
#'
#' @export
#'
crypto_history <- function(coin = NULL, limit = NULL, start_date = NULL, end_date = NULL,
crypto_history <- function(coins = NULL, limit = NULL, start_date = NULL, end_date = NULL,
coin_list = NULL, sleep = NULL) {
pink <- crayon::make_style(grDevices::rgb(0.93, 0.19, 0.65))
options(scipen = 999)
Expand All @@ -70,16 +77,16 @@ crypto_history <- function(coin = NULL, limit = NULL, start_date = NULL, end_dat
message("ERC-20: 0x375923Bf82F0b728d23A5704261a6e16341fd860", appendLF = TRUE)
message("XRP: rK59semLsuJZEWftxBFhWuNE6uhznjz2bK", appendLF = TRUE)
message("\n")

coins <- crypto_list(coin, start_date, end_date, coin_list)
# only if no coins are provided
if (is.null(coins)) coins <- crypto_list(coin=NULL, start_date, end_date, coin_list)

if (!is.null(limit))
coins <- coins[1:limit, ]

coin_names <- tibble::tibble(symbol = coins$symbol, name = coins$name, rank = coins$rank,
slug = coins$slug)
coin_names <- tibble::tibble(symbol = coins$symbol, name = coins$name,slug = coins$slug)
to_scrape <- tibble::tibble(attributes = coins$history_url, slug = coins$slug)
loop_data <- vector("list", nrow(to_scrape))
loop_info <- vector("list", nrow(to_scrape))

message(cli::cat_bullet("Scraping historical crypto data", bullet = "pointer",
bullet_col = "green"))
Expand All @@ -88,35 +95,36 @@ crypto_history <- function(coin = NULL, limit = NULL, start_date = NULL, end_dat

for (i in seq_len(nrow(to_scrape))) {
pb$tick()
loop_data[[i]] <- scraper(to_scrape$attributes[i], to_scrape$slug[i], sleep)
temp <- scraper(to_scrape$attributes[i], to_scrape$slug[i], sleep)
loop_info[[i]] <- temp$info
loop_data[[i]] <- temp$data
}

results <- do.call(rbind, loop_data) %>% tibble::as.tibble()
results <- do.call(rbind, loop_data) %>% tibble::as_tibble()
results_info <- do.call(rbind, loop_info) %>% tibble::as_tibble()

if (length(results) == 0L)
stop("No data currently exists for this crypto currency.", call. = FALSE)

market_data <- merge(results, coin_names, by = "slug")
colnames(market_data) <- c("slug", "date", "open", "high", "low", "close", "volume",
"market", "symbol", "name", "ranknow")
market_data <- market_data[c("slug", "symbol", "name", "date", "ranknow", "open",
"high", "low", "close", "volume", "market")]
market_data$date <- lubridate::mdy(market_data$date, locale = platform_locale())

market_data[, 5:11] <- apply(market_data[, 5:11], 2, function(x) gsub(",", "",
x))
market_data[, 7:11] <- apply(market_data[, 7:11], 2, function(x) gsub("-", "0",
x))
market_data$volume <- market_data$volume %>% tidyr::replace_na(0) %>% as.numeric()
market_data$market <- market_data$market %>% tidyr::replace_na(0) %>% as.numeric()
market_data[, 5:11] <- apply(market_data[, 5:11], 2, function(x) as.numeric(x))
market_data <- na.omit(market_data)

market_data <- market_data %>% dplyr::mutate(close_ratio = (close - low)/(high -
low) %>% round(4) %>% as.numeric(), spread = (high - low) %>% round(2) %>%
as.numeric())
market_data <- results %>% left_join(coin_names, by = "slug")
colnames(market_data) <- c("date", "open", "high", "low", "close", "volume",
"market", "slug", "symbol", "name")

market_data$close_ratio <- market_data$close_ratio %>% tidyr::replace_na(0)
history_results <- market_data %>% dplyr::arrange(ranknow, date)
return(history_results)
history_results <- market_data %>%
# create fake ranknow
dplyr::left_join(market_data %>% dplyr::group_by(symbol) %>% dplyr::arrange(desc(date)) %>% dplyr::slice(1) %>% dplyr::ungroup() %>%
tibble::rowid_to_column("ranknow") %>% dplyr::select(slug,ranknow), by="slug") %>%
dplyr::select(slug,symbol,name,date,ranknow,open,high,low,close,volume,market) %>%
dplyr::mutate_at(vars(open,high,low,close,volume,market),~gsub(",","",.)) %>%
dplyr::mutate_at(vars(high,low,close,volume,market),~gsub("-","0",.)) %>%
dplyr::mutate_at(vars(open,high,low,close,volume,market),~as.numeric(tidyr::replace_na(.,0))) %>%
dplyr::mutate(close_ratio = (close - low)/(high - low) %>% round(4) %>% as.numeric(),
spread = (high - low) %>% round(2) %>% as.numeric()) %>%
dplyr::mutate_at(vars(close_ratio),~as.numeric(tidyr::replace_na(.,0))) %>%
dplyr::group_by(symbol) %>%
dplyr::arrange(ranknow,desc(date))
# info output
coins_info <- coins %>% left_join(results_info,by="slug")
out <- history_results; attr(out,"info") <- coins_info
return(out)
}
68 changes: 49 additions & 19 deletions R/crypto_list.R
Original file line number Diff line number Diff line change
@@ -1,27 +1,31 @@
#' Retrieves name, symbol, slug and rank for all tokens
#' Retrieves name, symbol, slug and rank for all tokens at a specific historic date
#'
#' List all of the crypto currencies that have existed on CoinMarketCap
#' and use this to populate the URL base for scraping historical market
#' List all of the crypto currencies that have existed on CoinMarketCap on a specific date.
#' This can be used to add "dead" coins to the list of coins retrieved by `crypto_list()`.
#' Use this to populate/add to the URL base for scraping historical market
#' data. It retrieves name, slug, symbol and rank of crypto currencies from
#' CoinMarketCap and creates URLs for \code{scraper()} to use.
#'
#' @param coin Name, symbol or slug of crypto currency
#' @param start_date Start date to retrieve data from, format yyyymmdd
#' @param end_date Start date to retrieve data from, format yyyymmdd
#' @param end_date End date to retrieve data from, format yyyymmdd, if not provided, today will be assumed
#' @param start_date_hist Start date to retrieve coin history from, format yyyymmdd
#' @param end_date_hist End date to retrieve coin history from, format yyyymmdd, if not provided, today will be assumed
#' @param coin_list 'api', 'static' or NULL
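#' @param date_gap Step between historical snapshot dates, passed to \code{seq()} when \code{start_date_hist} is given (e.g. 'months'); default is 'months'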
#'
#' @return Crypto currency list with scraping URLs in a dataframe:
#' \item{symbol}{Coin symbol (not-unique)}
#' \item{name}{Coin name}
#' \item{slug}{Coin URL slug (unique)}
#' \item{rank}{Current rank by market cap}
#' \item{exchange_url}{Exchange market tables urls for scraping}
#' \item{history_url}{Historical market tables urls for scraping}
#'
#' Required dependency that is used in function call \code{getCoins()}.
#' @importFrom tibble tibble
#' @importFrom jsonlite fromJSON
#' @importFrom lubridate today
#' @importFrom lubridate today ymd
#' @importFrom dplyr left_join mutate
#'
#' @examples
#' \dontrun{
Expand All @@ -30,6 +34,10 @@
#'
#' # return all coins
#' coin_list <- crypto_list()
#'
#' # return all coins listed in 2015
#' coin_list_2015 <- crypto_list(start_date_hist="20150101",end_date_hist="20151231",date_gap="months")
#'
#' }
#'
#' @name crypto_list
Expand All @@ -39,36 +47,59 @@
crypto_list <- function(coin = NULL,
start_date = NULL,
end_date = NULL,
coin_list = NULL) {
start_date_hist = NULL,
end_date_hist = NULL,
coin_list = NULL,
date_gap ="months") {
# get current coins
if (is.null(coin_list)) {
out_list <- out_list_recent <- NULL
if (!is.null(start_date_hist)){
# create dates
if (is.null(end_date_hist)) end_date_hist <- lubridate::today()
dates <- as.Date(seq(ymd(start_date_hist),ymd(end_date_hist),date_gap))
for (i in 1:length(dates)){
attributes <- paste0("https://coinmarketcap.com/historical/",format(dates[i], "%Y%m%d"),"/")
out_list <- rbind(out_list,scraper_hist(attributes, sleep = NULL) %>% dplyr::mutate(hist_date=dates[i]))
}
}
coins <- out_list
# always get list for data validation
json <- "https://s2.coinmarketcap.com/generated/search/quick_search.json"
coins <- jsonlite::fromJSON(json)
out_list_recent <- jsonlite::fromJSON(json)
# validate name & slug via symbol from recent list
if (!is.null(coins)){
coins <- coins %>% dplyr::left_join(out_list_recent %>% select(symbol,name,slug) %>% rename(slug_main=slug, name_main=name),by="symbol") %>%
mutate(name=ifelse(is.na(name_main),name,name_main),slug=ifelse(is.na(slug_main),slug,slug_main)) %>% select(symbol, name, slug, hist_date)
}
if (is.null(end_date_hist)|is.null(coins)){
coins <- rbind(out_list,out_list_recent %>% select(name,symbol,slug) %>% dplyr::mutate(hist_date=lubridate::today()))
}
} else {
ifelse(coin_list == "api",
coins <- get_coinlist_api(),
coins <- get_coinlist_static())
}

# get historic coins
if (!is.null(coin)) {
name <- coins$name
slug <- coins$slug
symbol <- coins$symbol
name <- coins$name %>% unique()
slug <- coins$slug %>% unique()
symbol <- coins$symbol %>% unique()
c1 <- subset(coins, toupper(name) %in% toupper(coin))
c2 <- subset(coins, symbol %in% toupper(coin))
c3 <- subset(coins, slug %in% tolower(coin))
coins <- tibble::tibble()
if (nrow(c1) > 0) { coins <- rbind(coins, c1) }
if (nrow(c2) > 0) { coins <- rbind(coins, c2) }
if (nrow(c3) > 0) { coins <- rbind(coins, c3) }
if (nrow(c1) > 0) { coins <- rbind(coins, c1 %>% select(-hist_date)) }
if (nrow(c2) > 0) { coins <- rbind(coins, c2 %>% select(-hist_date)) }
if (nrow(c3) > 0) { coins <- rbind(coins, c3 %>% select(-hist_date)) }
if (nrow(coins) > 1L) { coins <- unique(coins) }
}
coins <-
tibble::tibble(
symbol = coins$symbol,
name = coins$name,
slug = coins$slug,
rank = coins$rank
)
slug = coins$slug
) %>% unique
if (is.null(start_date)) { start_date <- "20130428" }
if (is.null(end_date)) { end_date <- gsub("-", "", lubridate::today()) }
exchangeurl <- paste0("https://coinmarketcap.com/currencies/", coins$slug, "/#markets")
Expand All @@ -88,6 +119,5 @@
coins$slug <- as.character(coins$slug)
coins$exchange_url <- as.character(exchange_url)
coins$history_url <- as.character(history_url)
coins$rank <- as.numeric(coins$rank)
return(coins)
}
31 changes: 19 additions & 12 deletions R/scraper.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,27 @@ scraper <- function(attributes, slug, sleep = NULL) {
error = function(e) e)

if (inherits(page, "error")) {
closeAllConnections()
message("\n")
message(cli::cat_bullet("Rate limit hit. Sleeping for 60 seconds.", bullet = "warning", bullet_col = "red"), appendLF = TRUE)
Sys.sleep(65)
page <- xml2::read_html(history_url,
handle = curl::new_handle("useragent" = "Mozilla/5.0"))
if (grepl("404",page$message)){cat("No coin data found for",coin_slug," ",foferror,"/",page$messsage,"!\n")} else {
closeAllConnections()
message("\n")
message(cli::cat_bullet("Rate limit hit. Sleeping for 60 seconds.", bullet = "warning", bullet_col = "red"), appendLF = TRUE)
Sys.sleep(65)
page <- xml2::read_html(history_url,
handle = curl::new_handle("useragent" = "Mozilla/5.0"))
}
}

table <- rvest::html_nodes(page, css = "table") %>% .[1] %>%
rvest::html_table(fill = TRUE) %>%
replace(!nzchar(.), NA)

scraper <- table[[1]] %>% tibble::as.tibble() %>%
dplyr::mutate(slug = coin_slug)
if (inherits(page, "error")){
table <- NULL
info <- tibble::tibble(slug = coin_slug, start_date=NA, end_date=NA, message = page$message)
} else {
table <- rvest::html_nodes(page, css = "table") %>% .[1] %>%
rvest::html_table(fill = TRUE) %>%
replace(!nzchar(.), NA) %>% .[[1]] %>% tibble::as.tibble() %>%
dplyr::mutate(slug = coin_slug) %>% mutate(Date=lubridate::mdy(Date, locale = platform_locale()))
info <- tibble::tibble(slug = coin_slug, start_date=min(table$Date), end_date=max(table$Date), message = "Success")
}

scraper <- list("data"=table, "info"= info)
return(scraper)
}
Loading