diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..8bc8893 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1 @@ +Readme.* \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index dc2cc88..fb085a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,11 @@ Package: AzureML Type: Package Title: Interface with Azure Machine Learning datasets and web services -Description: Functions and datasets to support Azure Machine Learning. This allows you to interact with datasets, as well as publish and consume R functions as API services. -Version: 0.2.5 -Date: 2015-12-08 +Description: Functions and datasets to support Azure Machine Learning. This + allows you to interact with datasets, as well as publish and consume R functions + as API services. +Version: 0.2.6 +Date: 2015-12-18 Authors@R: c( person("Raymond", "Laghaeian", role=c("aut", "cre"), email="raymondl@microsoft.com"), person(family="Microsoft Corporation", role="cph"), @@ -15,17 +17,20 @@ URL: https://github.com/RevolutionAnalytics/AzureML BugReports: https://github.com/RevolutionAnalytics/AzureML/issues LazyData: TRUE VignetteBuilder: knitr -SystemRequirements: Requires external zip utility, available in path. On windows, it's sufficient to install RTools. +SystemRequirements: Requires external zip utility, available in path. On + windows, it's sufficient to install RTools. 
Imports: - jsonlite(>= 0.9.16), - curl(>= 0.8), + jsonlite(>= 0.9.16), + curl(>= 0.8), foreign, codetools, base64enc, miniCRAN, uuid Suggests: - testthat, - knitr, + testthat, + knitr, lme4, - gbm + gbm, + MASS +RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE index 70b3219..3d92d59 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,4 @@ -# Generated by roxygen2 (4.1.1): do not edit by hand +# Generated by roxygen2: do not edit by hand S3method(print,Datasets) S3method(print,Experiments) diff --git a/R/consume.R b/R/consume.R index 45f4550..b4e71e3 100644 --- a/R/consume.R +++ b/R/consume.R @@ -25,35 +25,36 @@ consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "outpu { if(is.Service(endpoint)) { - if(nrow(endpoint) > 1) endpoint = endpoint[1,] - default = endpoint$DefaultEndpointName - endpoint = endpoints(attr(endpoint, "workspace"), endpoint) - endpoint = subset(endpoint, Name=default) + if(nrow(endpoint) > 1) endpoint = endpoint[1, ] + default <- endpoint$DefaultEndpointName + endpoint <- endpoints(attr(endpoint, "workspace"), endpoint) + endpoint <- subset(endpoint, Name = default) } - if(!is.Endpoint(endpoint)) stop("Invalid endpoint. Use publishWebservice() or endpoints() to create or obtain a service endpoint.") + if(!is.Endpoint(endpoint)) { + msg <- "Invalid endpoint. Use publishWebservice() or endpoints() to create or obtain a service endpoint." + stop(msg) + } - apiKey = endpoint$PrimaryKey - requestUrl = endpoint$ApiLocation + apiKey <- endpoint$PrimaryKey + requestUrl <- endpoint$ApiLocation if(missing(globalParam)) { globalParam = setNames(list(), character(0)) } # Store variable number of lists entered as a list of lists requestsLists = list(...) 
- if(length(requestsLists)==1 && is.data.frame(requestsLists[[1]])) - { + if(length(requestsLists)==1 && is.data.frame(requestsLists[[1]])) { requestsLists = requestsLists[[1]] - } else - { + } else { if(!is.list(requestsLists[[1]])) requestsLists = list(requestsLists) } # Make API call with parameters - result = callAPI(apiKey, requestUrl, requestsLists, globalParam, retryDelay) + result <- callAPI(apiKey, requestUrl, requestsLists, globalParam, retryDelay) if(inherits(result, "error")) stop("AzureML returned error code") + # Access output by converting from JSON into list and indexing into Results - if(!is.null(output) && output == "output1") - { + if(!is.null(output) && output == "output1") { help = endpointHelp(endpoint)$definitions$output1Item ans = data.frame(result$Results$output1) nums = which("number" == unlist(help)[grepl("\\.type$", names(unlist(help)))]) @@ -62,8 +63,9 @@ consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "outpu if(length(logi) > 0) for(j in logi) ans[,j] = as.logical(ans[,j]) return(ans) } - if(!is.null(output) && output == "output2") + if(!is.null(output) && output == "output2") { return(fromJSON(result$Results$output2[[1]])) + } result$Results } @@ -84,30 +86,28 @@ consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "outpu #' @importFrom jsonlite toJSON #' @importFrom curl handle_setheaders new_handle handle_setopt curl_fetch_memory #' @keywords internal -callAPI <- function(apiKey, requestUrl, keyvalues, globalParam, retryDelay=10) -{ +callAPI <- function(apiKey, requestUrl, keyvalues, globalParam, retryDelay=10) { # Set number of tries and HTTP status to 0 - result = NULL + result <- NULL # Construct request payload - req = list( + req <- list( Inputs = list(input1 = keyvalues), GlobalParameters = globalParam ) - body = charToRaw(paste(toJSON(req, auto_unbox=TRUE, digits=16), collapse = "\n")) - h = new_handle() - headers = list(`User-Agent`="R", - `Content-Type`="application/json", - 
`Authorization`=sprintf("Bearer %s", apiKey)) - handle_setheaders(h, .list=headers) + body <- charToRaw(paste(toJSON(req, auto_unbox=TRUE, digits=16), collapse = "\n")) + h <- new_handle() + headers <- list(`User-Agent` = "R", + `Content-Type` = "application/json", + `Authorization` = sprintf("Bearer %s", apiKey)) + handle_setheaders(h, .list = headers) handle_setopt(h, .list = list( - post=TRUE, - postfieldsize=length(body), - postfields=body) + post = TRUE, + postfieldsize = length(body), + postfields = body) ) - r = try_fetch(requestUrl, h, delay=retryDelay) + r = try_fetch(requestUrl, h, delay = retryDelay) result = fromJSON(rawToChar(r$content)) - if(r$status_code >= 400) - { + if(r$status_code >= 400) { stop(paste(capture.output(result), collapse="\n")) } result @@ -115,7 +115,7 @@ callAPI <- function(apiKey, requestUrl, keyvalues, globalParam, retryDelay=10) -#' Discover web service schema +#' Discover web service schema. #' #' Discover the expected input to a web service specified by a web service ID ng the workspace ID and web service ID, information specific to the consumption functions #' @@ -130,7 +130,9 @@ callAPI <- function(apiKey, requestUrl, keyvalues, globalParam, retryDelay=10) #' #' @family discovery functions #' @export -discoverSchema <- function(helpURL, scheme = "https", host = "ussouthcentral.services.azureml.net", api_version = "2.0") +discoverSchema <- function(helpURL, scheme = "https", + host = "ussouthcentral.services.azureml.net", + api_version = "2.0") { workspaceId = getDetailsFromUrl(helpURL)[1] endpointId = getDetailsFromUrl(helpURL)[3] diff --git a/R/datasets.R b/R/datasets.R index 2cc11b9..f631674 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -95,10 +95,12 @@ download.datasets <- function(source, name, ...) #' @export #' @family dataset functions #' @family experiment functions -download.intermediate.dataset <- function(ws, experiment, node_id, port_name="Results dataset", data_type_id="GenericCSV", ...) 
+download.intermediate.dataset <- function(ws, experiment, node_id, + port_name = "Results dataset", + data_type_id = "GenericCSV", ...) { url = sprintf("%s/workspaces/%s/experiments/%s/outputdata/%s/%s", - ws$.baseuri, curl_escape(ws$id), + ws$.studioapi, curl_escape(ws$id), curl_escape(experiment), curl_escape(node_id), curl_escape(port_name)) h = new_handle() @@ -128,7 +130,7 @@ download.intermediate.dataset <- function(ws, experiment, node_id, port_name="Re #' @example inst/examples/example_upload.R upload.dataset <- function(x, ws, name, description = "", family_id="", ...) { - if(!is.Workspace(ws)) stop("ws must be a Workspace object") + stopIfNotWorkspace(ws) if(name %in% datasets(ws)$Name) { msg <- sprintf("A dataset with the name '%s' already exists in AzureML", name) stop(msg) @@ -141,7 +143,7 @@ upload.dataset <- function(x, ws, name, description = "", family_id="", ...) # Step 1 tsv = capture.output(write.table(x, file = "", sep = "\t", row.names = FALSE, ...)) url = sprintf("%s/resourceuploads/workspaces/%s/?userStorage=true&dataTypeId=GenericTSV", - ws$.baseuri, curl_escape(ws$id)) + ws$.studioapi, curl_escape(ws$id)) h = new_handle() hdr = ws$.headers hdr["Content-Type"] = "text/plain" @@ -169,7 +171,7 @@ upload.dataset <- function(x, ws, name, description = "", family_id="", ...) ClientPoll = TRUE), auto_unbox=TRUE) url = sprintf("%s/workspaces/%s/datasources", - ws$.baseuri, curl_escape(ws$id)) + ws$.studioapi, curl_escape(ws$id)) handle_reset(h) # Preserves connection, cookies handle_setheaders(h, .list=ws$.headers) body = charToRaw(paste(metadata, collapse="\n")) @@ -185,6 +187,8 @@ upload.dataset <- function(x, ws, name, description = "", family_id="", ...) ws$datasets[ws$datasets$Id == id, ] } + + #' Delete datasets from an AzureML workspace. #' #' @inheritParams refresh @@ -193,25 +197,28 @@ upload.dataset <- function(x, ws, name, description = "", family_id="", ...) 
#' @return A data frame with columns Name, Deleted, status_code indicating the HTTP status code and success/failure result of the delete operation for each dataset. #' @family dataset functions #' @export -delete.datasets <- function(ws, name, host="https://studioapi.azureml.net/api") -{ +delete.datasets <- function(ws, name, host){ + stopIfNotWorkspace(ws) # https://studioapi.azureml.net/api/workspaces//datasources/family/ HTTP/1.1 - datasets = name + datasets <- name refresh(ws, "datasets") - if(!inherits(datasets, "Datasets")) - { - datasets = datasets(ws) - datasets = datasets[datasets$Name %in% name, ] + if(!inherits(datasets, "Datasets")){ + datasets <- datasets(ws) + datasets <- datasets[datasets$Name %in% name, ] } - h = new_handle() - handle_setheaders(h, .list=ws$.headers) - handle_setopt(h, customrequest="DELETE") - status_code = vapply(datasets$FamilyId, function(familyId) - { - uri = sprintf("%s/workspaces/%s/datasources/family/%s", host, - curl_escape(ws$id), curl_escape(familyId)) - try_fetch(uri, h)$status_code - }, 1, USE.NAMES=FALSE) + h <- new_handle() + handle_setheaders(h, .list = ws$.headers) + handle_setopt(h, customrequest = "DELETE") + status_code <- vapply(datasets$FamilyId, + function(familyId){ + uri <- sprintf("%s/workspaces/%s/datasources/family/%s", + ws$.studioapi, + curl_escape(ws$id), + curl_escape(familyId) + ) + try_fetch(uri, h)$status_code + }, 1, USE.NAMES = FALSE + ) ans = data.frame( Name = datasets$Name, Deleted=status_code < 300, diff --git a/R/discover.R b/R/discover.R index d391033..e57fda1 100644 --- a/R/discover.R +++ b/R/discover.R @@ -60,9 +60,9 @@ #' @export services <- function(ws, service_id, name, host = ws$.management_endpoint) { - if(!is.Workspace(ws)) stop("ws must be an AzureML Workspace object") - h = new_handle() - headers = list(`User-Agent`="R", + stopIfNotWorkspace(ws) + h <- new_handle() + headers <- list(`User-Agent`="R", `Content-Type`="application/json;charset=UTF8", `Authorization`=sprintf("Bearer 
%s",ws$.auth), `Accept`="application/json") @@ -71,12 +71,12 @@ services <- function(ws, service_id, name, host = ws$.management_endpoint) if(missing(service_id)) service_id = "" else service_id = sprintf("/%s", service_id) - r = curl( + r <- curl( sprintf("%s/workspaces/%s/webservices%s", host, ws$id, service_id), - handle=h + handle = h ) on.exit(close(r)) - ans = tryCatch(fromJSON(readLines(r, warn=FALSE)), error=function(e) NULL) + ans <- tryCatch(fromJSON(readLines(r, warn = FALSE)), error = function(e) NULL) attr(ans, "workspace") = ws if(!missing(name)) { ans = ans[ans$Name == name,] @@ -91,7 +91,7 @@ services <- function(ws, service_id, name, host = ws$.management_endpoint) #' @rdname services #' @export -getWebServices = services +getWebServices <- services #' List AzureML Web Service Endpoints #' @@ -144,7 +144,7 @@ getWebServices = services #' @export endpoints <- function(ws, service_id, endpoint_id, host = ws$.management_endpoint) { - if(!is.Workspace(ws)) stop("ws must be an AzureML Workspace object") + stopIfNotWorkspace(ws) # if(is.list(service_id) || is.data.frame(service_id)) service_id = service_id$Id[1] if(is.Service(service_id)) service_id = service_id$Id[1] @@ -217,16 +217,19 @@ endpointHelp <- function(e, type = c("apidocument", "r-snippet","score","jobs"," { type = match.arg(type) rsnip = FALSE - if(type=="r-snippet") - { + if(type=="r-snippet") { type = "score" rsnip = TRUE } uri = e$HelpLocation[1] + # XXX This is totally nuts, and not documented, but help hosts vary depending on type. # Arrghhh... 
- if(type == "apidocument") + if(type == "apidocument"){ uri = gsub("studio.azureml.net/apihelp", "management.azureml.net", uri) + uri = gsub("studio.azureml-int.net/apihelp", "management.azureml-int.net", uri) + } + pattern = "\\s]+))?)+\\s*|\\s*)/?>" con = curl(paste(uri, type, sep="/")) text = paste( @@ -239,10 +242,9 @@ endpointHelp <- function(e, type = c("apidocument", "r-snippet","score","jobs"," collapse="\n" ) close(con) - if(rsnip) - { + if(rsnip) { text = substr(text, - grepRaw("code-snippet-r",text)+nchar("code-snippet-r")+2,nchar(text) + grepRaw("code-snippet-r", text) + nchar("code-snippet-r") + 2, nchar(text) ) } if(type == "apidocument") text = fromJSON(text) diff --git a/R/internal.R b/R/internal.R index 7b59141..b43209c 100644 --- a/R/internal.R +++ b/R/internal.R @@ -32,22 +32,34 @@ date_origin = "1970-1-1" #' @param delay in seconds between retries, subject to exponent #' @param exponent increment each successive delay by delay^exponent #' @return the result of curl_fetch_memory(uri, handle) -try_fetch <- function(uri, handle, retry_on=c(503,504,509,400,401,440), tries=3, delay=10, exponent=1.2) +try_fetch <- function(uri, handle, + retry_on = c(400, 401, 440, 503, 504, 509), + tries = 6, + delay = 1, exponent = 2) { - i = 0 - while(i < tries) - { + collisions = 1 + while(collisions < tries) { r = curl_fetch_memory(uri, handle) if(!(r$status_code %in% retry_on)) return(r) - if(i == 0) - message(sprintf("Request failed with status %s. Retrying request...", r$status_code)) - Sys.sleep(delay) - delay = delay^exponent - i = i + 1 + wait_time = delay * (2 ^ collisions - 1) + wait_time <- ceiling(runif(1, min = 0.001, max = wait_time)) + message(sprintf("Request failed with status %s. 
Waiting %s seconds before retry", + r$status_code, + wait_time)) + for(i in 1:wait_time){ + message(".", appendLF = FALSE) + Sys.sleep(1) + } + message("\n") + collisions = collisions + 1 } r } +# urlAPIinsert <- function(x, text = "api"){ +# gsub("(http.*?)(\\..*)", sprintf("\\1%s\\2", text), x) +# } + urlconcat <- function(a,b) { ans = paste(gsub("/$", "", a), b, sep="/") @@ -66,23 +78,25 @@ get_datasets <- function(ws) { h = new_handle() handle_setheaders(h, .list=ws$.headers) - r = curl(sprintf("%s/workspaces/%s/datasources", ws$.baseuri, ws$id), handle=h) + r = curl(sprintf("%s/workspaces/%s/datasources", ws$.studioapi, ws$id), handle=h) on.exit(close(r)) x = tryCatch(fromJSON(readLines(r, warn=FALSE)), error=invisible) - if(is.null(x) || is.na(x$Name[1])) - { + if(is.null(x) || is.na(x$Name[1])){ x = data.frame() class(x) = c("Datasets", "data.frame") return(x) } # Use strict variable name matching to look up data d = x[,"DownloadLocation"] - x$DownloadLocation = paste(d[,"BaseUri"], d[,"Location"], + x$DownloadLocation = paste(d[,"BaseUri"], + d[,"Location"], d[,"AccessCredential"], sep="") d = x[,"VisualizeEndPoint"] - x$VisualizeEndPoint = paste(d[,"BaseUri"], d[,"AccessCredential"], sep="") + x$VisualizeEndPoint = paste(d[,"BaseUri"], + d[,"AccessCredential"], sep="") d = x[,"SchemaEndPoint"] - x$SchemaEndPoint = paste(d[,"BaseUri"], d[,"Location"], + x$SchemaEndPoint = paste(d[,"BaseUri"], + d[,"Location"], d[,"AccessCredential"], sep="") class(x) = c("Datasets", "data.frame") x @@ -109,7 +123,7 @@ get_experiments <- function(ws) { h = new_handle() handle_setheaders(h, .list=ws$.headers) - r = curl(sprintf("%s/workspaces/%s/experiments", ws$.baseuri, ws$id), handle=h) + r = curl(sprintf("%s/workspaces/%s/experiments", ws$.studioapi, ws$id), handle=h) on.exit(close(r)) x = fromJSON(readLines(r, warn=FALSE)) # Use strict variable name matching to look up data @@ -204,7 +218,7 @@ packageEnv <- function(exportenv, packages=NULL, version="3.1.0") } z = try({ 
- zip(zipfile="export.zip", files=dir()) + zip(zipfile="export.zip", files=dir(), flags = "-r9Xq") }) if(inherits(z, "error") || z > 0) stop("Unable to create zip file") setwd(cwd) diff --git a/R/makeConfig.R b/R/makeConfig.R new file mode 100644 index 0000000..decd5a9 --- /dev/null +++ b/R/makeConfig.R @@ -0,0 +1,19 @@ +makeConfig <- function(id = NULL, authorization_token = NULL, + api_endpoint = NULL, management_endpoint = NULL, file){ + x <- list( + id = id, + authorization_token = authorization_token, + api_endpoint = api_endpoint, + management_endpoint = management_endpoint + ) + conf <- list( + workspace = x[!sapply(x, is.null)] + ) + js <- jsonlite::toJSON(conf, pretty = TRUE) + # browser() + if(!missing(file) && !is.null(file)) { + writeLines(js, con = file) + } else { + js + } +} diff --git a/R/methods.R b/R/methods.R index 01e5ff5..6b21275 100644 --- a/R/methods.R +++ b/R/methods.R @@ -21,13 +21,28 @@ # THE SOFTWARE. -#' @title Test if an object is an Azure ML workspace. +stopIfNotWorkspace <- function(x){ + if(!is.Workspace(x)) { + msg <- paste0("Error in ", as.character(sys.call(-1))[1], "()\n", + "ws must be a Workspace object. See ?workspace" + ) + stop(msg, call. = FALSE) + } +} + + +#' Test if an object is an Azure ML workspace. +#' #' @param x an R object #' @return logical value, TRUE if \code{x} represents an Azure ML workspace. #' @export -is.Workspace <- function(x) "Workspace" %in% class(x) +is.Workspace <- function(x){ + inherits(x, "Workspace") +} + -#' @title Test if an object is an Azure ML Service. +#' Test if an object is an Azure ML Service. +#' #' @param x an R object #' @return logical value, TRUE if \code{x} represents an Azure ML web service #' @export @@ -35,7 +50,8 @@ is.Service <- function(x){ inherits(x, "Service") } -#' @title Test if an object is an Azure ML Endpoint +#' Test if an object is an Azure ML Endpoint. 
+#' #' @param x an R object #' @return logical value, TRUE if \code{x} represents an Azure ML web service endpoint #' @export @@ -44,11 +60,15 @@ is.Endpoint <- function(x){ } #' @export -print.Workspace = function(x, ...) +print.Workspace = function(x, detail = FALSE, ...) { cat("AzureML Workspace\n") - cat("Workspace ID: ", x$id, "\n") - cat("API endpoint:", x$.api_endpoint, "\n") + cat("Workspace ID :", x$id, "\n") + cat("API endpoint :", x$.api_endpoint, "\n") + if(detail){ + cat("Studio API :", x$.studioapi, "\n") + cat("Management endpoint :", x$.management_endpoint, "\n") + } } #' @export diff --git a/R/publish.R b/R/publish.R index ab849d1..4aeff95 100644 --- a/R/publish.R +++ b/R/publish.R @@ -151,7 +151,7 @@ publishWebService <- function(ws, fun, name, version="3.1.0", serviceId, host = ws$.management_endpoint) { # Perform validation on inputs - if(!is.Workspace(ws)) stop("ws must be a workspace object") + stopIfNotWorkspace(ws) if(!zipAvailable()) stop(zipNotAvailableMessage) if(is.character(fun)) stop("You must specify 'fun' as a function, not a character") if(!is.function(fun)) stop("The argument 'fun' must be a function.") @@ -263,7 +263,7 @@ deleteWebService <- function(ws, name, refresh = TRUE) { #DELETE https://management.azureml.net/workspaces/{id}/webservices/{id}[/endpoints/{name}] - if(!is.Workspace(ws)) stop("Invalid ws. Please provide a workspace object") + stopIfNotWorkspace(ws) if(is.data.frame(name) || is.list(name)){ name = name$Id[1] } else { diff --git a/R/workspace.R b/R/workspace.R index e6a2483..135fca7 100644 --- a/R/workspace.R +++ b/R/workspace.R @@ -21,8 +21,31 @@ # THE SOFTWARE. 
-api_endpoint_default <- "https://studio.azureml.net" -management_endpoint_default <- "https://management.azureml.net" + +default_api <- function(api_endpoint = "https://studioapi.azureml.net"){ + defaults <- list( + + "https://studioapi.azureml.net" = list( + api_endpoint = "https://studioapi.azureml.net", + management_endpoint = "https://management.azureml.net", + studioapi = "https://studioapi.azureml.net/api" + + ), "https://studioapi.azureml-int.net" = list( + + api_endpoint = "https://studio.azureml-int.net", + management_endpoint = "https://management.azureml-int.net", + studioapi = "https://studioapi.azureml-int.net/api" + + ) + ) + + + if(api_endpoint %in% names(defaults)){ + defaults[api_endpoint][[1]] + } else { + stop("api_endpoint not recognized") + } +} #' Create a reference to an AzureML Studio workspace. #' @@ -59,27 +82,39 @@ management_endpoint_default <- "https://management.azureml.net" #' @seealso \code{\link{datasets}}, \code{\link{experiments}}, \code{\link{refresh}}, #' \code{\link{services}}, \code{\link{consume}}, \code{\link{publishWebService}} workspace <- function(id, auth, api_endpoint, management_endpoint, - config="~/.azureml/settings.json") + config="~/.azureml/settings.json") { if(missing(id) || missing(auth) || missing(api_endpoint) || missing(management_endpoint)) { - if(!file.exists(config)) stop("missing file ", config) - s = fromJSON(file(config)) + if(!file.exists(config)) stop(sprintf("config file is missing: '%s'", config)) + settings = tryCatch(fromJSON(file(config)), + error = function(e)e + ) + if(inherits(settings, "error")) { + msg <- sprintf("Your config file contains invalid json", config) + msg <- paste(msg, settings$message, sep = "\n\n") + stop(msg, call. 
= FALSE) + } if(missing(id)){ - id <- s[["workspace"]][["id"]] + id <- settings[["workspace"]][["id"]] } if(missing(auth)){ - auth <- s[["workspace"]][["authorization_token"]] + auth <- settings[["workspace"]][["authorization_token"]] } if(missing(api_endpoint)){ - api_endpoint <- s[["workspace"]][["api_endpoint"]] + api_endpoint <- settings[["workspace"]][["api_endpoint"]] } if(missing(management_endpoint)){ - management_endpoint <- s[["workspace"]][["management_endpoint"]] + management_endpoint <- settings[["workspace"]][["management_endpoint"]] } } - if(!exists("api_endpoint")) api_endpoint <- api_endpoint_default - if(!exists("management_endpoint")) management_endpoint <- management_endpoint_default + default_api <- if(is.null(api_endpoint)) { + default_api() + } else { + default_api(api_endpoint) + } + if(is.null(api_endpoint)) api_endpoint <- default_api[["api_endpoint"]] + if(is.null(management_endpoint)) management_endpoint <- default_api[["management_endpoint"]] # test to see if api_endpoint is a valid url resp <- tryCatch( @@ -95,17 +130,19 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, ) if(inherits(resp, "error")) stop("Invalid management_endpoint: ", management_endpoint) - e = new.env() - class(e) = "Workspace" - e$id = id - e$.auth = auth - e$.api_endpoint = api_endpoint - e$.management_endpoint = management_endpoint - e$.baseuri = urlconcat(api_endpoint, "api") - e$.headers = list(`User-Agent`="R", - `Content-Type`="application/json;charset=UTF8", - `x-ms-client-session-id`="DefaultSession", - `x-ms-metaanalytics-authorizationtoken`=auth) + e <- new.env() + class(e) <- "Workspace" + e$id <- id + e$.auth <- auth + e$.api_endpoint <- api_endpoint + e$.management_endpoint <- management_endpoint + e$.studioapi <- default_api[["studioapi"]] + e$.headers <- list( + `User-Agent` = "R", + `Content-Type` = "application/json;charset=UTF8", + `x-ms-client-session-id` = "DefaultSession", + `x-ms-metaanalytics-authorizationtoken` = auth + ) 
delayedAssign("experiments", get_experiments(e), assign.env=e) delayedAssign("datasets", get_datasets(e), assign.env=e) delayedAssign("services", services(e), assign.env=e) @@ -125,9 +162,9 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, refresh <- function(ws, what=c("everything", "datasets", "experiments", "services")) { what = match.arg(what) - if(what %in% c("everything", "experiments")) ws$experiments = get_experiments(ws) - if(what %in% c("everything", "datasets")) ws$datasets = get_datasets(ws) - if(what %in% c("everything", "services")) ws$services = services(ws) + if(what %in% c("everything", "experiments")) ws$experiments <- get_experiments(ws) + if(what %in% c("everything", "datasets")) ws$datasets <- get_datasets(ws) + if(what %in% c("everything", "services")) ws$services <- services(ws) invisible() } @@ -148,6 +185,7 @@ refresh <- function(ws, what=c("everything", "datasets", "experiments", "service #' @example inst/examples/example_datasets.R datasets <- function(ws, filter=c("all", "my datasets", "samples")) { + stopIfNotWorkspace(ws) filter = match.arg(filter) if(filter == "all") return(ws$datasets) samples = filter == "samples" diff --git a/man/AzureML-deprecated.Rd b/man/AzureML-deprecated.Rd index cca7436..fe49100 100644 --- a/man/AzureML-deprecated.Rd +++ b/man/AzureML-deprecated.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/azureml-defunct.R \name{consumeDataframe} \alias{consumeDataframe} diff --git a/man/AzureML-package.Rd b/man/AzureML-package.Rd index 216c46e..4eb5509 100644 --- a/man/AzureML-package.Rd +++ b/man/AzureML-package.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/azureml-package.R \docType{package} \name{AzureML-package} diff --git a/man/azureSchema.Rd b/man/azureSchema.Rd index 89d6f98..461ec01 
100644 --- a/man/azureSchema.Rd +++ b/man/azureSchema.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/publish.R \name{azureSchema} \alias{azureSchema} diff --git a/man/callAPI.Rd b/man/callAPI.Rd index 5caa17e..f9d3067 100644 --- a/man/callAPI.Rd +++ b/man/callAPI.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/consume.R \name{callAPI} \alias{callAPI} diff --git a/man/consume.Rd b/man/consume.Rd index 06a6afb..0d3c6cc 100644 --- a/man/consume.Rd +++ b/man/consume.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/consume.R \name{consume} \alias{consume} @@ -154,7 +154,7 @@ deleteWebService(ws, "addme") # A neat trick to evaluate any expression in the Azure ML virtual # machine R session and view its output: ep <- publishWebService(ws, - fun = function(expr) { + fun = function(expr) { paste(capture.output( eval(parse(text=expr))), collapse="\\n") }, diff --git a/man/datasets.Rd b/man/datasets.Rd index 78a8346..f7070fa 100644 --- a/man/datasets.Rd +++ b/man/datasets.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/workspace.R \name{datasets} \alias{datasets} @@ -53,8 +53,8 @@ an R data.frame, you can alternatively filter on any variable as desired. 
\seealso{ \code{\link{workspace}}, \code{\link{experiments}}, \code{\link{download.datasets}} -Other dataset functions: \code{\link{delete.datasets}}; - \code{\link{download.intermediate.dataset}}; - \code{\link{upload.dataset}}; \code{\link{workspace}} +Other dataset functions: \code{\link{delete.datasets}}, + \code{\link{download.intermediate.dataset}}, + \code{\link{upload.dataset}}, \code{\link{workspace}} } diff --git a/man/delete.datasets.Rd b/man/delete.datasets.Rd index cdf1ab7..62c1d04 100644 --- a/man/delete.datasets.Rd +++ b/man/delete.datasets.Rd @@ -1,10 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/datasets.R \name{delete.datasets} \alias{delete.datasets} \title{Delete datasets from an AzureML workspace.} \usage{ -delete.datasets(ws, name, host = "https://studioapi.azureml.net/api") +delete.datasets(ws, name, host) } \arguments{ \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} @@ -20,8 +20,8 @@ A data frame with columns Name, Deleted, status_code indicating the HTTP status Delete datasets from an AzureML workspace. 
} \seealso{ -Other dataset functions: \code{\link{datasets}}; - \code{\link{download.intermediate.dataset}}; - \code{\link{upload.dataset}}; \code{\link{workspace}} +Other dataset functions: \code{\link{datasets}}, + \code{\link{download.intermediate.dataset}}, + \code{\link{upload.dataset}}, \code{\link{workspace}} } diff --git a/man/deleteWebService.Rd b/man/deleteWebService.Rd index dc5c2ee..01805b8 100644 --- a/man/deleteWebService.Rd +++ b/man/deleteWebService.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/publish.R \name{deleteWebService} \alias{deleteWebService} @@ -149,7 +149,7 @@ deleteWebService(ws, "addme") # A neat trick to evaluate any expression in the Azure ML virtual # machine R session and view its output: ep <- publishWebService(ws, - fun = function(expr) { + fun = function(expr) { paste(capture.output( eval(parse(text=expr))), collapse="\\n") }, @@ -279,6 +279,6 @@ deleteWebService(ws, "sleepy lmer") \code{\link{services}} \code{\link{publishWebService}} \code{\link{updateWebService}} Other publishing functions: \code{\link{publishWebService}}, - \code{\link{updateWebService}}; \code{\link{workspace}} + \code{\link{workspace}} } diff --git a/man/discoverSchema.Rd b/man/discoverSchema.Rd index 280f29f..c0f8581 100644 --- a/man/discoverSchema.Rd +++ b/man/discoverSchema.Rd @@ -1,8 +1,8 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/consume.R \name{discoverSchema} \alias{discoverSchema} -\title{Discover web service schema} +\title{Discover web service schema.} \usage{ discoverSchema(helpURL, scheme = "https", host = "ussouthcentral.services.azureml.net", api_version = "2.0") @@ -25,9 +25,8 @@ Discover the expected input to a web service specified by a web service ID ng th \seealso{ \code{\link{publishWebService}} \code{\link{consume}} \code{\link{workspace}} 
\code{link{services}} \code{\link{endpoints}} \code{\link{endpointHelp}} -Other discovery functions: \code{\link{endpointHelp}}; - \code{\link{endpoints}}, \code{\link{getEndpoints}}; - \code{\link{getWebServices}}, \code{\link{services}}; +Other discovery functions: \code{\link{endpointHelp}}, + \code{\link{endpoints}}, \code{\link{services}}, \code{\link{workspace}} } diff --git a/man/download.datasets.Rd b/man/download.datasets.Rd index 2c555f5..c3b3f7e 100644 --- a/man/download.datasets.Rd +++ b/man/download.datasets.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/datasets.R \name{download.datasets} \alias{download.datasets} diff --git a/man/download.intermediate.dataset.Rd b/man/download.intermediate.dataset.Rd index 3b8d50b..ea97c71 100644 --- a/man/download.intermediate.dataset.Rd +++ b/man/download.intermediate.dataset.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/datasets.R \name{download.intermediate.dataset} \alias{download.intermediate.dataset} @@ -37,11 +37,11 @@ This function can download datasets with various CSV and TSV \code{DataTypeId} ( \seealso{ \code{\link{workspace}}, \code{\link{datasets}}, \code{\link[utils]{read.table}} and \code{\link{download.datasets}} -Other dataset functions: \code{\link{datasets}}; - \code{\link{delete.datasets}}; - \code{\link{upload.dataset}}; \code{\link{workspace}} +Other dataset functions: \code{\link{datasets}}, + \code{\link{delete.datasets}}, + \code{\link{upload.dataset}}, \code{\link{workspace}} -Other experiment functions: \code{\link{experiments}}; +Other experiment functions: \code{\link{experiments}}, \code{\link{workspace}} } diff --git a/man/endpointHelp.Rd b/man/endpointHelp.Rd index 9083bc8..b72a8fa 100644 --- a/man/endpointHelp.Rd +++ b/man/endpointHelp.Rd @@ -1,4 +1,4 @@ -% Generated by 
roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/discover.R \name{endpointHelp} \alias{endpointHelp} @@ -40,9 +40,8 @@ endpointHelp(e[1,])$definitions } } \seealso{ -Other discovery functions: \code{\link{discoverSchema}}; - \code{\link{endpoints}}, \code{\link{getEndpoints}}; - \code{\link{getWebServices}}, \code{\link{services}}; +Other discovery functions: \code{\link{discoverSchema}}, + \code{\link{endpoints}}, \code{\link{services}}, \code{\link{workspace}} } diff --git a/man/endpoints.Rd b/man/endpoints.Rd index 4df22ca..c277110 100644 --- a/man/endpoints.Rd +++ b/man/endpoints.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/discover.R \name{endpoints} \alias{endpoints} @@ -65,8 +65,8 @@ getEndpoints(ws, s$Id[1]) } } \seealso{ -Other discovery functions: \code{\link{discoverSchema}}; - \code{\link{endpointHelp}}; \code{\link{getWebServices}}, - \code{\link{services}}; \code{\link{workspace}} +Other discovery functions: \code{\link{discoverSchema}}, + \code{\link{endpointHelp}}, \code{\link{services}}, + \code{\link{workspace}} } diff --git a/man/experiments.Rd b/man/experiments.Rd index f9ebc62..267a53c 100644 --- a/man/experiments.Rd +++ b/man/experiments.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/workspace.R \name{experiments} \alias{experiments} @@ -40,7 +40,7 @@ List experiments in an AzureML workspace, optionally filtering on sample or my e \seealso{ \code{\link{workspace}}, \code{\link{datasets}}, \code{\link{download.intermediate.dataset}} -Other experiment functions: \code{\link{download.intermediate.dataset}}; +Other experiment functions: \code{\link{download.intermediate.dataset}}, \code{\link{workspace}} } diff --git a/man/getDetailsFromUrl.Rd b/man/getDetailsFromUrl.Rd 
index 12e8733..01613d3 100644 --- a/man/getDetailsFromUrl.Rd +++ b/man/getDetailsFromUrl.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/consume.R \name{getDetailsFromUrl} \alias{getDetailsFromUrl} diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index 006f69a..580e1f7 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{get_dataset} \alias{get_dataset} diff --git a/man/get_datasets.Rd b/man/get_datasets.Rd index e023cc0..c5fcbcb 100644 --- a/man/get_datasets.Rd +++ b/man/get_datasets.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{get_datasets} \alias{get_datasets} diff --git a/man/get_experiments.Rd b/man/get_experiments.Rd index fbb6c75..defb177 100644 --- a/man/get_experiments.Rd +++ b/man/get_experiments.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{get_experiments} \alias{get_experiments} diff --git a/man/is.Endpoint.Rd b/man/is.Endpoint.Rd index 8735f5a..25bc628 100644 --- a/man/is.Endpoint.Rd +++ b/man/is.Endpoint.Rd @@ -1,8 +1,8 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{is.Endpoint} \alias{is.Endpoint} -\title{Test if an object is an Azure ML Endpoint} +\title{Test if an object is an Azure ML Endpoint.} \usage{ is.Endpoint(x) } @@ -13,6 +13,6 @@ is.Endpoint(x) logical value, TRUE if \code{x} represents an Azure ML web service endpoint } \description{ -Test if an object is an Azure ML Endpoint +Test if an object is an Azure ML Endpoint. 
} diff --git a/man/is.Service.Rd b/man/is.Service.Rd index fb6e707..35e7492 100644 --- a/man/is.Service.Rd +++ b/man/is.Service.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{is.Service} \alias{is.Service} diff --git a/man/is.Workspace.Rd b/man/is.Workspace.Rd index 52267df..70aeb75 100644 --- a/man/is.Workspace.Rd +++ b/man/is.Workspace.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{is.Workspace} \alias{is.Workspace} diff --git a/man/packageEnv.Rd b/man/packageEnv.Rd index 55eb98a..114e293 100644 --- a/man/packageEnv.Rd +++ b/man/packageEnv.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{packageEnv} \alias{packageEnv} diff --git a/man/publishWebService.Rd b/man/publishWebService.Rd index 33048f1..c06d54f 100644 --- a/man/publishWebService.Rd +++ b/man/publishWebService.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/publish.R \name{publishWebService} \alias{publishWebService} @@ -199,7 +199,7 @@ deleteWebService(ws, "addme") # A neat trick to evaluate any expression in the Azure ML virtual # machine R session and view its output: ep <- publishWebService(ws, - fun = function(expr) { + fun = function(expr) { paste(capture.output( eval(parse(text=expr))), collapse="\\n") }, @@ -328,7 +328,7 @@ deleteWebService(ws, "sleepy lmer") \seealso{ \code{\link{endpoints}}, \code{\link{discoverSchema}}, \code{\link{consume}} and \code{\link{services}}. 
-Other publishing functions: \code{\link{deleteWebService}}; +Other publishing functions: \code{\link{deleteWebService}}, \code{\link{workspace}} } diff --git a/man/refresh.Rd b/man/refresh.Rd index faf5096..ddf5fbf 100644 --- a/man/refresh.Rd +++ b/man/refresh.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/workspace.R \name{refresh} \alias{refresh} diff --git a/man/services.Rd b/man/services.Rd index f71a42c..d930154 100644 --- a/man/services.Rd +++ b/man/services.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/discover.R \name{services} \alias{getWebServices} @@ -53,8 +53,8 @@ getWebServices(ws) } } \seealso{ -Other discovery functions: \code{\link{discoverSchema}}; - \code{\link{endpointHelp}}; \code{\link{endpoints}}, - \code{\link{getEndpoints}}; \code{\link{workspace}} +Other discovery functions: \code{\link{discoverSchema}}, + \code{\link{endpointHelp}}, \code{\link{endpoints}}, + \code{\link{workspace}} } diff --git a/man/test_wrapper.Rd b/man/test_wrapper.Rd index c971e8c..8da16f6 100644 --- a/man/test_wrapper.Rd +++ b/man/test_wrapper.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/publish.R \name{test_wrapper} \alias{test_wrapper} diff --git a/man/try_fetch.Rd b/man/try_fetch.Rd index 2a97d7a..8f7c6d9 100644 --- a/man/try_fetch.Rd +++ b/man/try_fetch.Rd @@ -1,11 +1,11 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{try_fetch} \alias{try_fetch} \title{Try to fetch a uri/handle, retrying on certain returned status codes after a timeout} \usage{ -try_fetch(uri, handle, retry_on = c(503, 504, 509, 400, 401, 440), - tries = 3, delay = 10, exponent = 
1.2) +try_fetch(uri, handle, retry_on = c(400, 401, 440, 503, 504, 509), + tries = 6, delay = 1, exponent = 2) } \arguments{ \item{uri}{the uri to fetch} diff --git a/man/upload.dataset.Rd b/man/upload.dataset.Rd index 6a2698a..e0d9ae2 100644 --- a/man/upload.dataset.Rd +++ b/man/upload.dataset.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/datasets.R \name{upload.dataset} \alias{upload.dataset} @@ -46,9 +46,9 @@ The AzureML API does not support uploads for _replacing_ datasets with new data } } \seealso{ -Other dataset functions: \code{\link{datasets}}; - \code{\link{delete.datasets}}; - \code{\link{download.intermediate.dataset}}; +Other dataset functions: \code{\link{datasets}}, + \code{\link{delete.datasets}}, + \code{\link{download.intermediate.dataset}}, \code{\link{workspace}} } diff --git a/man/workspace.Rd b/man/workspace.Rd index defa93c..9d49130 100644 --- a/man/workspace.Rd +++ b/man/workspace.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/workspace.R \name{workspace} \alias{workspace} @@ -45,21 +45,19 @@ If any of the \code{id}, \code{auth}, \code{api_endpoint} or \code{management_en Other consumption functions: \code{\link{consume}} -Other dataset functions: \code{\link{datasets}}; - \code{\link{delete.datasets}}; - \code{\link{download.intermediate.dataset}}; +Other dataset functions: \code{\link{datasets}}, + \code{\link{delete.datasets}}, + \code{\link{download.intermediate.dataset}}, \code{\link{upload.dataset}} -Other discovery functions: \code{\link{discoverSchema}}; - \code{\link{endpointHelp}}; \code{\link{endpoints}}, - \code{\link{getEndpoints}}; \code{\link{getWebServices}}, +Other discovery functions: \code{\link{discoverSchema}}, + \code{\link{endpointHelp}}, \code{\link{endpoints}}, \code{\link{services}} -Other experiment 
functions: \code{\link{download.intermediate.dataset}}; +Other experiment functions: \code{\link{download.intermediate.dataset}}, \code{\link{experiments}} -Other publishing functions: \code{\link{deleteWebService}}; - \code{\link{publishWebService}}, - \code{\link{updateWebService}} +Other publishing functions: \code{\link{deleteWebService}}, + \code{\link{publishWebService}} } diff --git a/tests/testthat.R b/tests/testthat.R index 1e16d2e..2945efa 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,2 +1,3 @@ -library(testthat) +Sys.setenv("R_TESTS" = "") +library(testthat, quietly = TRUE) test_check("AzureML") diff --git a/tests/testthat/test-1-workspace.R b/tests/testthat/test-1-workspace.R index b8ba62d..70da399 100644 --- a/tests/testthat/test-1-workspace.R +++ b/tests/testthat/test-1-workspace.R @@ -4,33 +4,61 @@ settingsFile <- "~/.azureml/settings.json" if(file.exists(settingsFile)) { context("Connect to workspace") - + test_that("Can connect to workspace with supplied id and auth", { js <- jsonlite::fromJSON(settingsFile) id <- js$workspace$id auth <- js$workspace$authorization_token - + expect_true(!is.null(id)) expect_true(!is.null(auth)) - + ws <- workspace(id, auth) - + expect_is(ws, c("Workspace")) expect_equal(ls(ws), c("datasets", "experiments", "id", "services")) expect_equal(ws$id, id) }) - + test_that("Can connect to workspace with config file", { skip_on_cran() skip_on_travis() - + ws <- workspace() - + expect_is(ws, c("Workspace")) expect_equal(ls(ws), c("datasets", "experiments", "id", "services")) }) - + } else { message("To run tests, add a file ~/.azureml/settings.json containing AzureML keys, see ?workspace for help") message("No tests ran") } + +context("Reading from settings.json file") + +test_that("Add api_endpoint and management_endpoint if missing from config", { + tf <- tempfile(fileext = ".json") + on.exit(unlink(tf)) + makeConfig("x", "y", file = tf) + ws <- workspace(config = tf) + expect_equal(ws$id, "x") + 
expect_equal(ws$.api_endpoint, default_api(ws$.api_endpoint)[["api_endpoint"]]) + expect_equal(ws$.management_endpoint, default_api(ws$.api_endpoint)[["management_endpoint"]]) +}) + +test_that("Add api_endpoint and management_endpoint if missing from config", { + expect_error(workspace(config = "file_does_not_exist"), + "config file is missing: 'file_does_not_exist'") +}) + +test_that("Throws helpful error if config is invalid json", { + tf <- tempfile(fileext = ".json") + on.exit(unlink(tf)) + writeLines("garbage", con = tf) + msg <- tryCatch(workspace(config = tf), error = function(e)e)$message + expect_true( + grepl("Your config file contains invalid json", msg) + ) +}) + diff --git a/tests/testthat/test-2-datasets-upload-download-delete.R b/tests/testthat/test-2-datasets-upload-download-delete.R index 954ccee..aac49fc 100644 --- a/tests/testthat/test-2-datasets-upload-download-delete.R +++ b/tests/testthat/test-2-datasets-upload-download-delete.R @@ -28,8 +28,9 @@ if(file.exists(settingsFile)) test_that("Can delete dataset from workspace", { z <- delete.datasets(ws, timestamped_name) - expect_true(timestamped_name %in% z$Name) - # refresh(ws) + expect_true(timestamped_name %in% z$Name && z$Deleted[z$Name == timestamped_name]) + # Force refresh - sometime this fails in non-interactive + Sys.sleep(1); refresh(ws, what = "datasets") ds <- datasets(ws, filter = "my") expect_false(timestamped_name %in% ds$Name) }) diff --git a/tests/testthat/test-4-download-each-dataset-type.R b/tests/testthat/test-4-download-each-dataset-type.R index 2225223..a942b67 100644 --- a/tests/testthat/test-4-download-each-dataset-type.R +++ b/tests/testthat/test-4-download-each-dataset-type.R @@ -7,6 +7,9 @@ if(file.exists(settingsFile)) ws <- workspace() ds <- datasets(ws, filter = "samples") + ds$Name + testIdx <- grepl("[Tt]est", ds$Name) + ds <- ds[!testIdx, ] unique(ds$DataTypeId) oneOfEach <- do.call( diff --git a/tests/testthat/test-5-publish.R b/tests/testthat/test-5-publish.R 
index 8ad088b..e3a0d15 100644 --- a/tests/testthat/test-5-publish.R +++ b/tests/testthat/test-5-publish.R @@ -134,3 +134,22 @@ test_that("publishWebService works with data frame input", { deleteWebService(ws, timestamped_name) }) + +test_that("try_fetch gives exponential retry messages",{ + set.seed(1) + with_mock( + curl_fetch_memory = function(...){ + retry_on = c(400, 401, 440, 503, 504, 509) + status_code <- if(runif(1) > 0.26) sample(retry_on, 1) else 200 + list(status_code = status_code, contents = NA) + }, { + msg <- "Request failed with status 440. Waiting 1 seconds before retry\n" + expect_message( + try_fetch(delay = 0.25), + msg + ) + }, + .env = "curl" + ) + +}) diff --git a/vignettes/getting_started.R b/vignettes/getting_started.R index 8c8c518..40a57da 100644 --- a/vignettes/getting_started.R +++ b/vignettes/getting_started.R @@ -11,13 +11,10 @@ ws <- workspace() ws ## ------------------------------------------------------------------------ -head(datasets(ws)) - -# Or, equivalently: -head(ws$datasets) +head(datasets(ws)) # Or, equivalently: head(ws$datasets) ## ------------------------------------------------------------------------ -ws$datasets$Owner +head(ws$datasets$Owner, n=20) ## ------------------------------------------------------------------------ airports <- download.datasets(ws, name = "Airport Codes Dataset", quote="\"") @@ -55,7 +52,7 @@ ws <- workspace() api <- publishWebService( ws, fun = add, - name = "AzureML-vignette-add", + name = "AzureML-vignette-silly", inputSchema = list( x = "numeric", y = "numeric" @@ -70,45 +67,76 @@ class(api) names(api) ## ----help---------------------------------------------------------------- -helpPageUrl <- api$HelpLocation -helpPageUrl +(helpPageUrl <- api$HelpLocation) ## ----update-------------------------------------------------------------- api <- updateWebService( ws, - fun = add, - name = "AzureML-vignette-add", + fun = function(x, y) x - y, inputSchema = list( - x = "numeric", + x = "numeric", y 
= "numeric" - ), + ), outputSchema = list( ans = "numeric" ), - wsid = api$WorkspaceId # <<-- Note you must add wsid to update! + serviceId = api$WebServiceId # <<-- Required to update! ) ## ----webservice---------------------------------------------------------- -webservices <- services(ws, name = "AzureML-vignette-add") +(webservices <- services(ws, name = "AzureML-vignette-silly")) ## ----endpoints----------------------------------------------------------- -ep <- endpoints(ws, webservices[1, ]$Id) +ep <- endpoints(ws, webservices[1, ]) class(ep) names(ep) -## ----discover------------------------------------------------------------ -discoverSchema(ep$HelpLocation) - ## ----df------------------------------------------------------------------ df <- data.frame( x = 1:5, y = 6:10 ) -s <- services(ws, name = "AzureML-vignette-add") +s <- services(ws, name = "AzureML-vignette-silly") s <- tail(s, 1) # use the last published function, in case of duplicate function names -ep <- endpoints(ws, s$Id) +ep <- endpoints(ws, s) consume(ep, df) ## ----delete-------------------------------------------------------------- -deleteWebService(ws, name = "AzureML-vignette-add") +deleteWebService(ws, name = "AzureML-vignette-silly") + +## ------------------------------------------------------------------------ +library(AzureML) +library(MASS) +library(gbm) + +ws <- workspace() +test <- Boston[1:5, 1:13] + +set.seed(123) +gbm1 <- gbm(medv ~ ., + distribution = "gaussian", + n.trees = 5000, + interaction.depth = 8, + n.minobsinnode = 1, + shrinkage = 0.01, + cv.folds = 5, + data = Boston, + n.cores = 1) # You can set this to n.cores = NULL to use all cores +best.iter <- gbm.perf(gbm1, method="cv", plot=FALSE) + +mypredict <- function(newdata) +{ + require(gbm) + predict(gbm1, newdata, best.iter) +} + +# Example use of the prediction function +print(mypredict(test)) + +# Publish the service +ep <- publishWebService(ws = ws, fun = mypredict, name = "AzureML-vignette-gbm", + inputSchema = 
test) + +# Consume test data, comparing with result above +print(consume(ep, test)) diff --git a/vignettes/getting_started.Rmd b/vignettes/getting_started.Rmd index c35afa5..a0c83b5 100644 --- a/vignettes/getting_started.Rmd +++ b/vignettes/getting_started.Rmd @@ -412,7 +412,8 @@ gbm1 <- gbm(medv ~ ., n.minobsinnode = 1, shrinkage = 0.01, cv.folds = 5, - data = Boston) + data = Boston, + n.cores = 1) # You can set this to n.cores = NULL to use all cores best.iter <- gbm.perf(gbm1, method="cv", plot=FALSE) mypredict <- function(newdata) diff --git a/vignettes/getting_started.html b/vignettes/getting_started.html index 3cda4bd..3e28687 100644 --- a/vignettes/getting_started.html +++ b/vignettes/getting_started.html @@ -1,79 +1,221 @@ + + + - +Installation instructions - + + + + - - - + + - + -
- + + + +

Use this package to upload and download datasets to and from AzureML, to +interrogate experiments, to publish R functions as AzureML web services, and to +run R data through existing web services and retrieve the output.

-

Use this package to upload and download datasets to and from AzureML, to interrogate experiments, to publish R functions as AzureML web services, and to run R data through existing web services and retrieve the output.

-

Installation instructions

+

Install the development version of the package directly from GitHub with:

-
# Install devtools
+
+
# Install devtools
 if(!require("devtools")) install.packages("devtools")
-devtools::install_github("RevolutionAnalytics/azureml")
+devtools::install_github("RevolutionAnalytics/azureml") +
+

The package depends on:

+
  • jsonlite
  • curl
  • @@ -81,84 +223,155 @@

    Installation instructions

  • base64enc
  • uuid
+

Some of the examples use data and functions in:

+
  • lme4
  • ggplot2
-
-
+

Overview

-

AzureML provides an interface to publish web services on Microsoft Azure Machine Learning (Azure ML) from your local R environment. The main functions in the package cover the following topics:

+ +

AzureML provides an interface to publish web services on Microsoft Azure +Machine Learning (Azure ML) from your local R environment. The main functions +in the package cover the following topics:

+
  • Workspace: connect to and manage AzureML workspaces
  • Datasets: upload and download datasets to and from AzureML workspaces
  • Publish: publish R functions as AzureML web services, and update or delete existing services
  • Consume: apply any AzureML web service to your R data
-
+

Getting Started

-

To get started, please navigate to AzureML Studio and create a free account (not guest) or use your existing AzureML account. After logging in, under the “Settings” tab, copy and paste your Workspace ID from the “Name” sub-tab into your R console. From the “Authorization Tokens” sub-tab, copy your Primary Authorization Token into your R console. You will need this information to access all package functionality.

-

The package defines a Workspace class that represents an AzureML work space. Most of the functions in the package refer to a Workspace object directly or indirectly. Use the workspace() function to create Workspace objects, either by explicitly specifying an AzureML workspace ID and authorization token. Workspace objects are simply R environments that actively cache details about your AzureML sessions.

-
-
+ +

To get started, please navigate to AzureML Studio +and create a free account (not guest) or use your existing AzureML account. +After logging in, under the “Settings” tab, copy and paste your Workspace ID +from the “Name” sub-tab into your R console. From the “Authorization Tokens” +sub-tab, copy your Primary Authorization Token into your R console. You will +need this information to access all package functionality.

+ +

The package defines a Workspace class that represents an AzureML workspace. +Most of the functions in the package refer to a Workspace object directly or +indirectly. Use the workspace() function to create Workspace objects, either +by explicitly specifying an AzureML workspace ID and authorization token, or by reading them from a settings file. +Workspace objects are simply R environments that actively cache details about +your AzureML sessions.

+

Obtaining AzureML Credentials

-

Before using the package, it is necessary to first obtain the security credentials to your Azure Machine Learning workspace. You can find this be logging in at https://studio.azureml.net. If you do not have an account, you can create a free account (not guest) to use these APIs.

-

Once logged in, you will be brought to the Studio landing page. Using the left-hand menu, navigate to the ‘Settings’ tab to find your Workspace ID. Note this, or copy it into your R session and store it is a variable, e.g. myWsID.

+ +

Before using the package, it is necessary to first obtain the security +credentials to your Azure Machine Learning workspace. You can find these by +logging in at https://studio.azureml.net. If you do not +have an account, you can create a free account (not guest) to use these APIs.

+ +

Once logged in, you will be brought to the Studio landing page. Using the +left-hand menu, navigate to the 'Settings' tab to find your Workspace ID. Note +this, or copy it into your R session and store it in a variable, e.g. myWsID.

+

-

Next, within the ‘Settings’ tab, use the overhead menu to navigate to the ‘Authorization Tokens’ tab and similarly note your Primary Authorization Token.

+ +

Next, within the 'Settings' tab, use the overhead menu to navigate to the +'Authorization Tokens' tab and similarly note your Primary Authorization Token.

+

-
library(AzureML)
+
+
library(AzureML)
 ws <- workspace(
   id = "your workspace ID",
   auth = "your authorization token"
-)
-

or alternatively create a file in ~/.azureml/settings.json with the JSON structure (api_endpoint and management_endpoint are optional):

-
{"workspace": {
+)
+
+ +

or alternatively create a file in ~/.azureml/settings.json with the JSON +structure (api_endpoint and management_endpoint are optional):

+ +
{"workspace": {
    "id"                  : "test_id",
    "authorization_token" : "test_token",
    "api_endpoint"        : "api_endpoint",
    "management_endpoint" : "management_endpoint"
-}}
+}} +
+

See ?workspace for more details.

-
-
+

Examining workspace datasets, experiments and services

-

The datasets(), experiments(), and services() functions return data frames that contain information about those objects available in the workspace.

-

The package caches R data frame objects describing available datasets, experiments and services in the workspace environment. That cache can be refreshed at any time with the refresh() function. The data frame objects make it relatively easy to sort and filter the datasets, experiments, and services in arbitrary ways. The functions also include filtering options for specific and common filters, like looking up something by name.

-

Use the download.datasets() and upload.dataset() functions to download or upload data between R and your Azure workspace. The download.intermediate.dataset() function can download ephemeral data from a port in an experiment that is not explicitly stored in your Azure workspace.

+ +

The datasets(), experiments(), and services() functions return data +frames that contain information about those objects available in the workspace.

+ +

The package caches R data frame objects describing available datasets, +experiments and services in the workspace environment. That cache can be +refreshed at any time with the refresh() function. The data frame objects +make it relatively easy to sort and filter the datasets, experiments, and +services in arbitrary ways. The functions also include filtering options for +specific and common filters, like looking up something by name.

+ +

Use the download.datasets() and upload.dataset() functions to download or +upload data between R and your Azure workspace. The +download.intermediate.dataset() function can download ephemeral data from a +port in an experiment that is not explicitly stored in your Azure workspace.

+

Use delete.datasets() to remove and delete datasets from the workspace.

-

The endpoints() function describes Azure web service endpoints, and works with supporting help functions like endpointHelp().

-

The publishWebService() function publishes a custom R function as an AzureML web service, available for use by any client. The updateWebService() and deleteWebServce() update or delete existing web services, respectively.

-

Use the consume() function to evaluate an Azure ML web service with new data uploaded to AzureML from your R environment.

-
-
-
+ +

The endpoints() function describes Azure web service endpoints, and works with +supporting help functions like endpointHelp().

+ +

The publishWebService() function publishes a custom R function as an AzureML +web service, available for use by any client. The updateWebService() and +deleteWebService() functions update or delete existing web services, respectively.

+ +

Use the consume() function to evaluate an Azure ML web service with +new data uploaded to AzureML from your R environment.

+

Examples

-

Work with the AzureML package begins by defining a workspace object. The example below uses the configured workspace ID and authorization token in the ~/.azureml/settings.json file. Alternatively specify these settings explicitly in the workspace() function as outlined above. All of the examples require this step.

-
library(AzureML)
+
+

Work with the AzureML package begins by defining a workspace object. The +example below uses the configured workspace ID and authorization token in the +~/.azureml/settings.json file. Alternatively specify these settings +explicitly in the workspace() function as outlined above. All of the examples +require this step.

+ +
library(AzureML)
 ws <- workspace()
-ws
+ws +
+
## AzureML Workspace
-## Workspace ID:  a2760707c7fa4245a057680427f31b17
-
+## Workspace ID : 68ef5aa9196142799d10bedd43f8254c +## API endpoint : https://studioapi.azureml.net + +

Service availability

-

AzureML is a web service and sometimes operations can’t immediately proceed due to rate limiting or other factors. When this kind of thing occurs, the AzureML R package presents a warning and retries the service a few times before giving up with an error.

-
-
+ +

AzureML is a web service and sometimes operations can't +immediately proceed due to rate limiting or other factors. When this +kind of thing occurs, the AzureML R package presents a warning and +retries the service a few times before giving up with an error.

+

Datasets

-

AzureML datasets correspond more or less to R data frames. The AzureML package defines four basic dataset operations: list, upload, download, and delete.

-
+ +

AzureML datasets correspond more or less to R data frames. The AzureML +package defines four basic dataset operations: list, upload, download, and +delete.

+

List available datasets

+

The following example illustrates listing available datasets in your workspace.

-
head(datasets(ws))     # Or, equivalently: head(ws$datasets)
-
##                                           Name DataTypeId Size ...
-## 1 Result Dataset (saved from Execute R Script)    Dataset 3535 ...
-## 2     dataset-test-upload-2015-11-06--04-35-29 GenericTSV 2901 ...
-## 3     dataset-test-upload-2015-11-06--12-26-32 GenericTSV 2901 ...
-## 4     dataset-test-upload-2015-11-17--18-00-22 GenericTSV 2901 ...
-## 5     dataset-test-upload-2015-11-18--09-51-45 GenericTSV 2901 ...
-## 6     dataset-test-upload-2015-11-18--09-59-35 GenericTSV 2901 ...
+
+
head(datasets(ws))     # Or, equivalently: head(ws$datasets)
+
+ +
##                                       Name         DataTypeId  Size ...
+## 1                   text.preprocessing.zip                Zip  2782 ...
+## 2                    fraudTemplateUtil.zip                Zip  3471 ...
+## 3 Sample Named Entity Recognition Articles GenericTSVNoHeader   236 ...
+## 4                       Breast cancer data               ARFF 15170 ...
+## 5                        Forest fires data               ARFF 26285 ...
+## 6                      Iris Two Class Data               ARFF  2004 ...
 ## ----------------------------------------------
 ## AzureML datasets data.frame variables include:
 ##  [1] "VisualizeEndPoint"    "SchemaEndPoint"       "SchemaStatus"        
@@ -169,38 +382,61 @@ 

List available datasets

## [16] "PromotedFrom" "UploadedFromFilename" "ServiceVersion" ## [19] "IsLatest" "Category" "DownloadLocation" ## [22] "IsDeprecated" "Culture" "Batch" -## [25] "CreatedDateTicks"
-

The list of datasets is presented as an a R data frame with class Datasets. Its print method shows a summary of the datasets, along with all of the available variables. Use any normal R data frame operation to manipulate the datasets. For example, to see the “Owner” value of each dataset:

-
head(ws$datasets$Owner, n=20)
-
##  [1] "bwaynelewis"           "R"                    
-##  [3] "R"                     "R"                    
-##  [5] "R"                     "R"                    
-##  [7] "R"                     "Microsoft Corporation"
+## [25] "CreatedDateTicks"
+
+ +

The list of datasets is presented as an R data frame with class Datasets. +Its print method shows a summary of the datasets, along with all of the +available variables. Use any normal R data frame operation to manipulate the +datasets. For example, to see the “Owner” value of each dataset:

+ +
head(ws$datasets$Owner, n=20)
+
+ +
##  [1] "Microsoft Corporation" "Microsoft Corporation"
+##  [3] "Microsoft Corporation" "Microsoft Corporation"
+##  [5] "Microsoft Corporation" "Microsoft Corporation"
+##  [7] "Microsoft Corporation" "Microsoft Corporation"
 ##  [9] "Microsoft Corporation" "Microsoft Corporation"
 ## [11] "Microsoft Corporation" "Microsoft Corporation"
 ## [13] "Microsoft Corporation" "Microsoft Corporation"
 ## [15] "Microsoft Corporation" "Microsoft Corporation"
 ## [17] "Microsoft Corporation" "Microsoft Corporation"
-## [19] "Microsoft Corporation" "Microsoft Corporation"
-
-
+## [19] "Microsoft Corporation" "Microsoft Corporation" + +

Downloading datasets

-

The next example illustrates downloading a specific dataset named “Airport Codes Dataset” from AzureML to your R session. This dataset is presented by AzureML as a “Generic CSV” dataset, and will be parsed by R’s read.table() function. (Other formats are parsed by an appropriate parser, for example read.arff().) The example illustrates passing additional arguments to the read.table() function used to parse the data from AzureML in this case.

-
airports <- download.datasets(ws, name = "Airport Codes Dataset", quote="\"")
-head(airports)
+ +

The next example illustrates downloading a specific dataset named “Airport +Codes Dataset” from AzureML to your R session. This dataset is presented by +AzureML as a “Generic CSV” dataset, and will be parsed by R's read.table() +function. (Other formats are parsed by an appropriate parser, for example +read.arff().) The example illustrates passing additional arguments to the +read.table() function used to parse the data from AzureML in this case.

+ +
airports <- download.datasets(ws, name = "Airport Codes Dataset", quote="\"")
+head(airports)
+
+
##   airport_id        city state                                 name
 ## 1      10165 Adak Island    AK                                 Adak
 ## 2      10299   Anchorage    AK  Ted Stevens Anchorage International
 ## 3      10304       Aniak    AK                        Aniak Airport
 ## 4      10754      Barrow    AK      Wiley Post/Will Rogers Memorial
 ## 5      10551      Bethel    AK                       Bethel Airport
-## 6      10926     Cordova    AK                Merle K Mudhole Smith
-

You can use download.datasets() to download more than one dataset as a time, returning the results in a list of data frames.

-
-
+## 6 10926 Cordova AK Merle K Mudhole Smith + + +

You can use download.datasets() to download more than one dataset at a time, +returning the results in a list of data frames.

+

Uploading R data frames as AzureML datasets and deleting datasets

+

Use the upload.dataset() function to upload R data frames to AzureML.

-
upload.dataset(airquality, ws, name = "Air quality")
+ +
upload.dataset(airquality, ws, name = "Air quality")
+
+
##          Name DataTypeId Size ...
 ## 1 Air quality GenericTSV 2901 ...
 ## ----------------------------------------------
@@ -213,28 +449,45 @@ 

Uploading R data frames as AzureML datasets and deleting datasets

## [16] "PromotedFrom" "UploadedFromFilename" "ServiceVersion" ## [19] "IsLatest" "Category" "DownloadLocation" ## [22] "IsDeprecated" "Culture" "Batch" -## [25] "CreatedDateTicks"
-
# Let's see what we've got:
-head(download.datasets(ws, name = "Air quality"))
+## [25] "CreatedDateTicks" + + +
# Let's see what we've got:
+head(download.datasets(ws, name = "Air quality"))
+
+
##   Ozone Solar.R Wind Temp Month Day
 ## 1    41     190  7.4   67     5   1
 ## 2    36     118  8.0   72     5   2
 ## 3    12     149 12.6   74     5   3
 ## 4    18     313 11.5   62     5   4
 ## 5    NA      NA 14.3   56     5   5
-## 6    28      NA 14.9   66     5   6
+## 6 28 NA 14.9 66 5 6 + +

Delete one or more AzureML datasets with delete.datasets():

-
delete.datasets(ws, name="Air quality")
-
## Request failed with status 400. Retrying request...
+ +
delete.datasets(ws, name="Air quality")
+
+ +
## Request failed with status 400. Waiting 1 seconds before retry
+## .
+
+
##          Name Deleted status_code
-## 1 Air quality    TRUE         204
-
-
-
+## 1 Air quality TRUE 204 + +

Experiments

-

Use the experiments() function or simply use the ws$experiments data frame object directly to list details about experiments in your AzureML workspace. The experiments() function optionally filters experiments by ownership.

-
e <- experiments(ws, filter = "samples")
-head(e)
+ +

Use the experiments() function or simply use the ws$experiments data frame +object directly to list details about experiments in your AzureML workspace. +The experiments() function optionally filters experiments by ownership.

+ +
e <- experiments(ws, filter = "samples")
+head(e)
+
+
##                                        Description        CreationTime ...
 ## 1  Sample 6: Train, Test, Evaluate for Regression: 2015-08-27 21:34:57 ...
 ## 2 Text Classification: Step 2 of 5, text preproces 2015-08-27 21:39:38 ...
@@ -261,23 +514,40 @@ 

Experiments

## [15] "CreationTime" ## [16] "StartTime" ## [17] "EndTime" -## [18] "Metadata"
-

The ws$experiments object is just an R data frame with class Experiments. Its print method shows a summary of the available experiments, but it can otherwise be manipulated like a normal R data frame.

-

The list of experiments in your workspace is cached in the workspace environment. Use the refresh() function to explicitly update the cache at any time, for example:

-
refresh(ws, "experiments")
-
-
-
+## [18] "Metadata" + + +

The ws$experiments object is just an R data frame with class Experiments. +Its print method shows a summary of the available experiments, but it can +otherwise be manipulated like a normal R data frame.

+ +

The list of experiments in your workspace is cached in the workspace +environment. Use the refresh() function to explicitly update the cache at any +time, for example:

+ +
refresh(ws, "experiments")
+
+

Web Services

-

The AzureML package helps you to publish R functions as AzureML web services that can be consumed anywhere. You can also use the AzureML package to run R data through an existing web service and collect the output.

-
+ +

The AzureML package helps you to publish R functions as AzureML web services +that can be consumed anywhere. You can also use the AzureML package to run R +data through an existing web service and collect the output.

+

Publishing a Web Service

-

The publishWebService() publishes an R function as an AzureML web service. Consider this simple example R function:

-
add <- function(x, y) {
+
+

The publishWebService() function publishes an R function as an AzureML web service. +Consider this simple example R function:

+ +
add <- function(x, y) {
   x + y
-}
-

Use the function publishWebService() to publish the function as a service named “AzureML-vignette-silly”:

-
ws <- workspace()
+}
+
+ +

Use the function publishWebService() to publish the function as a +service named “AzureML-vignette-silly”:

+ +
ws <- workspace()
 api <- publishWebService(
   ws,
   fun = add, 
@@ -289,25 +559,62 @@ 

Publishing a Web Service

outputSchema = list( ans = "numeric" ) -)
-

The example publishes a function of two scalar numeric arguments, returning a single numeric scalar output value. Note that we explicitly define the web service input and output schema in the example. See the examples below for more flexible ways of defining web services with functions of data frames.

-

The result of publishWebService() is an Endpoint object, really just an R data frame with two elements: a list containing the details of the newly created web service, and a list of the endpoints of the web service. From here, you can pass the information on to another user, or use the information to use the web service from R:

-
class(api)
-
## [1] "Endpoint"   "data.frame"
-
names(api)
+) +
+ +

The example publishes a function of two scalar numeric arguments, returning a +single numeric scalar output value. Note that we explicitly define the web +service input and output schema in the example. See the examples below for more +flexible ways of defining web services with functions of data frames.

+ +

The result of publishWebService() is an Endpoint object, really just an R +data frame with two elements: a list containing the details of the newly +created web service, and a list of the endpoints of the web service. From here, +you can pass the information on to another user, or use the information to use +the web service from R:

+ +
class(api)
+
+ +
## [1] "Endpoint"   "data.frame"
+
+ +
names(api)
+
+
##  [1] "Name"                  "Description"          
 ##  [3] "CreationTime"          "WorkspaceId"          
 ##  [5] "WebServiceId"          "HelpLocation"         
 ##  [7] "PrimaryKey"            "SecondaryKey"         
-##  [9] "ApiLocation"           "MaxConcurrentCalls"   
-## [11] "DiagnosticsTraceLevel" "ThrottleLevel"
-

The web service created is identical to a web service published through the Azure Machine Learning Studio. From the response, you can get the Web Service’s URL, API Key and Help Page URL, as shown above. The first two are needed to make calls to the web service. The latter has the sample code, sample request and other information for consuming the API from client apps such as mobile and web applications.

-

The new web service will show up on the ‘Web Services’ tab of the Studio interface, and the service will have a help page for each endpoint, e.g.

+## [9] "ApiLocation" "PreventUpdate" +## [11] "MaxConcurrentCalls" "DiagnosticsTraceLevel" +## [13] "ThrottleLevel" + + +

The web service created is identical to a web service published through the +Azure Machine Learning Studio. From the response, you can get the Web Service's +URL, API Key and Help Page URL, as shown above. The first two are needed to +make calls to the web service. The latter has the sample code, sample request +and other information for consuming the API from client apps such as mobile and +web applications.

+ +

The new web service will show up on the 'Web Services' tab of the Studio +interface, and the service will have a help page for each endpoint, e.g.

+

Note that AzureML allows multiple services to have the same name.

-
(helpPageUrl <- api$HelpLocation)
-
## [1] "https://studio.azureml.net/apihelp/workspaces/a2760707c7fa4245a057680427f31b17/webservices/666b3068900f11e5b1665c94780a34d3/endpoints/e84e9f1701ec4b5b884d24786ced9a30"
-

Once published, you can update a web service using the updateWebService() or publishWebService() functions. The updateWebService() function is just an alias for publishWebService(), except that the argument serviceId is compulsory.

-
api <- updateWebService(
+
+
(helpPageUrl <- api$HelpLocation)
+
+ +
## [1] "https://studio.azureml.net/apihelp/workspaces/68ef5aa9196142799d10bedd43f8254c/webservices/c83e7b30a56b11e5a91011e99dfb4827/endpoints/208ba34b4bf14c12ba50684429f6590e"
+
+ +

Once published, you can update a web service using the updateWebService() or +publishWebService() functions. The updateWebService() function is just an +alias for publishWebService(), except that the argument serviceId is +compulsory.

+ +
api <- updateWebService(
   ws,
   fun = function(x, y) x - y,
   inputSchema = list(
@@ -318,68 +625,119 @@ 

Publishing a Web Service

ans = "numeric" ), serviceId = api$WebServiceId # <<-- Required to update! -)
-

The “AzureML-vignette-silly” service now substracts two numbers instead of adding them.

-
-
+) + + +

The “AzureML-vignette-silly” service now subtracts two numbers instead of adding them.

+

Discovering Web Services

-

Use the services() function to list in detail all of the available services in your AzureML workspace, or filter by web service name as shown below:

-
(webservices <- services(ws, name = "AzureML-vignette-silly"))
-
##                                  Id                   Name
-## 31 666b3068900f11e5b1665c94780a34d3 AzureML-vignette-silly
-##                CreationTime                      WorkspaceId
-## 31 2015-11-21T05:19:14.227Z a2760707c7fa4245a057680427f31b17
-##    DefaultEndpointName EndpointCount
-## 31             default             1
-

Given a service, use the endpoints() function to list the AzureML service endpoints for the service:

-
ep <- endpoints(ws, webservices[1, ])
-class(ep)
-
## [1] "Endpoint"   "data.frame"
-
names(ep)
+ +

Use the services() function to list in detail all of the available services +in your AzureML workspace, or filter by web service name as shown below:

+ +
(webservices <- services(ws, name = "AzureML-vignette-silly"))
+
+ +
##                                 Id                   Name
+## 5 c83e7b30a56b11e5a91011e99dfb4827 AzureML-vignette-silly
+##               CreationTime                      WorkspaceId
+## 5 2015-12-18T09:43:24.779Z 68ef5aa9196142799d10bedd43f8254c
+##   DefaultEndpointName EndpointCount
+## 5             default             1
+
+ +

Given a service, use the endpoints() function to list the AzureML +service endpoints for the service:

+ +
ep <- endpoints(ws, webservices[1, ])
+class(ep)
+
+ +
## [1] "Endpoint"   "data.frame"
+
+ +
names(ep)
+
+
##  [1] "Name"                  "Description"          
 ##  [3] "CreationTime"          "WorkspaceId"          
 ##  [5] "WebServiceId"          "HelpLocation"         
 ##  [7] "PrimaryKey"            "SecondaryKey"         
-##  [9] "ApiLocation"           "MaxConcurrentCalls"   
-## [11] "DiagnosticsTraceLevel" "ThrottleLevel"
-

The returned Endpoints object contains all the information needed to consume a web service. The endpointHelp() function returns detailed information about an endpoint including its input and output schema and URI.

-
-
+## [9] "ApiLocation" "PreventUpdate" +## [11] "MaxConcurrentCalls" "DiagnosticsTraceLevel" +## [13] "ThrottleLevel" + + +

The returned Endpoint object contains all the information needed to consume a web +service. The endpointHelp() function returns detailed information about an endpoint +including its input and output schema and URI.

+

Consuming Web Services

-

Use the consume() function to send data to your newly published web service API for scoring.

-
df <- data.frame(
+
+

Use the consume() function to send data to your newly published web service +API for scoring.

+ +
df <- data.frame(
   x = 1:5,
   y = 6:10
 )
 s <- services(ws, name = "AzureML-vignette-silly")
 s <- tail(s, 1) # use the last published function, in case of duplicate function names
 ep <- endpoints(ws, s)
-consume(ep, df)
+consume(ep, df) +
+
##   ans
-## 1  -5
-## 2  -5
-## 3  -5
-## 4  -5
-## 5  -5
-

Alternatively, the endpoint primary key and API location can be found on the help page for that specific endpoint, which can be found on Azure Machine Learning Studio. Using the Help Page URL, you can access sample code to build clients that can consume this web service in real time to make predictions.

-
-
+## 1 7 +## 2 9 +## 3 11 +## 4 13 +## 5 15 + + +

Alternatively, the endpoint primary key and API location can be found on the +help page for that specific endpoint, which can be found on Azure Machine +Learning Studio. Using the Help Page URL, you can access sample code to build +clients that can consume this web service in real time to make predictions.

+

Deleting a Web Service

-

Use deleteWebservice() to remove a webservice endpoint that you no longer need or want (like these silly examples):

-
deleteWebService(ws, name = "AzureML-vignette-silly")
-
-
+ +

Use deleteWebService() to remove a web service endpoint that you no longer need +or want (like these silly examples):

+ +
deleteWebService(ws, name = "AzureML-vignette-silly")
+
+

Other examples of publishing web services

-

The simplest and perhaps most useful way to define a web service uses functions that take a single data frame argument and return a vector or data frame of results. The next example trains a generalized boosted regression model using the gbm package, publishes the model as a web service with name “AzureML-vignette-gbm”, and runs example data through the model for prediction using the consume() function.

-
library(AzureML)
+
+

The simplest and perhaps most useful way to define a web service uses functions +that take a single data frame argument and return a vector or data frame of +results. The next example trains a generalized boosted regression model using +the gbm package, publishes the model as a web service with name +“AzureML-vignette-gbm”, and runs example data through the model for prediction +using the consume() function.

+ +
library(AzureML)
 library(MASS)
-library(gbm)
+
+ +
## Warning: package 'MASS' was built under R version 3.1.3
+
+ +
library(gbm)
+
+ +
## Warning: package 'gbm' was built under R version 3.1.3
+
+
## Loading required package: survival
-## Loading required package: lattice
 ## Loading required package: splines
+## Loading required package: lattice
 ## Loading required package: parallel
-## Loaded gbm 2.1.1
-
ws <- workspace()
+## Loaded gbm 2.1.1
+
+ +
ws <- workspace()
 test <- Boston[1:5, 1:13]
 
 set.seed(123)
@@ -390,7 +748,8 @@ 

Other examples of publishing web services

n.minobsinnode = 1, shrinkage = 0.01, cv.folds = 5, - data = Boston) + data = Boston, + n.cores = 1) # You can set this to n.cores = NULL to use all cores best.iter <- gbm.perf(gbm1, method="cv", plot=FALSE) mypredict <- function(newdata) @@ -400,56 +759,70 @@

Other examples of publishing web services

} # Example use of the prediction function -print(mypredict(test))
-
## [1] 24.54431 21.15155 33.88859 34.06615 34.93906
-
# Publish the service
+print(mypredict(test))
+
+ +
## [1] 24.54431 21.15155 33.88859 34.06615 34.93906
+
+ +
# Publish the service
 ep <- publishWebService(ws = ws, fun = mypredict, name = "AzureML-vignette-gbm",
                         inputSchema = test)
 
 # Consume test data, comparing with result above
-print(consume(ep, test))
-
## Request failed with status 401. Retrying request...
+print(consume(ep, test)) + +
##        ans
 ## 1 24.54431
 ## 2 21.15155
 ## 3 33.88859
 ## 4 34.06615
-## 5 34.93906
-

Notice that we don’t need to explicitly specific the inputSchema or outputSchema arguments when working with functions that use data frame I/O. When finished with this example, we can delete the example service with:

-
deleteWebService(ws, "AzureML-vignette-gbm")
-
-
+## 5 34.93906 + + +

Notice that we don't need to explicitly specify the inputSchema or +outputSchema arguments when working with functions that use data frame I/O. +When finished with this example, we can delete the example service with:

+ +
deleteWebService(ws, "AzureML-vignette-gbm")
+
+

Tips on writing functions used in web services

-

Try to use the data frame I/O interface as illustrated in the last example above. It’s simpler and more robust than using functions of scalars or lists and exhibits faster execution for large data sets.

-

Use require in your function to explicitly load required packages.

-

The publishWebServce() function uses codetools to bundle objects required by your function following R lexical scoping rules. The previous example, for instance, uses the best.iter and gbm1 variables inside of the mypredict() function. publishWebService() identified that and included their definitions in the R environment in which the function is evaluated in AzureML. Fine-grained control over the export of variables is provided by the publishWebService() function in case you need it (see the help page for details).

-

Use the packages option of publishWebService() to explicitly bundle required packages and their dependencies (but not suggested dependencies) using miniCRAN. This lets you upload packages to AzureML that may not otherwise be available in that environment already, using the correct R version and platform used by AzureML.

-

Be aware that the version of R running in AzureML may not be the same as the version of R that you are running locally. That means that some packages might not be available, or sometimes package behavior in the AzureML version of R might be different that what you observe locally. This is generally more of an issue for cutting-edge packages.

-

JSON is used to transfer data between your local R environment and the R services running in AzureML–numeric values experience a change of base, which can lead to a small loss of precision in some circumstances. If you really, really need to move binary objects between your local R session and the AzureML R service you might try base64 encoding the data, for example.

-
-
+

Try to use the data frame I/O interface as illustrated in the last example +above. It's simpler and more robust than using functions of scalars or lists +and exhibits faster execution for large data sets.

-
+

Use require in your function to explicitly load required packages.

- +

Be aware that the version of R running in AzureML may not be the same as the +version of R that you are running locally. That means that some packages might +not be available, or sometimes package behavior in the AzureML version of R +might be different than what you observe locally. This is generally more of an +issue for cutting-edge packages.

- - +

JSON is used to transfer data between your local R environment and the R +services running in AzureML–numeric values experience a change of base, which +can lead to a small loss of precision in some circumstances. If you really, +really need to move binary objects between your local R session and the AzureML +R service you might try base64 encoding the data, for example.

+