From 45454ec329f6dbd6a3734c88662e24540be861b2 Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Thu, 10 Dec 2015 05:29:46 -0800 Subject: [PATCH 01/14] Fix bug in workspace() that caused incorrect values. Added additional tests. --- R/makeConfig.R | 19 +++++++++++++++++++ R/methods.R | 1 + R/workspace.R | 15 +++++++++++---- tests/testthat/test-1-workspace.R | 28 ++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 R/makeConfig.R diff --git a/R/makeConfig.R b/R/makeConfig.R new file mode 100644 index 0000000..decd5a9 --- /dev/null +++ b/R/makeConfig.R @@ -0,0 +1,19 @@ +makeConfig <- function(id = NULL, authorization_token = NULL, + api_endpoint = NULL, management_endpoint = NULL, file){ + x <- list( + id = id, + authorization_token = authorization_token, + api_endpoint = api_endpoint, + management_endpoint = management_endpoint + ) + conf <- list( + workspace = x[!sapply(x, is.null)] + ) + js <- jsonlite::toJSON(conf, pretty = TRUE) + # browser() + if(!missing(file) && !is.null(file)) { + writeLines(js, con = file) + } else { + js + } +} diff --git a/R/methods.R b/R/methods.R index 01e5ff5..8b3a4fb 100644 --- a/R/methods.R +++ b/R/methods.R @@ -49,6 +49,7 @@ print.Workspace = function(x, ...) 
cat("AzureML Workspace\n") cat("Workspace ID: ", x$id, "\n") cat("API endpoint:", x$.api_endpoint, "\n") + cat("Management endpoint:", x$.management_endpoint, "\n") } #' @export diff --git a/R/workspace.R b/R/workspace.R index e6a2483..602848b 100644 --- a/R/workspace.R +++ b/R/workspace.R @@ -63,8 +63,15 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, { if(missing(id) || missing(auth) || missing(api_endpoint) || missing(management_endpoint)) { - if(!file.exists(config)) stop("missing file ", config) - s = fromJSON(file(config)) + if(!file.exists(config)) stop(sprintf("config file is missing: '%s'", config)) + s = tryCatch(fromJSON(file(config)), + error = function(e)e + ) + if(inherits(s, "error")) { + msg <- sprintf("Your config file contains invalid json", config) + msg <- paste(msg, s$message, sep = "\n\n") + stop(msg, call. = FALSE) + } if(missing(id)){ id <- s[["workspace"]][["id"]] } @@ -78,8 +85,8 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, management_endpoint <- s[["workspace"]][["management_endpoint"]] } } - if(!exists("api_endpoint")) api_endpoint <- api_endpoint_default - if(!exists("management_endpoint")) management_endpoint <- management_endpoint_default + if(is.null(api_endpoint)) api_endpoint <- api_endpoint_default + if(is.null(management_endpoint)) management_endpoint <- management_endpoint_default # test to see if api_endpoint is a valid url resp <- tryCatch( diff --git a/tests/testthat/test-1-workspace.R b/tests/testthat/test-1-workspace.R index b8ba62d..9c3ce64 100644 --- a/tests/testthat/test-1-workspace.R +++ b/tests/testthat/test-1-workspace.R @@ -34,3 +34,31 @@ if(file.exists(settingsFile)) message("To run tests, add a file ~/.azureml/settings.json containing AzureML keys, see ?workspace for help") message("No tests ran") } + +context("Reading from settings.json file") + +test_that("Add api_endpoint and management_endpoint if missing from config", { + tf <- tempfile(fileext = ".json") + 
on.exit(unlink(tf)) + makeConfig("x", "y", file = tf) + ws <- workspace(config = tf) + expect_equal(ws$id, "x") + expect_equal(ws$.api_endpoint, api_endpoint_default) + expect_equal(ws$.management_endpoint, management_endpoint_default) +}) + +test_that("Add api_endpoint and management_endpoint if missing from config", { + expect_error(workspace(config = "file_does_not_exist"), + "config file is missing: 'file_does_not_exist'") +}) + +test_that("Throws helpful error if config is invalid json", { + tf <- tempfile(fileext = ".json") + on.exit(unlink(tf)) + writeLines("garbage", con = tf) + msg <- cat("Error : Your config file contains invalid json\n\nlexical error: invalid char in json text.\n garbage \n (right here) ------^\n\n") + + expect_error(workspace(config = tf), + msg) +}) + From 114f83938551526f601a7dcc1f14d16d33f311ee Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Thu, 10 Dec 2015 06:07:34 -0800 Subject: [PATCH 02/14] Suppress zip progress messages #66 --- R/internal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/internal.R b/R/internal.R index 7b59141..4f2cb3c 100644 --- a/R/internal.R +++ b/R/internal.R @@ -204,7 +204,7 @@ packageEnv <- function(exportenv, packages=NULL, version="3.1.0") } z = try({ - zip(zipfile="export.zip", files=dir()) + zip(zipfile="export.zip", files=dir(), flags = "-r9Xq") }) if(inherits(z, "error") || z > 0) stop("Unable to create zip file") setwd(cwd) From 0aec0d854c2abd3114e02644e82dcfe3e7e715dc Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Thu, 10 Dec 2015 23:40:17 -0800 Subject: [PATCH 03/14] Rebuild documentation --- man/consume.Rd | 2 +- man/deleteWebService.Rd | 2 +- man/publishWebService.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/man/consume.Rd b/man/consume.Rd index 06a6afb..7ef6a3e 100644 --- a/man/consume.Rd +++ b/man/consume.Rd @@ -154,7 +154,7 @@ deleteWebService(ws, "addme") # A neat trick to evaluate any expression in the Azure ML virtual # machine R 
session and view its output: ep <- publishWebService(ws, - fun = function(expr) { + fun = function(expr) { paste(capture.output( eval(parse(text=expr))), collapse="\\n") }, diff --git a/man/deleteWebService.Rd b/man/deleteWebService.Rd index dc5c2ee..d2a12bc 100644 --- a/man/deleteWebService.Rd +++ b/man/deleteWebService.Rd @@ -149,7 +149,7 @@ deleteWebService(ws, "addme") # A neat trick to evaluate any expression in the Azure ML virtual # machine R session and view its output: ep <- publishWebService(ws, - fun = function(expr) { + fun = function(expr) { paste(capture.output( eval(parse(text=expr))), collapse="\\n") }, diff --git a/man/publishWebService.Rd b/man/publishWebService.Rd index 33048f1..e922dd3 100644 --- a/man/publishWebService.Rd +++ b/man/publishWebService.Rd @@ -199,7 +199,7 @@ deleteWebService(ws, "addme") # A neat trick to evaluate any expression in the Azure ML virtual # machine R session and view its output: ep <- publishWebService(ws, - fun = function(expr) { + fun = function(expr) { paste(capture.output( eval(parse(text=expr))), collapse="\\n") }, From d21cc9d9755fbce4f0148d95021cb17980bb82eb Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Thu, 10 Dec 2015 23:40:46 -0800 Subject: [PATCH 04/14] Add .Rbuildignore --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .Rbuildignore diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..8bc8893 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1 @@ +Readme.* \ No newline at end of file From 66c0961c6239e4e7f746aacaf53941d7364fa87b Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Thu, 10 Dec 2015 23:41:04 -0800 Subject: [PATCH 05/14] Add MASS as suggested package --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index dc2cc88..433e22d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,4 +28,5 @@ Suggests: testthat, knitr, lme4, - gbm + gbm, + MASS From 957ab0ac5ac652c339b3082b8035270a058f8598 Mon 
Sep 17 00:00:00 2001 From: Andrie de Vries Date: Thu, 10 Dec 2015 23:42:11 -0800 Subject: [PATCH 06/14] Additional unit tests --- tests/testthat.R | 1 + tests/testthat/test-1-workspace.R | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/testthat.R b/tests/testthat.R index 1e16d2e..e57559e 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,2 +1,3 @@ +Sys.setenv("R_TESTS" = "") library(testthat) test_check("AzureML") diff --git a/tests/testthat/test-1-workspace.R b/tests/testthat/test-1-workspace.R index 9c3ce64..a7c3b9c 100644 --- a/tests/testthat/test-1-workspace.R +++ b/tests/testthat/test-1-workspace.R @@ -4,32 +4,32 @@ settingsFile <- "~/.azureml/settings.json" if(file.exists(settingsFile)) { context("Connect to workspace") - + test_that("Can connect to workspace with supplied id and auth", { js <- jsonlite::fromJSON(settingsFile) id <- js$workspace$id auth <- js$workspace$authorization_token - + expect_true(!is.null(id)) expect_true(!is.null(auth)) - + ws <- workspace(id, auth) - + expect_is(ws, c("Workspace")) expect_equal(ls(ws), c("datasets", "experiments", "id", "services")) expect_equal(ws$id, id) }) - + test_that("Can connect to workspace with config file", { skip_on_cran() skip_on_travis() - + ws <- workspace() - + expect_is(ws, c("Workspace")) expect_equal(ls(ws), c("datasets", "experiments", "id", "services")) }) - + } else { message("To run tests, add a file ~/.azureml/settings.json containing AzureML keys, see ?workspace for help") message("No tests ran") @@ -56,9 +56,9 @@ test_that("Throws helpful error if config is invalid json", { tf <- tempfile(fileext = ".json") on.exit(unlink(tf)) writeLines("garbage", con = tf) - msg <- cat("Error : Your config file contains invalid json\n\nlexical error: invalid char in json text.\n garbage \n (right here) ------^\n\n") - - expect_error(workspace(config = tf), - msg) + msg <- tryCatch(workspace(config = tf), error = function(e)e)$message + expect_true( 
+ grepl("Your config file contains invalid json", msg) + ) }) From 2baeff5c536838b1526e6639de76976cf3bc8dbf Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Fri, 11 Dec 2015 12:20:12 -0800 Subject: [PATCH 07/14] Set cores to 1 in gbm() to deal with warnings in R CMD check --- vignettes/getting_started.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vignettes/getting_started.Rmd b/vignettes/getting_started.Rmd index c35afa5..a0c83b5 100644 --- a/vignettes/getting_started.Rmd +++ b/vignettes/getting_started.Rmd @@ -412,7 +412,8 @@ gbm1 <- gbm(medv ~ ., n.minobsinnode = 1, shrinkage = 0.01, cv.folds = 5, - data = Boston) + data = Boston, + n.cores = 1) # You can set this to n.cores = NULL to use all cores best.iter <- gbm.perf(gbm1, method="cv", plot=FALSE) mypredict <- function(newdata) From ff63617bfdb6f448540ab7d6df4f9e40b2c33dbd Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Fri, 11 Dec 2015 13:01:31 -0800 Subject: [PATCH 08/14] Create single function to validate ws is a workspace object --- R/datasets.R | 8 +++++--- R/discover.R | 4 ++-- R/methods.R | 10 ++++++++++ R/publish.R | 4 ++-- R/workspace.R | 1 + 5 files changed, 20 insertions(+), 7 deletions(-) diff --git a/R/datasets.R b/R/datasets.R index 2cc11b9..3d0cb80 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -128,7 +128,7 @@ download.intermediate.dataset <- function(ws, experiment, node_id, port_name="Re #' @example inst/examples/example_upload.R upload.dataset <- function(x, ws, name, description = "", family_id="", ...) { - if(!is.Workspace(ws)) stop("ws must be a Workspace object") + stopIfNotWorkspace(ws) if(name %in% datasets(ws)$Name) { msg <- sprintf("A dataset with the name '%s' already exists in AzureML", name) stop(msg) @@ -185,6 +185,8 @@ upload.dataset <- function(x, ws, name, description = "", family_id="", ...) ws$datasets[ws$datasets$Id == id, ] } + + #' Delete datasets from an AzureML workspace. 
#' #' @inheritParams refresh @@ -193,7 +195,7 @@ upload.dataset <- function(x, ws, name, description = "", family_id="", ...) #' @return A data frame with columns Name, Deleted, status_code indicating the HTTP status code and success/failure result of the delete operation for each dataset. #' @family dataset functions #' @export -delete.datasets <- function(ws, name, host="https://studioapi.azureml.net/api") +delete.datasets <- function(ws, name, host) { # https://studioapi.azureml.net/api/workspaces//datasources/family/ HTTP/1.1 datasets = name @@ -208,7 +210,7 @@ delete.datasets <- function(ws, name, host="https://studioapi.azureml.net/api") handle_setopt(h, customrequest="DELETE") status_code = vapply(datasets$FamilyId, function(familyId) { - uri = sprintf("%s/workspaces/%s/datasources/family/%s", host, + uri = sprintf("%s/workspaces/%s/datasources/family/%s", ws$.api_endpoint, curl_escape(ws$id), curl_escape(familyId)) try_fetch(uri, h)$status_code }, 1, USE.NAMES=FALSE) diff --git a/R/discover.R b/R/discover.R index d391033..4b6775d 100644 --- a/R/discover.R +++ b/R/discover.R @@ -60,7 +60,7 @@ #' @export services <- function(ws, service_id, name, host = ws$.management_endpoint) { - if(!is.Workspace(ws)) stop("ws must be an AzureML Workspace object") + stopIfNotWorkspace(ws) h = new_handle() headers = list(`User-Agent`="R", `Content-Type`="application/json;charset=UTF8", @@ -144,7 +144,7 @@ getWebServices = services #' @export endpoints <- function(ws, service_id, endpoint_id, host = ws$.management_endpoint) { - if(!is.Workspace(ws)) stop("ws must be an AzureML Workspace object") + stopIfNotWorkspace(ws) # if(is.list(service_id) || is.data.frame(service_id)) service_id = service_id$Id[1] if(is.Service(service_id)) service_id = service_id$Id[1] diff --git a/R/methods.R b/R/methods.R index 8b3a4fb..afe89fa 100644 --- a/R/methods.R +++ b/R/methods.R @@ -21,6 +21,16 @@ # THE SOFTWARE. 
+stopIfNotWorkspace <- function(x){ + if(!is.Workspace(x)) { + msg <- paste0("Error in ", as.character(sys.call(-1))[1], "()\n", + "ws must be a Workspace object. See ?workspace" + ) + stop(msg, call. = FALSE) + } +} + + #' @title Test if an object is an Azure ML workspace. #' @param x an R object #' @return logical value, TRUE if \code{x} represents an Azure ML workspace. diff --git a/R/publish.R b/R/publish.R index ab849d1..4aeff95 100644 --- a/R/publish.R +++ b/R/publish.R @@ -151,7 +151,7 @@ publishWebService <- function(ws, fun, name, version="3.1.0", serviceId, host = ws$.management_endpoint) { # Perform validation on inputs - if(!is.Workspace(ws)) stop("ws must be a workspace object") + stopIfNotWorkspace(ws) if(!zipAvailable()) stop(zipNotAvailableMessage) if(is.character(fun)) stop("You must specify 'fun' as a function, not a character") if(!is.function(fun)) stop("The argument 'fun' must be a function.") @@ -263,7 +263,7 @@ deleteWebService <- function(ws, name, refresh = TRUE) { #DELETE https://management.azureml.net/workspaces/{id}/webservices/{id}[/endpoints/{name}] - if(!is.Workspace(ws)) stop("Invalid ws. 
Please provide a workspace object") + stopIfNotWorkspace(ws) if(is.data.frame(name) || is.list(name)){ name = name$Id[1] } else { diff --git a/R/workspace.R b/R/workspace.R index 602848b..97ba62b 100644 --- a/R/workspace.R +++ b/R/workspace.R @@ -155,6 +155,7 @@ refresh <- function(ws, what=c("everything", "datasets", "experiments", "service #' @example inst/examples/example_datasets.R datasets <- function(ws, filter=c("all", "my datasets", "samples")) { + stopIfNotWorkspace(ws) filter = match.arg(filter) if(filter == "all") return(ws$datasets) samples = filter == "samples" From 47c49ba488225d69c11e2b8460f7ea4919a8bd04 Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Tue, 15 Dec 2015 22:28:17 +0000 Subject: [PATCH 09/14] Start to clean up all references to host; Fix studioapi.azureml.net/api and studio.azureml.net/api; #67 --- R/consume.R | 6 ++++-- R/datasets.R | 45 +++++++++++++++++++++++++-------------------- R/discover.R | 12 ++++++------ R/internal.R | 13 ++++++++++--- R/workspace.R | 37 +++++++++++++++++++++---------------- 5 files changed, 66 insertions(+), 47 deletions(-) diff --git a/R/consume.R b/R/consume.R index 45f4550..4d10aa3 100644 --- a/R/consume.R +++ b/R/consume.R @@ -115,7 +115,7 @@ callAPI <- function(apiKey, requestUrl, keyvalues, globalParam, retryDelay=10) -#' Discover web service schema +#' Discover web service schema. 
#' #' Discover the expected input to a web service specified by a web service ID ng the workspace ID and web service ID, information specific to the consumption functions #' @@ -130,7 +130,9 @@ callAPI <- function(apiKey, requestUrl, keyvalues, globalParam, retryDelay=10) #' #' @family discovery functions #' @export -discoverSchema <- function(helpURL, scheme = "https", host = "ussouthcentral.services.azureml.net", api_version = "2.0") +discoverSchema <- function(helpURL, scheme = "https", + host = "ussouthcentral.services.azureml.net", + api_version = "2.0") { workspaceId = getDetailsFromUrl(helpURL)[1] endpointId = getDetailsFromUrl(helpURL)[3] diff --git a/R/datasets.R b/R/datasets.R index 3d0cb80..f631674 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -95,10 +95,12 @@ download.datasets <- function(source, name, ...) #' @export #' @family dataset functions #' @family experiment functions -download.intermediate.dataset <- function(ws, experiment, node_id, port_name="Results dataset", data_type_id="GenericCSV", ...) +download.intermediate.dataset <- function(ws, experiment, node_id, + port_name = "Results dataset", + data_type_id = "GenericCSV", ...) { url = sprintf("%s/workspaces/%s/experiments/%s/outputdata/%s/%s", - ws$.baseuri, curl_escape(ws$id), + ws$.studioapi, curl_escape(ws$id), curl_escape(experiment), curl_escape(node_id), curl_escape(port_name)) h = new_handle() @@ -141,7 +143,7 @@ upload.dataset <- function(x, ws, name, description = "", family_id="", ...) # Step 1 tsv = capture.output(write.table(x, file = "", sep = "\t", row.names = FALSE, ...)) url = sprintf("%s/resourceuploads/workspaces/%s/?userStorage=true&dataTypeId=GenericTSV", - ws$.baseuri, curl_escape(ws$id)) + ws$.studioapi, curl_escape(ws$id)) h = new_handle() hdr = ws$.headers hdr["Content-Type"] = "text/plain" @@ -169,7 +171,7 @@ upload.dataset <- function(x, ws, name, description = "", family_id="", ...) 
ClientPoll = TRUE), auto_unbox=TRUE) url = sprintf("%s/workspaces/%s/datasources", - ws$.baseuri, curl_escape(ws$id)) + ws$.studioapi, curl_escape(ws$id)) handle_reset(h) # Preserves connection, cookies handle_setheaders(h, .list=ws$.headers) body = charToRaw(paste(metadata, collapse="\n")) @@ -195,25 +197,28 @@ upload.dataset <- function(x, ws, name, description = "", family_id="", ...) #' @return A data frame with columns Name, Deleted, status_code indicating the HTTP status code and success/failure result of the delete operation for each dataset. #' @family dataset functions #' @export -delete.datasets <- function(ws, name, host) -{ +delete.datasets <- function(ws, name, host){ + stopIfNotWorkspace(ws) # https://studioapi.azureml.net/api/workspaces//datasources/family/ HTTP/1.1 - datasets = name + datasets <- name refresh(ws, "datasets") - if(!inherits(datasets, "Datasets")) - { - datasets = datasets(ws) - datasets = datasets[datasets$Name %in% name, ] + if(!inherits(datasets, "Datasets")){ + datasets <- datasets(ws) + datasets <- datasets[datasets$Name %in% name, ] } - h = new_handle() - handle_setheaders(h, .list=ws$.headers) - handle_setopt(h, customrequest="DELETE") - status_code = vapply(datasets$FamilyId, function(familyId) - { - uri = sprintf("%s/workspaces/%s/datasources/family/%s", ws$.api_endpoint, - curl_escape(ws$id), curl_escape(familyId)) - try_fetch(uri, h)$status_code - }, 1, USE.NAMES=FALSE) + h <- new_handle() + handle_setheaders(h, .list = ws$.headers) + handle_setopt(h, customrequest = "DELETE") + status_code <- vapply(datasets$FamilyId, + function(familyId){ + uri <- sprintf("%s/workspaces/%s/datasources/family/%s", + ws$.studioapi, + curl_escape(ws$id), + curl_escape(familyId) + ) + try_fetch(uri, h)$status_code + }, 1, USE.NAMES = FALSE + ) ans = data.frame( Name = datasets$Name, Deleted=status_code < 300, diff --git a/R/discover.R b/R/discover.R index 4b6775d..9525e91 100644 --- a/R/discover.R +++ b/R/discover.R @@ -61,8 +61,8 @@ services 
<- function(ws, service_id, name, host = ws$.management_endpoint) { stopIfNotWorkspace(ws) - h = new_handle() - headers = list(`User-Agent`="R", + h <- new_handle() + headers <- list(`User-Agent`="R", `Content-Type`="application/json;charset=UTF8", `Authorization`=sprintf("Bearer %s",ws$.auth), `Accept`="application/json") @@ -71,12 +71,12 @@ services <- function(ws, service_id, name, host = ws$.management_endpoint) if(missing(service_id)) service_id = "" else service_id = sprintf("/%s", service_id) - r = curl( + r <- curl( sprintf("%s/workspaces/%s/webservices%s", host, ws$id, service_id), - handle=h + handle = h ) on.exit(close(r)) - ans = tryCatch(fromJSON(readLines(r, warn=FALSE)), error=function(e) NULL) + ans <- tryCatch(fromJSON(readLines(r, warn = FALSE)), error = function(e) NULL) attr(ans, "workspace") = ws if(!missing(name)) { ans = ans[ans$Name == name,] @@ -91,7 +91,7 @@ services <- function(ws, service_id, name, host = ws$.management_endpoint) #' @rdname services #' @export -getWebServices = services +getWebServices <- services #' List AzureML Web Service Endpoints #' diff --git a/R/internal.R b/R/internal.R index 4f2cb3c..e8c739c 100644 --- a/R/internal.R +++ b/R/internal.R @@ -32,7 +32,10 @@ date_origin = "1970-1-1" #' @param delay in seconds between retries, subject to exponent #' @param exponent increment each successive delay by delay^exponent #' @return the result of curl_fetch_memory(uri, handle) -try_fetch <- function(uri, handle, retry_on=c(503,504,509,400,401,440), tries=3, delay=10, exponent=1.2) +try_fetch <- function(uri, handle, + retry_on = c(400, 401, 440, 503, 504, 509), + tries = 3, + delay = 10, exponent = 1.2) { i = 0 while(i < tries) @@ -48,6 +51,10 @@ try_fetch <- function(uri, handle, retry_on=c(503,504,509,400,401,440), tries=3, r } +# urlAPIinsert <- function(x, text = "api"){ +# gsub("(http.*?)(\\..*)", sprintf("\\1%s\\2", text), x) +# } + urlconcat <- function(a,b) { ans = paste(gsub("/$", "", a), b, sep="/") @@ -66,7 +73,7 
@@ get_datasets <- function(ws) { h = new_handle() handle_setheaders(h, .list=ws$.headers) - r = curl(sprintf("%s/workspaces/%s/datasources", ws$.baseuri, ws$id), handle=h) + r = curl(sprintf("%s/workspaces/%s/datasources", ws$.studioapi, ws$id), handle=h) on.exit(close(r)) x = tryCatch(fromJSON(readLines(r, warn=FALSE)), error=invisible) if(is.null(x) || is.na(x$Name[1])) @@ -109,7 +116,7 @@ get_experiments <- function(ws) { h = new_handle() handle_setheaders(h, .list=ws$.headers) - r = curl(sprintf("%s/workspaces/%s/experiments", ws$.baseuri, ws$id), handle=h) + r = curl(sprintf("%s/workspaces/%s/experiments", ws$.studioapi, ws$id), handle=h) on.exit(close(r)) x = fromJSON(readLines(r, warn=FALSE)) # Use strict variable name matching to look up data diff --git a/R/workspace.R b/R/workspace.R index 97ba62b..03962be 100644 --- a/R/workspace.R +++ b/R/workspace.R @@ -21,8 +21,10 @@ # THE SOFTWARE. -api_endpoint_default <- "https://studio.azureml.net" +api_endpoint_default <- "https://studio.azureml.net" management_endpoint_default <- "https://management.azureml.net" +studioapi_default <- "https://studioapi.azureml.net/api" +baseuri_default <- "https://studio.azureml.net/api" #' Create a reference to an AzureML Studio workspace. 
#' @@ -59,7 +61,7 @@ management_endpoint_default <- "https://management.azureml.net" #' @seealso \code{\link{datasets}}, \code{\link{experiments}}, \code{\link{refresh}}, #' \code{\link{services}}, \code{\link{consume}}, \code{\link{publishWebService}} workspace <- function(id, auth, api_endpoint, management_endpoint, - config="~/.azureml/settings.json") + config="~/.azureml/settings.json") { if(missing(id) || missing(auth) || missing(api_endpoint) || missing(management_endpoint)) { @@ -102,17 +104,20 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, ) if(inherits(resp, "error")) stop("Invalid management_endpoint: ", management_endpoint) - e = new.env() - class(e) = "Workspace" - e$id = id - e$.auth = auth - e$.api_endpoint = api_endpoint - e$.management_endpoint = management_endpoint - e$.baseuri = urlconcat(api_endpoint, "api") - e$.headers = list(`User-Agent`="R", - `Content-Type`="application/json;charset=UTF8", - `x-ms-client-session-id`="DefaultSession", - `x-ms-metaanalytics-authorizationtoken`=auth) + e <- new.env() + class(e) <- "Workspace" + e$id <- id + e$.auth <- auth + e$.api_endpoint <- api_endpoint + e$.management_endpoint <- management_endpoint + e$.studioapi <- studioapi_default + e$.studiobase <- baseuri_default + e$.headers <- list( + `User-Agent` = "R", + `Content-Type` = "application/json;charset=UTF8", + `x-ms-client-session-id` = "DefaultSession", + `x-ms-metaanalytics-authorizationtoken` = auth + ) delayedAssign("experiments", get_experiments(e), assign.env=e) delayedAssign("datasets", get_datasets(e), assign.env=e) delayedAssign("services", services(e), assign.env=e) @@ -132,9 +137,9 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, refresh <- function(ws, what=c("everything", "datasets", "experiments", "services")) { what = match.arg(what) - if(what %in% c("everything", "experiments")) ws$experiments = get_experiments(ws) - if(what %in% c("everything", "datasets")) ws$datasets = get_datasets(ws) - 
if(what %in% c("everything", "services")) ws$services = services(ws) + if(what %in% c("everything", "experiments")) ws$experiments <- get_experiments(ws) + if(what %in% c("everything", "datasets")) ws$datasets <- get_datasets(ws) + if(what %in% c("everything", "services")) ws$services <- services(ws) invisible() } From 33ac917db26ca199c9efc1b9e9000edbfd4d9e78 Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Wed, 16 Dec 2015 11:45:08 +0000 Subject: [PATCH 10/14] Additional fixes to resolve studioapi.azureml.net/api and studio.azureml.net/api; #67 --- R/consume.R | 62 +++++++++---------- R/discover.R | 14 +++-- R/internal.R | 15 ++--- R/methods.R | 14 +++-- R/workspace.R | 28 ++++++--- tests/testthat/test-1-workspace.R | 4 +- .../test-2-datasets-upload-download-delete.R | 5 +- .../test-4-download-each-dataset-type.R | 3 + 8 files changed, 85 insertions(+), 60 deletions(-) diff --git a/R/consume.R b/R/consume.R index 4d10aa3..b4e71e3 100644 --- a/R/consume.R +++ b/R/consume.R @@ -25,35 +25,36 @@ consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "outpu { if(is.Service(endpoint)) { - if(nrow(endpoint) > 1) endpoint = endpoint[1,] - default = endpoint$DefaultEndpointName - endpoint = endpoints(attr(endpoint, "workspace"), endpoint) - endpoint = subset(endpoint, Name=default) + if(nrow(endpoint) > 1) endpoint = endpoint[1, ] + default <- endpoint$DefaultEndpointName + endpoint <- endpoints(attr(endpoint, "workspace"), endpoint) + endpoint <- subset(endpoint, Name = default) } - if(!is.Endpoint(endpoint)) stop("Invalid endpoint. Use publishWebservice() or endpoints() to create or obtain a service endpoint.") + if(!is.Endpoint(endpoint)) { + msg <- "Invalid endpoint. Use publishWebservice() or endpoints() to create or obtain a service endpoint." 
+ stop(msg) + } - apiKey = endpoint$PrimaryKey - requestUrl = endpoint$ApiLocation + apiKey <- endpoint$PrimaryKey + requestUrl <- endpoint$ApiLocation if(missing(globalParam)) { globalParam = setNames(list(), character(0)) } # Store variable number of lists entered as a list of lists requestsLists = list(...) - if(length(requestsLists)==1 && is.data.frame(requestsLists[[1]])) - { + if(length(requestsLists)==1 && is.data.frame(requestsLists[[1]])) { requestsLists = requestsLists[[1]] - } else - { + } else { if(!is.list(requestsLists[[1]])) requestsLists = list(requestsLists) } # Make API call with parameters - result = callAPI(apiKey, requestUrl, requestsLists, globalParam, retryDelay) + result <- callAPI(apiKey, requestUrl, requestsLists, globalParam, retryDelay) if(inherits(result, "error")) stop("AzureML returned error code") + # Access output by converting from JSON into list and indexing into Results - if(!is.null(output) && output == "output1") - { + if(!is.null(output) && output == "output1") { help = endpointHelp(endpoint)$definitions$output1Item ans = data.frame(result$Results$output1) nums = which("number" == unlist(help)[grepl("\\.type$", names(unlist(help)))]) @@ -62,8 +63,9 @@ consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "outpu if(length(logi) > 0) for(j in logi) ans[,j] = as.logical(ans[,j]) return(ans) } - if(!is.null(output) && output == "output2") + if(!is.null(output) && output == "output2") { return(fromJSON(result$Results$output2[[1]])) + } result$Results } @@ -84,30 +86,28 @@ consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "outpu #' @importFrom jsonlite toJSON #' @importFrom curl handle_setheaders new_handle handle_setopt curl_fetch_memory #' @keywords internal -callAPI <- function(apiKey, requestUrl, keyvalues, globalParam, retryDelay=10) -{ +callAPI <- function(apiKey, requestUrl, keyvalues, globalParam, retryDelay=10) { # Set number of tries and HTTP status to 0 - result = NULL + result 
<- NULL # Construct request payload - req = list( + req <- list( Inputs = list(input1 = keyvalues), GlobalParameters = globalParam ) - body = charToRaw(paste(toJSON(req, auto_unbox=TRUE, digits=16), collapse = "\n")) - h = new_handle() - headers = list(`User-Agent`="R", - `Content-Type`="application/json", - `Authorization`=sprintf("Bearer %s", apiKey)) - handle_setheaders(h, .list=headers) + body <- charToRaw(paste(toJSON(req, auto_unbox=TRUE, digits=16), collapse = "\n")) + h <- new_handle() + headers <- list(`User-Agent` = "R", + `Content-Type` = "application/json", + `Authorization` = sprintf("Bearer %s", apiKey)) + handle_setheaders(h, .list = headers) handle_setopt(h, .list = list( - post=TRUE, - postfieldsize=length(body), - postfields=body) + post = TRUE, + postfieldsize = length(body), + postfields = body) ) - r = try_fetch(requestUrl, h, delay=retryDelay) + r = try_fetch(requestUrl, h, delay = retryDelay) result = fromJSON(rawToChar(r$content)) - if(r$status_code >= 400) - { + if(r$status_code >= 400) { stop(paste(capture.output(result), collapse="\n")) } result diff --git a/R/discover.R b/R/discover.R index 9525e91..e57fda1 100644 --- a/R/discover.R +++ b/R/discover.R @@ -217,16 +217,19 @@ endpointHelp <- function(e, type = c("apidocument", "r-snippet","score","jobs"," { type = match.arg(type) rsnip = FALSE - if(type=="r-snippet") - { + if(type=="r-snippet") { type = "score" rsnip = TRUE } uri = e$HelpLocation[1] + # XXX This is totally nuts, and not documented, but help hosts vary depending on type. # Arrghhh... 
- if(type == "apidocument") + if(type == "apidocument"){ uri = gsub("studio.azureml.net/apihelp", "management.azureml.net", uri) + uri = gsub("studio.azureml-int.net/apihelp", "management.azureml-int.net", uri) + } + pattern = "\\s]+))?)+\\s*|\\s*)/?>" con = curl(paste(uri, type, sep="/")) text = paste( @@ -239,10 +242,9 @@ endpointHelp <- function(e, type = c("apidocument", "r-snippet","score","jobs"," collapse="\n" ) close(con) - if(rsnip) - { + if(rsnip) { text = substr(text, - grepRaw("code-snippet-r",text)+nchar("code-snippet-r")+2,nchar(text) + grepRaw("code-snippet-r", text) + nchar("code-snippet-r") + 2, nchar(text) ) } if(type == "apidocument") text = fromJSON(text) diff --git a/R/internal.R b/R/internal.R index e8c739c..51db978 100644 --- a/R/internal.R +++ b/R/internal.R @@ -38,8 +38,7 @@ try_fetch <- function(uri, handle, delay = 10, exponent = 1.2) { i = 0 - while(i < tries) - { + while(i < tries) { r = curl_fetch_memory(uri, handle) if(!(r$status_code %in% retry_on)) return(r) if(i == 0) @@ -76,20 +75,22 @@ get_datasets <- function(ws) r = curl(sprintf("%s/workspaces/%s/datasources", ws$.studioapi, ws$id), handle=h) on.exit(close(r)) x = tryCatch(fromJSON(readLines(r, warn=FALSE)), error=invisible) - if(is.null(x) || is.na(x$Name[1])) - { + if(is.null(x) || is.na(x$Name[1])){ x = data.frame() class(x) = c("Datasets", "data.frame") return(x) } # Use strict variable name matching to look up data d = x[,"DownloadLocation"] - x$DownloadLocation = paste(d[,"BaseUri"], d[,"Location"], + x$DownloadLocation = paste(d[,"BaseUri"], + d[,"Location"], d[,"AccessCredential"], sep="") d = x[,"VisualizeEndPoint"] - x$VisualizeEndPoint = paste(d[,"BaseUri"], d[,"AccessCredential"], sep="") + x$VisualizeEndPoint = paste(d[,"BaseUri"], + d[,"AccessCredential"], sep="") d = x[,"SchemaEndPoint"] - x$SchemaEndPoint = paste(d[,"BaseUri"], d[,"Location"], + x$SchemaEndPoint = paste(d[,"BaseUri"], + d[,"Location"], d[,"AccessCredential"], sep="") class(x) = c("Datasets", 
"data.frame") x diff --git a/R/methods.R b/R/methods.R index afe89fa..c57d3d4 100644 --- a/R/methods.R +++ b/R/methods.R @@ -54,12 +54,18 @@ is.Endpoint <- function(x){ } #' @export -print.Workspace = function(x, ...) +print.Workspace = function(x, detail = FALSE, ...) { cat("AzureML Workspace\n") - cat("Workspace ID: ", x$id, "\n") - cat("API endpoint:", x$.api_endpoint, "\n") - cat("Management endpoint:", x$.management_endpoint, "\n") + cat("Workspace ID :", x$id, "\n") + if(detail == "all" || isTRUE(detail)){ + cat("API endpoint :", x$.api_endpoint, "\n") + cat("Management endpoint :", x$.management_endpoint, "\n") + } + if(detail == "all"){ + cat("Studio API :", x$.studioapi, "\n") + cat("Studio base API :", x$.studiobase, "\n") + } } #' @export diff --git a/R/workspace.R b/R/workspace.R index 03962be..70bec15 100644 --- a/R/workspace.R +++ b/R/workspace.R @@ -21,10 +21,22 @@ # THE SOFTWARE. -api_endpoint_default <- "https://studio.azureml.net" -management_endpoint_default <- "https://management.azureml.net" -studioapi_default <- "https://studioapi.azureml.net/api" -baseuri_default <- "https://studio.azureml.net/api" +default_api_prod <- list( + api_endpoint = "https://studioapi.azureml.net", + management_endpoint = "https://management.azureml.net", + studioapi = "https://studioapi.azureml.net/api" + # baseuri = "https://studioapi.azureml.net/api" +) + +default_api_int <- list( + api_endpoint = "https://studio.azureml-int.net", + management_endpoint = "https://management.azureml-int.net", + studioapi = "https://studioapi.azureml-int.net/api" + # baseuri = "https://studioapi.azureml-int.net/api" +) + +default_api <- default_api_int + #' Create a reference to an AzureML Studio workspace. 
#' @@ -87,8 +99,8 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, management_endpoint <- s[["workspace"]][["management_endpoint"]] } } - if(is.null(api_endpoint)) api_endpoint <- api_endpoint_default - if(is.null(management_endpoint)) management_endpoint <- management_endpoint_default + if(is.null(api_endpoint)) api_endpoint <- default_api[["api_endpoint"]] + if(is.null(management_endpoint)) management_endpoint <- default_api[["management_endpoint"]] # test to see if api_endpoint is a valid url resp <- tryCatch( @@ -110,8 +122,8 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, e$.auth <- auth e$.api_endpoint <- api_endpoint e$.management_endpoint <- management_endpoint - e$.studioapi <- studioapi_default - e$.studiobase <- baseuri_default + e$.studioapi <- default_api[["studioapi"]] +# e$.studiobase <- default_api[["baseuri"]] e$.headers <- list( `User-Agent` = "R", `Content-Type` = "application/json;charset=UTF8", diff --git a/tests/testthat/test-1-workspace.R b/tests/testthat/test-1-workspace.R index a7c3b9c..027b57e 100644 --- a/tests/testthat/test-1-workspace.R +++ b/tests/testthat/test-1-workspace.R @@ -43,8 +43,8 @@ test_that("Add api_endpoint and management_endpoint if missing from config", { makeConfig("x", "y", file = tf) ws <- workspace(config = tf) expect_equal(ws$id, "x") - expect_equal(ws$.api_endpoint, api_endpoint_default) - expect_equal(ws$.management_endpoint, management_endpoint_default) + expect_equal(ws$.api_endpoint, default_api[["api_endpoint"]]) + expect_equal(ws$.management_endpoint, default_api[["management_endpoint"]]) }) test_that("Add api_endpoint and management_endpoint if missing from config", { diff --git a/tests/testthat/test-2-datasets-upload-download-delete.R b/tests/testthat/test-2-datasets-upload-download-delete.R index 954ccee..a5c9196 100644 --- a/tests/testthat/test-2-datasets-upload-download-delete.R +++ b/tests/testthat/test-2-datasets-upload-download-delete.R @@ -28,8 +28,9 @@ 
if(file.exists(settingsFile)) test_that("Can delete dataset from workspace", { z <- delete.datasets(ws, timestamped_name) - expect_true(timestamped_name %in% z$Name) - # refresh(ws) + expect_true(timestamped_name %in% z$Name && z$Deleted[z$Name == timestamped_name]) + # Force refresh - sometime this fails in non-interactive + Sys.sleep(0.5); refresh(ws, what = "datasets") ds <- datasets(ws, filter = "my") expect_false(timestamped_name %in% ds$Name) }) diff --git a/tests/testthat/test-4-download-each-dataset-type.R b/tests/testthat/test-4-download-each-dataset-type.R index 2225223..a942b67 100644 --- a/tests/testthat/test-4-download-each-dataset-type.R +++ b/tests/testthat/test-4-download-each-dataset-type.R @@ -7,6 +7,9 @@ if(file.exists(settingsFile)) ws <- workspace() ds <- datasets(ws, filter = "samples") + ds$Name + testIdx <- grepl("[Tt]est", ds$Name) + ds <- ds[!testIdx, ] unique(ds$DataTypeId) oneOfEach <- do.call( From f5dfea9b1c2cd96f795ffaec11c4726f3911fa6a Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Wed, 16 Dec 2015 12:41:53 +0000 Subject: [PATCH 11/14] Dynamically recognise studio.azureml.net and studio.azureml-int.net #67 --- R/methods.R | 9 +-- R/workspace.R | 61 +++++++++++-------- tests/testthat.R | 2 +- tests/testthat/test-1-workspace.R | 4 +- .../test-2-datasets-upload-download-delete.R | 2 +- 5 files changed, 44 insertions(+), 34 deletions(-) diff --git a/R/methods.R b/R/methods.R index c57d3d4..dcdd3c9 100644 --- a/R/methods.R +++ b/R/methods.R @@ -58,13 +58,10 @@ print.Workspace = function(x, detail = FALSE, ...) 
{ cat("AzureML Workspace\n") cat("Workspace ID :", x$id, "\n") - if(detail == "all" || isTRUE(detail)){ - cat("API endpoint :", x$.api_endpoint, "\n") - cat("Management endpoint :", x$.management_endpoint, "\n") - } - if(detail == "all"){ + cat("API endpoint :", x$.api_endpoint, "\n") + if(detail){ cat("Studio API :", x$.studioapi, "\n") - cat("Studio base API :", x$.studiobase, "\n") + cat("Management endpoint :", x$.management_endpoint, "\n") } } diff --git a/R/workspace.R b/R/workspace.R index 70bec15..135fca7 100644 --- a/R/workspace.R +++ b/R/workspace.R @@ -21,22 +21,31 @@ # THE SOFTWARE. -default_api_prod <- list( - api_endpoint = "https://studioapi.azureml.net", - management_endpoint = "https://management.azureml.net", - studioapi = "https://studioapi.azureml.net/api" - # baseuri = "https://studioapi.azureml.net/api" -) - -default_api_int <- list( - api_endpoint = "https://studio.azureml-int.net", - management_endpoint = "https://management.azureml-int.net", - studioapi = "https://studioapi.azureml-int.net/api" - # baseuri = "https://studioapi.azureml-int.net/api" -) - -default_api <- default_api_int +default_api <- function(api_endpoint = "https://studioapi.azureml.net"){ + defaults <- list( + + "https://studioapi.azureml.net" = list( + api_endpoint = "https://studioapi.azureml.net", + management_endpoint = "https://management.azureml.net", + studioapi = "https://studioapi.azureml.net/api" + + ), "https://studioapi.azureml-int.net" = list( + + api_endpoint = "https://studio.azureml-int.net", + management_endpoint = "https://management.azureml-int.net", + studioapi = "https://studioapi.azureml-int.net/api" + + ) + ) + + + if(api_endpoint %in% names(defaults)){ + defaults[api_endpoint][[1]] + } else { + stop("api_endpoint not recognized") + } +} #' Create a reference to an AzureML Studio workspace. 
#' @@ -78,27 +87,32 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, if(missing(id) || missing(auth) || missing(api_endpoint) || missing(management_endpoint)) { if(!file.exists(config)) stop(sprintf("config file is missing: '%s'", config)) - s = tryCatch(fromJSON(file(config)), - error = function(e)e + settings = tryCatch(fromJSON(file(config)), + error = function(e)e ) - if(inherits(s, "error")) { + if(inherits(settings, "error")) { msg <- sprintf("Your config file contains invalid json", config) - msg <- paste(msg, s$message, sep = "\n\n") + msg <- paste(msg, settings$message, sep = "\n\n") stop(msg, call. = FALSE) } if(missing(id)){ - id <- s[["workspace"]][["id"]] + id <- settings[["workspace"]][["id"]] } if(missing(auth)){ - auth <- s[["workspace"]][["authorization_token"]] + auth <- settings[["workspace"]][["authorization_token"]] } if(missing(api_endpoint)){ - api_endpoint <- s[["workspace"]][["api_endpoint"]] + api_endpoint <- settings[["workspace"]][["api_endpoint"]] } if(missing(management_endpoint)){ - management_endpoint <- s[["workspace"]][["management_endpoint"]] + management_endpoint <- settings[["workspace"]][["management_endpoint"]] } } + default_api <- if(is.null(api_endpoint)) { + default_api() + } else { + default_api(api_endpoint) + } if(is.null(api_endpoint)) api_endpoint <- default_api[["api_endpoint"]] if(is.null(management_endpoint)) management_endpoint <- default_api[["management_endpoint"]] @@ -123,7 +137,6 @@ workspace <- function(id, auth, api_endpoint, management_endpoint, e$.api_endpoint <- api_endpoint e$.management_endpoint <- management_endpoint e$.studioapi <- default_api[["studioapi"]] -# e$.studiobase <- default_api[["baseuri"]] e$.headers <- list( `User-Agent` = "R", `Content-Type` = "application/json;charset=UTF8", diff --git a/tests/testthat.R b/tests/testthat.R index e57559e..2945efa 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,3 +1,3 @@ Sys.setenv("R_TESTS" = "") -library(testthat) 
+library(testthat, quietly = TRUE) test_check("AzureML") diff --git a/tests/testthat/test-1-workspace.R b/tests/testthat/test-1-workspace.R index 027b57e..70da399 100644 --- a/tests/testthat/test-1-workspace.R +++ b/tests/testthat/test-1-workspace.R @@ -43,8 +43,8 @@ test_that("Add api_endpoint and management_endpoint if missing from config", { makeConfig("x", "y", file = tf) ws <- workspace(config = tf) expect_equal(ws$id, "x") - expect_equal(ws$.api_endpoint, default_api[["api_endpoint"]]) - expect_equal(ws$.management_endpoint, default_api[["management_endpoint"]]) + expect_equal(ws$.api_endpoint, default_api(ws$.api_endpoint)[["api_endpoint"]]) + expect_equal(ws$.management_endpoint, default_api(ws$.api_endpoint)[["management_endpoint"]]) }) test_that("Add api_endpoint and management_endpoint if missing from config", { diff --git a/tests/testthat/test-2-datasets-upload-download-delete.R b/tests/testthat/test-2-datasets-upload-download-delete.R index a5c9196..aac49fc 100644 --- a/tests/testthat/test-2-datasets-upload-download-delete.R +++ b/tests/testthat/test-2-datasets-upload-download-delete.R @@ -30,7 +30,7 @@ if(file.exists(settingsFile)) z <- delete.datasets(ws, timestamped_name) expect_true(timestamped_name %in% z$Name && z$Deleted[z$Name == timestamped_name]) # Force refresh - sometime this fails in non-interactive - Sys.sleep(0.5); refresh(ws, what = "datasets") + Sys.sleep(1); refresh(ws, what = "datasets") ds <- datasets(ws, filter = "my") expect_false(timestamped_name %in% ds$Name) }) From 3bc8f08219085b7a1f1f917c6456862968a7be6c Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Wed, 16 Dec 2015 16:04:18 +0000 Subject: [PATCH 12/14] Adjusted algorithm for exponential backoff to conform to delay = n * (2^c - 1); Added unit test. 
#48 --- R/internal.R | 24 +++++++++++++++--------- tests/testthat/test-5-publish.R | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/R/internal.R b/R/internal.R index 51db978..b43209c 100644 --- a/R/internal.R +++ b/R/internal.R @@ -34,18 +34,24 @@ date_origin = "1970-1-1" #' @return the result of curl_fetch_memory(uri, handle) try_fetch <- function(uri, handle, retry_on = c(400, 401, 440, 503, 504, 509), - tries = 3, - delay = 10, exponent = 1.2) + tries = 6, + delay = 1, exponent = 2) { - i = 0 - while(i < tries) { + collisions = 1 + while(collisions < tries) { r = curl_fetch_memory(uri, handle) if(!(r$status_code %in% retry_on)) return(r) - if(i == 0) - message(sprintf("Request failed with status %s. Retrying request...", r$status_code)) - Sys.sleep(delay) - delay = delay^exponent - i = i + 1 + wait_time = delay * (2 ^ collisions - 1) + wait_time <- ceiling(runif(1, min = 0.001, max = wait_time)) + message(sprintf("Request failed with status %s. Waiting %s seconds before retry", + r$status_code, + wait_time)) + for(i in 1:wait_time){ + message(".", appendLF = FALSE) + Sys.sleep(1) + } + message("\n") + collisions = collisions + 1 } r } diff --git a/tests/testthat/test-5-publish.R b/tests/testthat/test-5-publish.R index 8ad088b..e3a0d15 100644 --- a/tests/testthat/test-5-publish.R +++ b/tests/testthat/test-5-publish.R @@ -134,3 +134,22 @@ test_that("publishWebService works with data frame input", { deleteWebService(ws, timestamped_name) }) + +test_that("try_fetch gives exponential retry messages",{ + set.seed(1) + with_mock( + curl_fetch_memory = function(...){ + retry_on = c(400, 401, 440, 503, 504, 509) + status_code <- if(runif(1) > 0.26) sample(retry_on, 1) else 200 + list(status_code = status_code, contents = NA) + }, { + msg <- "Request failed with status 440. 
Waiting 1 seconds before retry\n" + expect_message( + try_fetch(delay = 0.25), + msg + ) + }, + .env = "curl" + ) + +}) From c0d110baed58e22d036c26355664ec20278eb9e2 Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Fri, 18 Dec 2015 09:47:39 +0000 Subject: [PATCH 13/14] Rebuild docs --- NAMESPACE | 2 +- R/methods.R | 14 +- man/AzureML-deprecated.Rd | 2 +- man/AzureML-package.Rd | 2 +- man/azureSchema.Rd | 2 +- man/callAPI.Rd | 2 +- man/consume.Rd | 2 +- man/datasets.Rd | 8 +- man/delete.datasets.Rd | 10 +- man/deleteWebService.Rd | 4 +- man/discoverSchema.Rd | 9 +- man/download.datasets.Rd | 2 +- man/download.intermediate.dataset.Rd | 10 +- man/endpointHelp.Rd | 7 +- man/endpoints.Rd | 8 +- man/experiments.Rd | 4 +- man/getDetailsFromUrl.Rd | 2 +- man/get_dataset.Rd | 2 +- man/get_datasets.Rd | 2 +- man/get_experiments.Rd | 2 +- man/is.Endpoint.Rd | 6 +- man/is.Service.Rd | 2 +- man/is.Workspace.Rd | 2 +- man/packageEnv.Rd | 2 +- man/publishWebService.Rd | 4 +- man/refresh.Rd | 2 +- man/services.Rd | 8 +- man/test_wrapper.Rd | 2 +- man/try_fetch.Rd | 6 +- man/upload.dataset.Rd | 8 +- man/workspace.Rd | 20 +- vignettes/getting_started.R | 70 ++- vignettes/getting_started.html | 829 +++++++++++++++++++-------- 33 files changed, 730 insertions(+), 327 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 70b3219..3d92d59 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,4 @@ -# Generated by roxygen2 (4.1.1): do not edit by hand +# Generated by roxygen2: do not edit by hand S3method(print,Datasets) S3method(print,Experiments) diff --git a/R/methods.R b/R/methods.R index dcdd3c9..6b21275 100644 --- a/R/methods.R +++ b/R/methods.R @@ -31,13 +31,18 @@ stopIfNotWorkspace <- function(x){ } -#' @title Test if an object is an Azure ML workspace. +#' Test if an object is an Azure ML workspace. +#' #' @param x an R object #' @return logical value, TRUE if \code{x} represents an Azure ML workspace. 
#' @export -is.Workspace <- function(x) "Workspace" %in% class(x) +is.Workspace <- function(x){ + inherits(x, "Workspace") +} + -#' @title Test if an object is an Azure ML Service. +#' Test if an object is an Azure ML Service. +#' #' @param x an R object #' @return logical value, TRUE if \code{x} represents an Azure ML web service #' @export @@ -45,7 +50,8 @@ is.Service <- function(x){ inherits(x, "Service") } -#' @title Test if an object is an Azure ML Endpoint +#' Test if an object is an Azure ML Endpoint. +#' #' @param x an R object #' @return logical value, TRUE if \code{x} represents an Azure ML web service endpoint #' @export diff --git a/man/AzureML-deprecated.Rd b/man/AzureML-deprecated.Rd index cca7436..fe49100 100644 --- a/man/AzureML-deprecated.Rd +++ b/man/AzureML-deprecated.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/azureml-defunct.R \name{consumeDataframe} \alias{consumeDataframe} diff --git a/man/AzureML-package.Rd b/man/AzureML-package.Rd index 216c46e..4eb5509 100644 --- a/man/AzureML-package.Rd +++ b/man/AzureML-package.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/azureml-package.R \docType{package} \name{AzureML-package} diff --git a/man/azureSchema.Rd b/man/azureSchema.Rd index 89d6f98..461ec01 100644 --- a/man/azureSchema.Rd +++ b/man/azureSchema.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/publish.R \name{azureSchema} \alias{azureSchema} diff --git a/man/callAPI.Rd b/man/callAPI.Rd index 5caa17e..f9d3067 100644 --- a/man/callAPI.Rd +++ b/man/callAPI.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/consume.R \name{callAPI} 
\alias{callAPI} diff --git a/man/consume.Rd b/man/consume.Rd index 7ef6a3e..0d3c6cc 100644 --- a/man/consume.Rd +++ b/man/consume.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/consume.R \name{consume} \alias{consume} diff --git a/man/datasets.Rd b/man/datasets.Rd index 78a8346..f7070fa 100644 --- a/man/datasets.Rd +++ b/man/datasets.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/workspace.R \name{datasets} \alias{datasets} @@ -53,8 +53,8 @@ an R data.frame, you can alternatively filter on any variable as desired. \seealso{ \code{\link{workspace}}, \code{\link{experiments}}, \code{\link{download.datasets}} -Other dataset functions: \code{\link{delete.datasets}}; - \code{\link{download.intermediate.dataset}}; - \code{\link{upload.dataset}}; \code{\link{workspace}} +Other dataset functions: \code{\link{delete.datasets}}, + \code{\link{download.intermediate.dataset}}, + \code{\link{upload.dataset}}, \code{\link{workspace}} } diff --git a/man/delete.datasets.Rd b/man/delete.datasets.Rd index cdf1ab7..62c1d04 100644 --- a/man/delete.datasets.Rd +++ b/man/delete.datasets.Rd @@ -1,10 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/datasets.R \name{delete.datasets} \alias{delete.datasets} \title{Delete datasets from an AzureML workspace.} \usage{ -delete.datasets(ws, name, host = "https://studioapi.azureml.net/api") +delete.datasets(ws, name, host) } \arguments{ \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} @@ -20,8 +20,8 @@ A data frame with columns Name, Deleted, status_code indicating the HTTP status Delete datasets from an AzureML workspace. 
} \seealso{ -Other dataset functions: \code{\link{datasets}}; - \code{\link{download.intermediate.dataset}}; - \code{\link{upload.dataset}}; \code{\link{workspace}} +Other dataset functions: \code{\link{datasets}}, + \code{\link{download.intermediate.dataset}}, + \code{\link{upload.dataset}}, \code{\link{workspace}} } diff --git a/man/deleteWebService.Rd b/man/deleteWebService.Rd index d2a12bc..01805b8 100644 --- a/man/deleteWebService.Rd +++ b/man/deleteWebService.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/publish.R \name{deleteWebService} \alias{deleteWebService} @@ -279,6 +279,6 @@ deleteWebService(ws, "sleepy lmer") \code{\link{services}} \code{\link{publishWebService}} \code{\link{updateWebService}} Other publishing functions: \code{\link{publishWebService}}, - \code{\link{updateWebService}}; \code{\link{workspace}} + \code{\link{workspace}} } diff --git a/man/discoverSchema.Rd b/man/discoverSchema.Rd index 280f29f..c0f8581 100644 --- a/man/discoverSchema.Rd +++ b/man/discoverSchema.Rd @@ -1,8 +1,8 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/consume.R \name{discoverSchema} \alias{discoverSchema} -\title{Discover web service schema} +\title{Discover web service schema.} \usage{ discoverSchema(helpURL, scheme = "https", host = "ussouthcentral.services.azureml.net", api_version = "2.0") @@ -25,9 +25,8 @@ Discover the expected input to a web service specified by a web service ID ng th \seealso{ \code{\link{publishWebService}} \code{\link{consume}} \code{\link{workspace}} \code{link{services}} \code{\link{endpoints}} \code{\link{endpointHelp}} -Other discovery functions: \code{\link{endpointHelp}}; - \code{\link{endpoints}}, \code{\link{getEndpoints}}; - \code{\link{getWebServices}}, \code{\link{services}}; +Other discovery functions: \code{\link{endpointHelp}}, + 
\code{\link{endpoints}}, \code{\link{services}}, \code{\link{workspace}} } diff --git a/man/download.datasets.Rd b/man/download.datasets.Rd index 2c555f5..c3b3f7e 100644 --- a/man/download.datasets.Rd +++ b/man/download.datasets.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/datasets.R \name{download.datasets} \alias{download.datasets} diff --git a/man/download.intermediate.dataset.Rd b/man/download.intermediate.dataset.Rd index 3b8d50b..ea97c71 100644 --- a/man/download.intermediate.dataset.Rd +++ b/man/download.intermediate.dataset.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/datasets.R \name{download.intermediate.dataset} \alias{download.intermediate.dataset} @@ -37,11 +37,11 @@ This function can download datasets with various CSV and TSV \code{DataTypeId} ( \seealso{ \code{\link{workspace}}, \code{\link{datasets}}, \code{\link[utils]{read.table}} and \code{\link{download.datasets}} -Other dataset functions: \code{\link{datasets}}; - \code{\link{delete.datasets}}; - \code{\link{upload.dataset}}; \code{\link{workspace}} +Other dataset functions: \code{\link{datasets}}, + \code{\link{delete.datasets}}, + \code{\link{upload.dataset}}, \code{\link{workspace}} -Other experiment functions: \code{\link{experiments}}; +Other experiment functions: \code{\link{experiments}}, \code{\link{workspace}} } diff --git a/man/endpointHelp.Rd b/man/endpointHelp.Rd index 9083bc8..b72a8fa 100644 --- a/man/endpointHelp.Rd +++ b/man/endpointHelp.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/discover.R \name{endpointHelp} \alias{endpointHelp} @@ -40,9 +40,8 @@ endpointHelp(e[1,])$definitions } } \seealso{ -Other discovery functions: \code{\link{discoverSchema}}; - 
\code{\link{endpoints}}, \code{\link{getEndpoints}}; - \code{\link{getWebServices}}, \code{\link{services}}; +Other discovery functions: \code{\link{discoverSchema}}, + \code{\link{endpoints}}, \code{\link{services}}, \code{\link{workspace}} } diff --git a/man/endpoints.Rd b/man/endpoints.Rd index 4df22ca..c277110 100644 --- a/man/endpoints.Rd +++ b/man/endpoints.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/discover.R \name{endpoints} \alias{endpoints} @@ -65,8 +65,8 @@ getEndpoints(ws, s$Id[1]) } } \seealso{ -Other discovery functions: \code{\link{discoverSchema}}; - \code{\link{endpointHelp}}; \code{\link{getWebServices}}, - \code{\link{services}}; \code{\link{workspace}} +Other discovery functions: \code{\link{discoverSchema}}, + \code{\link{endpointHelp}}, \code{\link{services}}, + \code{\link{workspace}} } diff --git a/man/experiments.Rd b/man/experiments.Rd index f9ebc62..267a53c 100644 --- a/man/experiments.Rd +++ b/man/experiments.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/workspace.R \name{experiments} \alias{experiments} @@ -40,7 +40,7 @@ List experiments in an AzureML workspace, optionally filtering on sample or my e \seealso{ \code{\link{workspace}}, \code{\link{datasets}}, \code{\link{download.intermediate.dataset}} -Other experiment functions: \code{\link{download.intermediate.dataset}}; +Other experiment functions: \code{\link{download.intermediate.dataset}}, \code{\link{workspace}} } diff --git a/man/getDetailsFromUrl.Rd b/man/getDetailsFromUrl.Rd index 12e8733..01613d3 100644 --- a/man/getDetailsFromUrl.Rd +++ b/man/getDetailsFromUrl.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/consume.R \name{getDetailsFromUrl} 
\alias{getDetailsFromUrl} diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index 006f69a..580e1f7 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{get_dataset} \alias{get_dataset} diff --git a/man/get_datasets.Rd b/man/get_datasets.Rd index e023cc0..c5fcbcb 100644 --- a/man/get_datasets.Rd +++ b/man/get_datasets.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{get_datasets} \alias{get_datasets} diff --git a/man/get_experiments.Rd b/man/get_experiments.Rd index fbb6c75..defb177 100644 --- a/man/get_experiments.Rd +++ b/man/get_experiments.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{get_experiments} \alias{get_experiments} diff --git a/man/is.Endpoint.Rd b/man/is.Endpoint.Rd index 8735f5a..25bc628 100644 --- a/man/is.Endpoint.Rd +++ b/man/is.Endpoint.Rd @@ -1,8 +1,8 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{is.Endpoint} \alias{is.Endpoint} -\title{Test if an object is an Azure ML Endpoint} +\title{Test if an object is an Azure ML Endpoint.} \usage{ is.Endpoint(x) } @@ -13,6 +13,6 @@ is.Endpoint(x) logical value, TRUE if \code{x} represents an Azure ML web service endpoint } \description{ -Test if an object is an Azure ML Endpoint +Test if an object is an Azure ML Endpoint. 
} diff --git a/man/is.Service.Rd b/man/is.Service.Rd index fb6e707..35e7492 100644 --- a/man/is.Service.Rd +++ b/man/is.Service.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{is.Service} \alias{is.Service} diff --git a/man/is.Workspace.Rd b/man/is.Workspace.Rd index 52267df..70aeb75 100644 --- a/man/is.Workspace.Rd +++ b/man/is.Workspace.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{is.Workspace} \alias{is.Workspace} diff --git a/man/packageEnv.Rd b/man/packageEnv.Rd index 55eb98a..114e293 100644 --- a/man/packageEnv.Rd +++ b/man/packageEnv.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{packageEnv} \alias{packageEnv} diff --git a/man/publishWebService.Rd b/man/publishWebService.Rd index e922dd3..c06d54f 100644 --- a/man/publishWebService.Rd +++ b/man/publishWebService.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/publish.R \name{publishWebService} \alias{publishWebService} @@ -328,7 +328,7 @@ deleteWebService(ws, "sleepy lmer") \seealso{ \code{\link{endpoints}}, \code{\link{discoverSchema}}, \code{\link{consume}} and \code{\link{services}}. 
-Other publishing functions: \code{\link{deleteWebService}}; +Other publishing functions: \code{\link{deleteWebService}}, \code{\link{workspace}} } diff --git a/man/refresh.Rd b/man/refresh.Rd index faf5096..ddf5fbf 100644 --- a/man/refresh.Rd +++ b/man/refresh.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/workspace.R \name{refresh} \alias{refresh} diff --git a/man/services.Rd b/man/services.Rd index f71a42c..d930154 100644 --- a/man/services.Rd +++ b/man/services.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/discover.R \name{services} \alias{getWebServices} @@ -53,8 +53,8 @@ getWebServices(ws) } } \seealso{ -Other discovery functions: \code{\link{discoverSchema}}; - \code{\link{endpointHelp}}; \code{\link{endpoints}}, - \code{\link{getEndpoints}}; \code{\link{workspace}} +Other discovery functions: \code{\link{discoverSchema}}, + \code{\link{endpointHelp}}, \code{\link{endpoints}}, + \code{\link{workspace}} } diff --git a/man/test_wrapper.Rd b/man/test_wrapper.Rd index c971e8c..8da16f6 100644 --- a/man/test_wrapper.Rd +++ b/man/test_wrapper.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/publish.R \name{test_wrapper} \alias{test_wrapper} diff --git a/man/try_fetch.Rd b/man/try_fetch.Rd index 2a97d7a..8f7c6d9 100644 --- a/man/try_fetch.Rd +++ b/man/try_fetch.Rd @@ -1,11 +1,11 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/internal.R \name{try_fetch} \alias{try_fetch} \title{Try to fetch a uri/handle, retrying on certain returned status codes after a timeout} \usage{ -try_fetch(uri, handle, retry_on = c(503, 504, 509, 400, 401, 440), - tries = 3, delay = 10, exponent = 
1.2) +try_fetch(uri, handle, retry_on = c(400, 401, 440, 503, 504, 509), + tries = 6, delay = 1, exponent = 2) } \arguments{ \item{uri}{the uri to fetch} diff --git a/man/upload.dataset.Rd b/man/upload.dataset.Rd index 6a2698a..e0d9ae2 100644 --- a/man/upload.dataset.Rd +++ b/man/upload.dataset.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/datasets.R \name{upload.dataset} \alias{upload.dataset} @@ -46,9 +46,9 @@ The AzureML API does not support uploads for _replacing_ datasets with new data } } \seealso{ -Other dataset functions: \code{\link{datasets}}; - \code{\link{delete.datasets}}; - \code{\link{download.intermediate.dataset}}; +Other dataset functions: \code{\link{datasets}}, + \code{\link{delete.datasets}}, + \code{\link{download.intermediate.dataset}}, \code{\link{workspace}} } diff --git a/man/workspace.Rd b/man/workspace.Rd index defa93c..9d49130 100644 --- a/man/workspace.Rd +++ b/man/workspace.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/workspace.R \name{workspace} \alias{workspace} @@ -45,21 +45,19 @@ If any of the \code{id}, \code{auth}, \code{api_endpoint} or \code{management_en Other consumption functions: \code{\link{consume}} -Other dataset functions: \code{\link{datasets}}; - \code{\link{delete.datasets}}; - \code{\link{download.intermediate.dataset}}; +Other dataset functions: \code{\link{datasets}}, + \code{\link{delete.datasets}}, + \code{\link{download.intermediate.dataset}}, \code{\link{upload.dataset}} -Other discovery functions: \code{\link{discoverSchema}}; - \code{\link{endpointHelp}}; \code{\link{endpoints}}, - \code{\link{getEndpoints}}; \code{\link{getWebServices}}, +Other discovery functions: \code{\link{discoverSchema}}, + \code{\link{endpointHelp}}, \code{\link{endpoints}}, \code{\link{services}} -Other experiment 
functions: \code{\link{download.intermediate.dataset}}; +Other experiment functions: \code{\link{download.intermediate.dataset}}, \code{\link{experiments}} -Other publishing functions: \code{\link{deleteWebService}}; - \code{\link{publishWebService}}, - \code{\link{updateWebService}} +Other publishing functions: \code{\link{deleteWebService}}, + \code{\link{publishWebService}} } diff --git a/vignettes/getting_started.R b/vignettes/getting_started.R index 8c8c518..40a57da 100644 --- a/vignettes/getting_started.R +++ b/vignettes/getting_started.R @@ -11,13 +11,10 @@ ws <- workspace() ws ## ------------------------------------------------------------------------ -head(datasets(ws)) - -# Or, equivalently: -head(ws$datasets) +head(datasets(ws)) # Or, equivalently: head(ws$datasets) ## ------------------------------------------------------------------------ -ws$datasets$Owner +head(ws$datasets$Owner, n=20) ## ------------------------------------------------------------------------ airports <- download.datasets(ws, name = "Airport Codes Dataset", quote="\"") @@ -55,7 +52,7 @@ ws <- workspace() api <- publishWebService( ws, fun = add, - name = "AzureML-vignette-add", + name = "AzureML-vignette-silly", inputSchema = list( x = "numeric", y = "numeric" @@ -70,45 +67,76 @@ class(api) names(api) ## ----help---------------------------------------------------------------- -helpPageUrl <- api$HelpLocation -helpPageUrl +(helpPageUrl <- api$HelpLocation) ## ----update-------------------------------------------------------------- api <- updateWebService( ws, - fun = add, - name = "AzureML-vignette-add", + fun = function(x, y) x - y, inputSchema = list( - x = "numeric", + x = "numeric", y = "numeric" - ), + ), outputSchema = list( ans = "numeric" ), - wsid = api$WorkspaceId # <<-- Note you must add wsid to update! + serviceId = api$WebServiceId # <<-- Required to update! 
) ## ----webservice---------------------------------------------------------- -webservices <- services(ws, name = "AzureML-vignette-add") +(webservices <- services(ws, name = "AzureML-vignette-silly")) ## ----endpoints----------------------------------------------------------- -ep <- endpoints(ws, webservices[1, ]$Id) +ep <- endpoints(ws, webservices[1, ]) class(ep) names(ep) -## ----discover------------------------------------------------------------ -discoverSchema(ep$HelpLocation) - ## ----df------------------------------------------------------------------ df <- data.frame( x = 1:5, y = 6:10 ) -s <- services(ws, name = "AzureML-vignette-add") +s <- services(ws, name = "AzureML-vignette-silly") s <- tail(s, 1) # use the last published function, in case of duplicate function names -ep <- endpoints(ws, s$Id) +ep <- endpoints(ws, s) consume(ep, df) ## ----delete-------------------------------------------------------------- -deleteWebService(ws, name = "AzureML-vignette-add") +deleteWebService(ws, name = "AzureML-vignette-silly") + +## ------------------------------------------------------------------------ +library(AzureML) +library(MASS) +library(gbm) + +ws <- workspace() +test <- Boston[1:5, 1:13] + +set.seed(123) +gbm1 <- gbm(medv ~ ., + distribution = "gaussian", + n.trees = 5000, + interaction.depth = 8, + n.minobsinnode = 1, + shrinkage = 0.01, + cv.folds = 5, + data = Boston, + n.cores = 1) # You can set this to n.cores = NULL to use all cores +best.iter <- gbm.perf(gbm1, method="cv", plot=FALSE) + +mypredict <- function(newdata) +{ + require(gbm) + predict(gbm1, newdata, best.iter) +} + +# Example use of the prediction function +print(mypredict(test)) + +# Publish the service +ep <- publishWebService(ws = ws, fun = mypredict, name = "AzureML-vignette-gbm", + inputSchema = test) + +# Consume test data, comparing with result above +print(consume(ep, test)) diff --git a/vignettes/getting_started.html b/vignettes/getting_started.html index 3cda4bd..3e28687 
100644 --- a/vignettes/getting_started.html +++ b/vignettes/getting_started.html @@ -1,79 +1,221 @@ + + + - +Installation instructions - + + + + - - - + + - + -
- + + + +

Use this package to upload and download datasets to and from AzureML, to +interrogate experiments, to publish R functions as AzureML web services, and to +run R data through existing web services and retrieve the output.

-

Use this package to upload and download datasets to and from AzureML, to interrogate experiments, to publish R functions as AzureML web services, and to run R data through existing web services and retrieve the output.

-

Installation instructions

+

Install the development version of the package directly from GitHub with:

-
# Install devtools
+
+
# Install devtools
 if(!require("devtools")) install.packages("devtools")
-devtools::install_github("RevolutionAnalytics/azureml")
+devtools::install_github("RevolutionAnalytics/azureml") +
+

The package depends on:

+
  • jsonlite
  • curl
  • @@ -81,84 +223,155 @@

    Installation instructions

  • base64enc
  • uuid
+

Some of the examples use data and functions in:

+
  • lme4
  • ggplot2
-
-
+

Overview

-

AzureML provides an interface to publish web services on Microsoft Azure Machine Learning (Azure ML) from your local R environment. The main functions in the package cover the following topics:

+ +

AzureML provides an interface to publish web services on Microsoft Azure +Machine Learning (Azure ML) from your local R environment. The main functions +in the package cover the following topics:

+
  • Workspace: connect to and manage AzureML workspaces
  • Datasets: upload and download datasets to and from AzureML workspaces
  • Publish: publish R functions as AzureML web services, and update or delete existing services
  • Consume: apply any AzureML web service to your R data
-
+

Getting Started

-

To get started, please navigate to AzureML Studio and create a free account (not guest) or use your existing AzureML account. After logging in, under the “Settings” tab, copy and paste your Workspace ID from the “Name” sub-tab into your R console. From the “Authorization Tokens” sub-tab, copy your Primary Authorization Token into your R console. You will need this information to access all package functionality.

-

The package defines a Workspace class that represents an AzureML work space. Most of the functions in the package refer to a Workspace object directly or indirectly. Use the workspace() function to create Workspace objects, either by explicitly specifying an AzureML workspace ID and authorization token. Workspace objects are simply R environments that actively cache details about your AzureML sessions.

-
-
+ +

To get started, please navigate to AzureML Studio +and create a free account (not guest) or use your existing AzureML account. +After logging in, under the “Settings” tab, copy and paste your Workspace ID +from the “Name” sub-tab into your R console. From the “Authorization Tokens” +sub-tab, copy your Primary Authorization Token into your R console. You will +need this information to access all package functionality.

+ +

The package defines a Workspace class that represents an AzureML workspace. +Most of the functions in the package refer to a Workspace object directly or +indirectly. Use the workspace() function to create Workspace objects, either +by explicitly specifying an AzureML workspace ID and authorization token, or by reading them from a settings file. +Workspace objects are simply R environments that actively cache details about +your AzureML sessions.

+

Obtaining AzureML Credentials

-

Before using the package, it is necessary to first obtain the security credentials to your Azure Machine Learning workspace. You can find this be logging in at https://studio.azureml.net. If you do not have an account, you can create a free account (not guest) to use these APIs.

-

Once logged in, you will be brought to the Studio landing page. Using the left-hand menu, navigate to the ‘Settings’ tab to find your Workspace ID. Note this, or copy it into your R session and store it is a variable, e.g. myWsID.

+ +

Before using the package, it is necessary to first obtain the security +credentials to your Azure Machine Learning workspace. You can find this by +logging in at https://studio.azureml.net. If you do not +have an account, you can create a free account (not guest) to use these APIs.

+ +

Once logged in, you will be brought to the Studio landing page. Using the +left-hand menu, navigate to the 'Settings' tab to find your Workspace ID. Note +this, or copy it into your R session and store it in a variable, e.g. myWsID.

+

-

Next, within the ‘Settings’ tab, use the overhead menu to navigate to the ‘Authorization Tokens’ tab and similarly note your Primary Authorization Token.

+ +

Next, within the 'Settings' tab, use the overhead menu to navigate to the +'Authorization Tokens' tab and similarly note your Primary Authorization Token.

+

-
library(AzureML)
+
+
library(AzureML)
 ws <- workspace(
   id = "your workspace ID",
   auth = "your authorization token"
-)
-

or alternatively create a file in ~/.azureml/settings.json with the JSON structure (api_endpoint and management_endpoint are optional):

-
{"workspace": {
+)
+
+ +

or alternatively create a file in ~/.azureml/settings.json with the JSON +structure (api_endpoint and management_endpoint are optional):

+ +
{"workspace": {
    "id"                  : "test_id",
    "authorization_token" : "test_token",
    "api_endpoint"        : "api_endpoint",
    "management_endpoint" : "management_endpoint"
-}}
+}} +
+

See ?workspace for more details.

-
-
+

Examining workspace datasets, experiments and services

-

The datasets(), experiments(), and services() functions return data frames that contain information about those objects available in the workspace.

-

The package caches R data frame objects describing available datasets, experiments and services in the workspace environment. That cache can be refreshed at any time with the refresh() function. The data frame objects make it relatively easy to sort and filter the datasets, experiments, and services in arbitrary ways. The functions also include filtering options for specific and common filters, like looking up something by name.

-

Use the download.datasets() and upload.dataset() functions to download or upload data between R and your Azure workspace. The download.intermediate.dataset() function can download ephemeral data from a port in an experiment that is not explicitly stored in your Azure workspace.

+ +

The datasets(), experiments(), and services() functions return data +frames that contain information about those objects available in the workspace.

+ +

The package caches R data frame objects describing available datasets, +experiments and services in the workspace environment. That cache can be +refreshed at any time with the refresh() function. The data frame objects +make it relatively easy to sort and filter the datasets, experiments, and +services in arbitrary ways. The functions also include filtering options for +specific and common filters, like looking up something by name.

+ +

Use the download.datasets() and upload.dataset() functions to download or +upload data between R and your Azure workspace. The +download.intermediate.dataset() function can download ephemeral data from a +port in an experiment that is not explicitly stored in your Azure workspace.

+

Use delete.datasets() to remove and delete datasets from the workspace.

-

The endpoints() function describes Azure web service endpoints, and works with supporting help functions like endpointHelp().

-

The publishWebService() function publishes a custom R function as an AzureML web service, available for use by any client. The updateWebService() and deleteWebServce() update or delete existing web services, respectively.

-

Use the consume() function to evaluate an Azure ML web service with new data uploaded to AzureML from your R environment.

-
-
-
+ +

The endpoints() function describes Azure web service endpoints, and works with +supporting help functions like endpointHelp().

+ +

The publishWebService() function publishes a custom R function as an AzureML +web service, available for use by any client. The updateWebService() and +deleteWebService() update or delete existing web services, respectively.

+ +

Use the consume() function to evaluate an Azure ML web service with +new data uploaded to AzureML from your R environment.

+

Examples

-

Work with the AzureML package begins by defining a workspace object. The example below uses the configured workspace ID and authorization token in the ~/.azureml/settings.json file. Alternatively specify these settings explicitly in the workspace() function as outlined above. All of the examples require this step.

-
library(AzureML)
+
+

Work with the AzureML package begins by defining a workspace object. The +example below uses the configured workspace ID and authorization token in the +~/.azureml/settings.json file. Alternatively specify these settings +explicitly in the workspace() function as outlined above. All of the examples +require this step.

+ +
library(AzureML)
 ws <- workspace()
-ws
+ws +
+
## AzureML Workspace
-## Workspace ID:  a2760707c7fa4245a057680427f31b17
-
+## Workspace ID : 68ef5aa9196142799d10bedd43f8254c +## API endpoint : https://studioapi.azureml.net + +

Service availability

-

AzureML is a web service and sometimes operations can’t immediately proceed due to rate limiting or other factors. When this kind of thing occurs, the AzureML R package presents a warning and retries the service a few times before giving up with an error.

-
-
+ +

AzureML is a web service and sometimes operations can't +immediately proceed due to rate limiting or other factors. When this +kind of thing occurs, the AzureML R package presents a warning and +retries the service a few times before giving up with an error.

+

Datasets

-

AzureML datasets correspond more or less to R data frames. The AzureML package defines four basic dataset operations: list, upload, download, and delete.

-
+ +

AzureML datasets correspond more or less to R data frames. The AzureML +package defines four basic dataset operations: list, upload, download, and +delete.

+

List available datasets

+

The following example illustrates listing available datasets in your workspace.

-
head(datasets(ws))     # Or, equivalently: head(ws$datasets)
-
##                                           Name DataTypeId Size ...
-## 1 Result Dataset (saved from Execute R Script)    Dataset 3535 ...
-## 2     dataset-test-upload-2015-11-06--04-35-29 GenericTSV 2901 ...
-## 3     dataset-test-upload-2015-11-06--12-26-32 GenericTSV 2901 ...
-## 4     dataset-test-upload-2015-11-17--18-00-22 GenericTSV 2901 ...
-## 5     dataset-test-upload-2015-11-18--09-51-45 GenericTSV 2901 ...
-## 6     dataset-test-upload-2015-11-18--09-59-35 GenericTSV 2901 ...
+
+
head(datasets(ws))     # Or, equivalently: head(ws$datasets)
+
+ +
##                                       Name         DataTypeId  Size ...
+## 1                   text.preprocessing.zip                Zip  2782 ...
+## 2                    fraudTemplateUtil.zip                Zip  3471 ...
+## 3 Sample Named Entity Recognition Articles GenericTSVNoHeader   236 ...
+## 4                       Breast cancer data               ARFF 15170 ...
+## 5                        Forest fires data               ARFF 26285 ...
+## 6                      Iris Two Class Data               ARFF  2004 ...
 ## ----------------------------------------------
 ## AzureML datasets data.frame variables include:
 ##  [1] "VisualizeEndPoint"    "SchemaEndPoint"       "SchemaStatus"        
@@ -169,38 +382,61 @@ 

List available datasets

## [16] "PromotedFrom" "UploadedFromFilename" "ServiceVersion" ## [19] "IsLatest" "Category" "DownloadLocation" ## [22] "IsDeprecated" "Culture" "Batch" -## [25] "CreatedDateTicks"
-

The list of datasets is presented as an a R data frame with class Datasets. Its print method shows a summary of the datasets, along with all of the available variables. Use any normal R data frame operation to manipulate the datasets. For example, to see the “Owner” value of each dataset:

-
head(ws$datasets$Owner, n=20)
-
##  [1] "bwaynelewis"           "R"                    
-##  [3] "R"                     "R"                    
-##  [5] "R"                     "R"                    
-##  [7] "R"                     "Microsoft Corporation"
+## [25] "CreatedDateTicks"
+
+ +

The list of datasets is presented as an R data frame with class Datasets. +Its print method shows a summary of the datasets, along with all of the +available variables. Use any normal R data frame operation to manipulate the +datasets. For example, to see the “Owner” value of each dataset:

+ +
head(ws$datasets$Owner, n=20)
+
+ +
##  [1] "Microsoft Corporation" "Microsoft Corporation"
+##  [3] "Microsoft Corporation" "Microsoft Corporation"
+##  [5] "Microsoft Corporation" "Microsoft Corporation"
+##  [7] "Microsoft Corporation" "Microsoft Corporation"
 ##  [9] "Microsoft Corporation" "Microsoft Corporation"
 ## [11] "Microsoft Corporation" "Microsoft Corporation"
 ## [13] "Microsoft Corporation" "Microsoft Corporation"
 ## [15] "Microsoft Corporation" "Microsoft Corporation"
 ## [17] "Microsoft Corporation" "Microsoft Corporation"
-## [19] "Microsoft Corporation" "Microsoft Corporation"
-
-
+## [19] "Microsoft Corporation" "Microsoft Corporation" + +

Downloading datasets

-

The next example illustrates downloading a specific dataset named “Airport Codes Dataset” from AzureML to your R session. This dataset is presented by AzureML as a “Generic CSV” dataset, and will be parsed by R’s read.table() function. (Other formats are parsed by an appropriate parser, for example read.arff().) The example illustrates passing additional arguments to the read.table() function used to parse the data from AzureML in this case.

-
airports <- download.datasets(ws, name = "Airport Codes Dataset", quote="\"")
-head(airports)
+ +

The next example illustrates downloading a specific dataset named “Airport +Codes Dataset” from AzureML to your R session. This dataset is presented by +AzureML as a “Generic CSV” dataset, and will be parsed by R's read.table() +function. (Other formats are parsed by an appropriate parser, for example +read.arff().) The example illustrates passing additional arguments to the +read.table() function used to parse the data from AzureML in this case.

+ +
airports <- download.datasets(ws, name = "Airport Codes Dataset", quote="\"")
+head(airports)
+
+
##   airport_id        city state                                 name
 ## 1      10165 Adak Island    AK                                 Adak
 ## 2      10299   Anchorage    AK  Ted Stevens Anchorage International
 ## 3      10304       Aniak    AK                        Aniak Airport
 ## 4      10754      Barrow    AK      Wiley Post/Will Rogers Memorial
 ## 5      10551      Bethel    AK                       Bethel Airport
-## 6      10926     Cordova    AK                Merle K Mudhole Smith
-

You can use download.datasets() to download more than one dataset as a time, returning the results in a list of data frames.

-
-
+## 6 10926 Cordova AK Merle K Mudhole Smith + + +

You can use download.datasets() to download more than one dataset at a time, +returning the results in a list of data frames.

+

Uploading R data frames as AzureML datasets and deleting datasets

+

Use the upload.dataset() function to upload R data frames to AzureML.

-
upload.dataset(airquality, ws, name = "Air quality")
+ +
upload.dataset(airquality, ws, name = "Air quality")
+
+
##          Name DataTypeId Size ...
 ## 1 Air quality GenericTSV 2901 ...
 ## ----------------------------------------------
@@ -213,28 +449,45 @@ 

Uploading R data frames as AzureML datasets and deleting datasets

## [16] "PromotedFrom" "UploadedFromFilename" "ServiceVersion" ## [19] "IsLatest" "Category" "DownloadLocation" ## [22] "IsDeprecated" "Culture" "Batch" -## [25] "CreatedDateTicks"
-
# Let's see what we've got:
-head(download.datasets(ws, name = "Air quality"))
+## [25] "CreatedDateTicks" + + +
# Let's see what we've got:
+head(download.datasets(ws, name = "Air quality"))
+
+
##   Ozone Solar.R Wind Temp Month Day
 ## 1    41     190  7.4   67     5   1
 ## 2    36     118  8.0   72     5   2
 ## 3    12     149 12.6   74     5   3
 ## 4    18     313 11.5   62     5   4
 ## 5    NA      NA 14.3   56     5   5
-## 6    28      NA 14.9   66     5   6
+## 6 28 NA 14.9 66 5 6 + +

Delete one or more AzureML datasets with delete.datasets():

-
delete.datasets(ws, name="Air quality")
-
## Request failed with status 400. Retrying request...
+ +
delete.datasets(ws, name="Air quality")
+
+ +
## Request failed with status 400. Waiting 1 seconds before retry
+## .
+
+
##          Name Deleted status_code
-## 1 Air quality    TRUE         204
-
-
-
+## 1 Air quality TRUE 204 + +

Experiments

-

Use the experiments() function or simply use the ws$experiments data frame object directly to list details about experiments in your AzureML workspace. The experiments() function optionally filters experiments by ownership.

-
e <- experiments(ws, filter = "samples")
-head(e)
+ +

Use the experiments() function or simply use the ws$experiments data frame +object directly to list details about experiments in your AzureML workspace. +The experiments() function optionally filters experiments by ownership.

+ +
e <- experiments(ws, filter = "samples")
+head(e)
+
+
##                                        Description        CreationTime ...
 ## 1  Sample 6: Train, Test, Evaluate for Regression: 2015-08-27 21:34:57 ...
 ## 2 Text Classification: Step 2 of 5, text preproces 2015-08-27 21:39:38 ...
@@ -261,23 +514,40 @@ 

Experiments

## [15] "CreationTime" ## [16] "StartTime" ## [17] "EndTime" -## [18] "Metadata"
-

The ws$experiments object is just an R data frame with class Experiments. Its print method shows a summary of the available experiments, but it can otherwise be manipulated like a normal R data frame.

-

The list of experiments in your workspace is cached in the workspace environment. Use the refresh() function to explicitly update the cache at any time, for example:

-
refresh(ws, "experiments")
-
-
-
+## [18] "Metadata" + + +

The ws$experiments object is just an R data frame with class Experiments. +Its print method shows a summary of the available experiments, but it can +otherwise be manipulated like a normal R data frame.

+ +

The list of experiments in your workspace is cached in the workspace +environment. Use the refresh() function to explicitly update the cache at any +time, for example:

+ +
refresh(ws, "experiments")
+
+

Web Services

-

The AzureML package helps you to publish R functions as AzureML web services that can be consumed anywhere. You can also use the AzureML package to run R data through an existing web service and collect the output.

-
+ +

The AzureML package helps you to publish R functions as AzureML web services +that can be consumed anywhere. You can also use the AzureML package to run R +data through an existing web service and collect the output.

+

Publishing a Web Service

-

The publishWebService() publishes an R function as an AzureML web service. Consider this simple example R function:

-
add <- function(x, y) {
+
+

The publishWebService() function publishes an R function as an AzureML web service. +Consider this simple example R function:

+ +
add <- function(x, y) {
   x + y
-}
-

Use the function publishWebService() to publish the function as a service named “AzureML-vignette-silly”:

-
ws <- workspace()
+}
+
+ +

Use the function publishWebService() to publish the function as a +service named “AzureML-vignette-silly”:

+ +
ws <- workspace()
 api <- publishWebService(
   ws,
   fun = add, 
@@ -289,25 +559,62 @@ 

Publishing a Web Service

outputSchema = list( ans = "numeric" ) -)
-

The example publishes a function of two scalar numeric arguments, returning a single numeric scalar output value. Note that we explicitly define the web service input and output schema in the example. See the examples below for more flexible ways of defining web services with functions of data frames.

-

The result of publishWebService() is an Endpoint object, really just an R data frame with two elements: a list containing the details of the newly created web service, and a list of the endpoints of the web service. From here, you can pass the information on to another user, or use the information to use the web service from R:

-
class(api)
-
## [1] "Endpoint"   "data.frame"
-
names(api)
+) +
+ +

The example publishes a function of two scalar numeric arguments, returning a +single numeric scalar output value. Note that we explicitly define the web +service input and output schema in the example. See the examples below for more +flexible ways of defining web services with functions of data frames.

+ +

The result of publishWebService() is an Endpoint object, really just an R +data frame with two elements: a list containing the details of the newly +created web service, and a list of the endpoints of the web service. From here, +you can pass the information on to another user, or use the information to use +the web service from R:

+ +
class(api)
+
+ +
## [1] "Endpoint"   "data.frame"
+
+ +
names(api)
+
+
##  [1] "Name"                  "Description"          
 ##  [3] "CreationTime"          "WorkspaceId"          
 ##  [5] "WebServiceId"          "HelpLocation"         
 ##  [7] "PrimaryKey"            "SecondaryKey"         
-##  [9] "ApiLocation"           "MaxConcurrentCalls"   
-## [11] "DiagnosticsTraceLevel" "ThrottleLevel"
-

The web service created is identical to a web service published through the Azure Machine Learning Studio. From the response, you can get the Web Service’s URL, API Key and Help Page URL, as shown above. The first two are needed to make calls to the web service. The latter has the sample code, sample request and other information for consuming the API from client apps such as mobile and web applications.

-

The new web service will show up on the ‘Web Services’ tab of the Studio interface, and the service will have a help page for each endpoint, e.g.

+## [9] "ApiLocation" "PreventUpdate" +## [11] "MaxConcurrentCalls" "DiagnosticsTraceLevel" +## [13] "ThrottleLevel" + + +

The web service created is identical to a web service published through the +Azure Machine Learning Studio. From the response, you can get the Web Service's +URL, API Key and Help Page URL, as shown above. The first two are needed to +make calls to the web service. The latter has the sample code, sample request +and other information for consuming the API from client apps such as mobile and +web applications.

+ +

The new web service will show up on the 'Web Services' tab of the Studio +interface, and the service will have a help page for each endpoint, e.g.

+

Note that AzureML allows multiple services to have the same name.

-
(helpPageUrl <- api$HelpLocation)
-
## [1] "https://studio.azureml.net/apihelp/workspaces/a2760707c7fa4245a057680427f31b17/webservices/666b3068900f11e5b1665c94780a34d3/endpoints/e84e9f1701ec4b5b884d24786ced9a30"
-

Once published, you can update a web service using the updateWebService() or publishWebService() functions. The updateWebService() function is just an alias for publishWebService(), except that the argument serviceId is compulsory.

-
api <- updateWebService(
+
+
(helpPageUrl <- api$HelpLocation)
+
+ +
## [1] "https://studio.azureml.net/apihelp/workspaces/68ef5aa9196142799d10bedd43f8254c/webservices/c83e7b30a56b11e5a91011e99dfb4827/endpoints/208ba34b4bf14c12ba50684429f6590e"
+
+ +

Once published, you can update a web service using the updateWebService() or +publishWebService() functions. The updateWebService() function is just an +alias for publishWebService(), except that the argument serviceId is +compulsory.

+ +
api <- updateWebService(
   ws,
   fun = function(x, y) x - y,
   inputSchema = list(
@@ -318,68 +625,119 @@ 

Publishing a Web Service

ans = "numeric" ), serviceId = api$WebServiceId # <<-- Required to update! -)
-

The “AzureML-vignette-silly” service now substracts two numbers instead of adding them.

-
-
+) + + +

The “AzureML-vignette-silly” service now subtracts two numbers instead of adding them.

+

Discovering Web Services

-

Use the services() function to list in detail all of the available services in your AzureML workspace, or filter by web service name as shown below:

-
(webservices <- services(ws, name = "AzureML-vignette-silly"))
-
##                                  Id                   Name
-## 31 666b3068900f11e5b1665c94780a34d3 AzureML-vignette-silly
-##                CreationTime                      WorkspaceId
-## 31 2015-11-21T05:19:14.227Z a2760707c7fa4245a057680427f31b17
-##    DefaultEndpointName EndpointCount
-## 31             default             1
-

Given a service, use the endpoints() function to list the AzureML service endpoints for the service:

-
ep <- endpoints(ws, webservices[1, ])
-class(ep)
-
## [1] "Endpoint"   "data.frame"
-
names(ep)
+ +

Use the services() function to list in detail all of the available services +in your AzureML workspace, or filter by web service name as shown below:

+ +
(webservices <- services(ws, name = "AzureML-vignette-silly"))
+
+ +
##                                 Id                   Name
+## 5 c83e7b30a56b11e5a91011e99dfb4827 AzureML-vignette-silly
+##               CreationTime                      WorkspaceId
+## 5 2015-12-18T09:43:24.779Z 68ef5aa9196142799d10bedd43f8254c
+##   DefaultEndpointName EndpointCount
+## 5             default             1
+
+ +

Given a service, use the endpoints() function to list the AzureML +service endpoints for the service:

+ +
ep <- endpoints(ws, webservices[1, ])
+class(ep)
+
+ +
## [1] "Endpoint"   "data.frame"
+
+ +
names(ep)
+
+
##  [1] "Name"                  "Description"          
 ##  [3] "CreationTime"          "WorkspaceId"          
 ##  [5] "WebServiceId"          "HelpLocation"         
 ##  [7] "PrimaryKey"            "SecondaryKey"         
-##  [9] "ApiLocation"           "MaxConcurrentCalls"   
-## [11] "DiagnosticsTraceLevel" "ThrottleLevel"
-

The returned Endpoints object contains all the information needed to consume a web service. The endpointHelp() function returns detailed information about an endpoint including its input and output schema and URI.

-
-
+## [9] "ApiLocation" "PreventUpdate" +## [11] "MaxConcurrentCalls" "DiagnosticsTraceLevel" +## [13] "ThrottleLevel" + + +

The returned Endpoints object contains all the information needed to consume a web +service. The endpointHelp() function returns detailed information about an endpoint +including its input and output schema and URI.

+

Consuming Web Services

-

Use the consume() function to send data to your newly published web service API for scoring.

-
df <- data.frame(
+
+

Use the consume() function to send data to your newly published web service +API for scoring.

+ +
df <- data.frame(
   x = 1:5,
   y = 6:10
 )
 s <- services(ws, name = "AzureML-vignette-silly")
 s <- tail(s, 1) # use the last published function, in case of duplicate function names
 ep <- endpoints(ws, s)
-consume(ep, df)
+consume(ep, df) +
+
##   ans
-## 1  -5
-## 2  -5
-## 3  -5
-## 4  -5
-## 5  -5
-

Alternatively, the endpoint primary key and API location can be found on the help page for that specific endpoint, which can be found on Azure Machine Learning Studio. Using the Help Page URL, you can access sample code to build clients that can consume this web service in real time to make predictions.

-
-
+## 1 7 +## 2 9 +## 3 11 +## 4 13 +## 5 15 + + +

Alternatively, the endpoint primary key and API location can be found on the +help page for that specific endpoint, which can be found on Azure Machine +Learning Studio. Using the Help Page URL, you can access sample code to build +clients that can consume this web service in real time to make predictions.

+

Deleting a Web Service

-

Use deleteWebservice() to remove a webservice endpoint that you no longer need or want (like these silly examples):

-
deleteWebService(ws, name = "AzureML-vignette-silly")
-
-
+ +

Use deleteWebService() to remove a webservice endpoint that you no longer need +or want (like these silly examples):

+ +
deleteWebService(ws, name = "AzureML-vignette-silly")
+
+

Other examples of publishing web services

-

The simplest and perhaps most useful way to define a web service uses functions that take a single data frame argument and return a vector or data frame of results. The next example trains a generalized boosted regression model using the gbm package, publishes the model as a web service with name “AzureML-vignette-gbm”, and runs example data through the model for prediction using the consume() function.

-
library(AzureML)
+
+

The simplest and perhaps most useful way to define a web service uses functions +that take a single data frame argument and return a vector or data frame of +results. The next example trains a generalized boosted regression model using +the gbm package, publishes the model as a web service with name +“AzureML-vignette-gbm”, and runs example data through the model for prediction +using the consume() function.

+ +
library(AzureML)
 library(MASS)
-library(gbm)
+
+ +
## Warning: package 'MASS' was built under R version 3.1.3
+
+ +
library(gbm)
+
+ +
## Warning: package 'gbm' was built under R version 3.1.3
+
+
## Loading required package: survival
-## Loading required package: lattice
 ## Loading required package: splines
+## Loading required package: lattice
 ## Loading required package: parallel
-## Loaded gbm 2.1.1
-
ws <- workspace()
+## Loaded gbm 2.1.1
+
+ +
ws <- workspace()
 test <- Boston[1:5, 1:13]
 
 set.seed(123)
@@ -390,7 +748,8 @@ 

Other examples of publishing web services

n.minobsinnode = 1, shrinkage = 0.01, cv.folds = 5, - data = Boston) + data = Boston, + n.cores = 1) # You can set this to n.cores = NULL to use all cores best.iter <- gbm.perf(gbm1, method="cv", plot=FALSE) mypredict <- function(newdata) @@ -400,56 +759,70 @@

Other examples of publishing web services

} # Example use of the prediction function -print(mypredict(test))
-
## [1] 24.54431 21.15155 33.88859 34.06615 34.93906
-
# Publish the service
+print(mypredict(test))
+
+ +
## [1] 24.54431 21.15155 33.88859 34.06615 34.93906
+
+ +
# Publish the service
 ep <- publishWebService(ws = ws, fun = mypredict, name = "AzureML-vignette-gbm",
                         inputSchema = test)
 
 # Consume test data, comparing with result above
-print(consume(ep, test))
-
## Request failed with status 401. Retrying request...
+print(consume(ep, test)) + +
##        ans
 ## 1 24.54431
 ## 2 21.15155
 ## 3 33.88859
 ## 4 34.06615
-## 5 34.93906
-

Notice that we don’t need to explicitly specific the inputSchema or outputSchema arguments when working with functions that use data frame I/O. When finished with this example, we can delete the example service with:

-
deleteWebService(ws, "AzureML-vignette-gbm")
-
-
+## 5 34.93906 + + +

Notice that we don't need to explicitly specify the inputSchema or +outputSchema arguments when working with functions that use data frame I/O. +When finished with this example, we can delete the example service with:

+ +
deleteWebService(ws, "AzureML-vignette-gbm")
+
+

Tips on writing functions used in web services

-

Try to use the data frame I/O interface as illustrated in the last example above. It’s simpler and more robust than using functions of scalars or lists and exhibits faster execution for large data sets.

-

Use require in your function to explicitly load required packages.

-

The publishWebServce() function uses codetools to bundle objects required by your function following R lexical scoping rules. The previous example, for instance, uses the best.iter and gbm1 variables inside of the mypredict() function. publishWebService() identified that and included their definitions in the R environment in which the function is evaluated in AzureML. Fine-grained control over the export of variables is provided by the publishWebService() function in case you need it (see the help page for details).

-

Use the packages option of publishWebService() to explicitly bundle required packages and their dependencies (but not suggested dependencies) using miniCRAN. This lets you upload packages to AzureML that may not otherwise be available in that environment already, using the correct R version and platform used by AzureML.

-

Be aware that the version of R running in AzureML may not be the same as the version of R that you are running locally. That means that some packages might not be available, or sometimes package behavior in the AzureML version of R might be different that what you observe locally. This is generally more of an issue for cutting-edge packages.

-

JSON is used to transfer data between your local R environment and the R services running in AzureML–numeric values experience a change of base, which can lead to a small loss of precision in some circumstances. If you really, really need to move binary objects between your local R session and the AzureML R service you might try base64 encoding the data, for example.

-
-
+

Try to use the data frame I/O interface as illustrated in the last example +above. It's simpler and more robust than using functions of scalars or lists +and exhibits faster execution for large data sets.

-
+

Use require in your function to explicitly load required packages.

- +

Be aware that the version of R running in AzureML may not be the same as the +version of R that you are running locally. That means that some packages might +not be available, or sometimes package behavior in the AzureML version of R +might be different from what you observe locally. This is generally more of an +issue for cutting-edge packages.

- - +

JSON is used to transfer data between your local R environment and the R +services running in AzureML–numeric values experience a change of base, which +can lead to a small loss of precision in some circumstances. If you really, +really need to move binary objects between your local R session and the AzureML +R service you might try base64 encoding the data, for example.

+ From 7cdff48ad57485d7bebce35789055fba85bd4900 Mon Sep 17 00:00:00 2001 From: Andrie de Vries Date: Fri, 18 Dec 2015 09:47:59 +0000 Subject: [PATCH 14/14] Bump version number to 0.2.6 --- DESCRIPTION | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 433e22d..fb085a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,11 @@ Package: AzureML Type: Package Title: Interface with Azure Machine Learning datasets and web services -Description: Functions and datasets to support Azure Machine Learning. This allows you to interact with datasets, as well as publish and consume R functions as API services. -Version: 0.2.5 -Date: 2015-12-08 +Description: Functions and datasets to support Azure Machine Learning. This + allows you to interact with datasets, as well as publish and consume R functions + as API services. +Version: 0.2.6 +Date: 2015-12-18 Authors@R: c( person("Raymond", "Laghaeian", role=c("aut", "cre"), email="raymondl@microsoft.com"), person(family="Microsoft Corporation", role="cph"), @@ -15,18 +17,20 @@ URL: https://github.com/RevolutionAnalytics/AzureML BugReports: https://github.com/RevolutionAnalytics/AzureML/issues LazyData: TRUE VignetteBuilder: knitr -SystemRequirements: Requires external zip utility, available in path. On windows, it's sufficient to install RTools. +SystemRequirements: Requires external zip utility, available in path. On + windows, it's sufficient to install RTools. Imports: - jsonlite(>= 0.9.16), - curl(>= 0.8), + jsonlite(>= 0.9.16), + curl(>= 0.8), foreign, codetools, base64enc, miniCRAN, uuid Suggests: - testthat, - knitr, + testthat, + knitr, lme4, gbm, MASS +RoxygenNote: 5.0.1