Modification of Ralger package to suit rvest 1.0.2 #13

Merged · 3 commits · Jun 18, 2022
2 changes: 1 addition & 1 deletion DESCRIPTION

```diff
@@ -29,4 +29,4 @@ Suggests:
     testthat,
     rmarkdown,
     covr
-RoxygenNote: 7.1.1
+RoxygenNote: 7.2.0
```
75 changes: 36 additions & 39 deletions R/table_scrap.R

```diff
@@ -8,7 +8,6 @@
 #' @param choose an integer indicating which table to scrape
 #' @param header do you want the first line to be the header (default to TRUE)
 #' @param askRobot logical. Should the function check the robots.txt to see whether scraping the web page is allowed? Default is FALSE.
-#' @param fill logical. Should be set to TRUE when the table has an inconsistent number of columns.
 #' @return a data frame object.
 #' @examples \donttest{
 #' # Extracting premier ligue 2019/2020 top scorers
@@ -30,60 +29,58 @@
 table_scrap <- function(link,
                         choose = 1,
                         header = TRUE,
-                        fill = FALSE,
                         askRobot = FALSE) {
 
   if (missing(link)) {
     stop("'link' is a mandatory parameter")
   }
 
   if (!is.character(link)) {
     stop("'link' parameter must be provided as a character string")
   }
 
   if (!is.numeric(choose)) {
     stop(paste0("the 'choose' parameter must be provided as numeric not as "),
          typeof(choose))
   }
 
   ############################## Ask robot part ##############################
 
   if (askRobot) {
     if (paths_allowed(link) == TRUE) {
       message(green("the robots.txt doesn't prohibit scraping this web page"))
     } else {
       message(bgRed(
         "WARNING: the robots.txt doesn't allow scraping this web page"
       ))
     }
   }
 
   ###########################################################################
 
   tryCatch(
     expr = {
       table <- link %>%
         read_html() %>%
-        html_table(header, fill = fill)
+        html_table(header)
 
       chosen_table <- table[[choose]]
 
       return(chosen_table)
     },
     error = function(cond) {
 
       if (!has_internet()) {
 
@@ -93,18 +90,18 @@ error = function(cond){
 
       } else if (grepl("current working directory", cond) || grepl("HTTP error 404", cond)) {
 
         message(paste0("The URL doesn't seem to be a valid one: ", link))
 
         message(paste0("Here is the original error message: ", cond))
 
         return(NA)
 
       } else if (grepl("subscript out of bounds", cond)) {
 
         message(
           "Are you sure that your web page contains more than one HTML table?"
         )
 
         message(paste0("Here is the original error message: ", cond))
 
@@ -117,6 +114,6 @@
         return(NA)
 
       }
     }
 
   )}
```
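For context, rvest 1.0.0 rewrote `html_table()` so that ragged tables are padded automatically, which is why the `fill` argument (and its pass-through in `table_scrap()`) can be dropped in this PR. A minimal offline sketch of the new behaviour, using `rvest::minimal_html()` (the table content is made up for illustration):

```r
library(rvest)

# A ragged table: the second data row has only two cells.
# In rvest >= 1.0.0, html_table() pads the short row with NA
# automatically; no `fill` argument is needed (it is deprecated).
doc <- minimal_html("
  <table>
    <tr><th>a</th><th>b</th><th>c</th></tr>
    <tr><td>1</td><td>2</td><td>3</td></tr>
    <tr><td>4</td><td>5</td></tr>
  </table>")

tab <- html_table(doc, header = TRUE)[[1]]
tab  # a tibble; column c holds NA for the short row
```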
11 changes: 6 additions & 5 deletions README.Rmd

````diff
@@ -51,20 +51,21 @@ devtools::install_github("feddelegrand7/ralger")
 ```
 ## `scrap()`
 
-This is an example which shows how to extract [top ranked universities' names](http://www.shanghairanking.com/ARWU2020.html) according to the ShanghaiRanking Consultancy:
+This is an example which shows how to extract [top ranked universities' names](http://www.shanghairanking.com/rankings/arwu/2021) according to the ShanghaiRanking Consultancy:
 
 ```{r example}
 library(ralger)
 
-my_link <- "http://www.shanghairanking.com/ARWU2020.html"
+my_link <- "http://www.shanghairanking.com/rankings/arwu/2021"
 
-my_node <- "#UniversityRanking a" # The CSS selector; I recommend SelectorGadget if you're not familiar with CSS selectors
+my_node <- "a span" # The CSS selector; I recommend SelectorGadget if you're not familiar with CSS selectors
 
-best_uni <- scrap(link = my_link, node = my_node)
+clean <- TRUE # Should the function clean the extracted vector or not? Default is FALSE
+
+best_uni <- scrap(link = my_link, node = my_node, clean = clean)
 
 head(best_uni, 10)
 ```
````
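Both `table_scrap()` and `scrap()` expose an `askRobot` argument whose check is backed by `robotstxt::paths_allowed()`. A standalone sketch of that check (the URL is illustrative, and the call needs internet access to fetch the site's robots.txt):

```r
library(robotstxt)

url <- "https://en.wikipedia.org/wiki/R_(programming_language)"

# paths_allowed() fetches the site's robots.txt and returns TRUE when
# the path may be crawled by the default user agent
if (paths_allowed(url)) {
  message("the robots.txt doesn't prohibit scraping this web page")
} else {
  message("WARNING: the robots.txt doesn't allow scraping this web page")
}
```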