diff --git a/DESCRIPTION b/DESCRIPTION index 5e3615c..7e9a9bc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: quanteda.textplots Title: Plots for the Quantitative Analysis of Textual Data -Version: 0.94.3 +Version: 0.94.4 Authors@R: c( person("Kenneth", "Benoit", email = "kbenoit@lse.ac.uk", role = c("cre", "aut", "cph"), comment = c(ORCID = "0000-0002-0797-564X")), @@ -44,4 +44,4 @@ Encoding: UTF-8 BugReports: https://github.com/quanteda/quanteda.textplots/issues Language: en-GB Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/R/textplot_keyness.R b/R/textplot_keyness.R index f59162c..17b9a44 100644 --- a/R/textplot_keyness.R +++ b/R/textplot_keyness.R @@ -27,10 +27,10 @@ #' \dontrun{ #' library("quanteda") #' # compare Trump speeches to other Presidents by chi^2 -#' dfmat1 <- data_corpus_inaugural %>% -#' corpus_subset(Year > 1980) %>% -#' tokens(remove_punct = TRUE) %>% -#' tokens_remove(stopwords("en")) %>% +#' dfmat1 <- data_corpus_inaugural |> +#' corpus_subset(Year > 1980) |> +#' tokens(remove_punct = TRUE) |> +#' tokens_remove(stopwords("en")) |> #' dfm() #' dfmat1 <- dfm_group(dfmat1, groups = dfmat1$President) #' tstat1 <- quanteda.textstats::textstat_keyness(dfmat1, target = "Trump") @@ -39,13 +39,13 @@ #' textplot_keyness(tstat1, margin = 0.2, n = 10) #' #' # compare contemporary Democrats v. Republicans -#' corp <- data_corpus_inaugural %>% +#' corp <- data_corpus_inaugural |> #' corpus_subset(Year > 1960) #' corp$party <- ifelse(docvars(corp, "President") %in% c("Nixon", "Reagan", "Bush", "Trump"), #' "Republican", "Democrat") -#' dfmat2 <- corp %>% -#' tokens(remove_punct = TRUE) %>% -#' tokens_remove(stopwords("en")) %>% +#' dfmat2 <- corp |> +#' tokens(remove_punct = TRUE) |> +#' tokens_remove(stopwords("en")) |> #' dfm() #' tstat2 <- quanteda.textstats::textstat_keyness(dfm_group(dfmat2, groups = dfmat2$party), #' target = "Democrat", measure = "lr") diff --git a/R/textplot_network.R b/R/textplot_network.R index e0fd1c6..5512e70 100644 --- a/R/textplot_network.R +++ b/R/textplot_network.R @@ -34,19 +34,22 @@ #' @examples #' set.seed(100) #' library("quanteda") -#' toks <- data_char_ukimmig2010 %>% -#' tokens(remove_punct = TRUE) %>% -#' tokens_tolower() %>% +#' toks <- data_char_ukimmig2010 |> +#' tokens(remove_punct = TRUE) |> +#' tokens_tolower() |> #' tokens_remove(pattern = stopwords("english"), padding = FALSE) #' fcmat <- fcm(toks, context = "window", tri = FALSE) -#' feat <- names(topfeatures(fcmat, 30)) -#' fcm_select(fcmat, pattern = feat) %>% +#' feat <- colSums(fcmat) |> +#' sort(decreasing = TRUE) |> +#' head(30) |> +#' names() +#' fcm_select(fcmat, pattern = feat) |> #' textplot_network(min_freq = 0.5) -#' fcm_select(fcmat, pattern = feat) %>% +#' fcm_select(fcmat, pattern = feat) |> #' textplot_network(min_freq = 0.8) -#' fcm_select(fcmat, pattern = feat) %>% +#' fcm_select(fcmat, pattern = feat) |> #' textplot_network(min_freq = 0.8, vertex_labelcolor = rep(c('gray40', NA), 15)) -#' fcm_select(fcmat, pattern = feat) %>% +#' fcm_select(fcmat, pattern = feat) |> #' textplot_network(vertex_labelsize = 10) #' fcm_30 <- fcm_select(fcmat, pattern = feat) #' textplot_network(fcm_30, @@ -198,7 +201,7 @@ as.igraph <- function(x, ...) UseMethod("as.igraph") #' # as.igraph #' if (requireNamespace("igraph", quietly = TRUE)) { #' txt <- c("a a a b b c", "a a c e", "a c e f g") -#' mat <- fcm(txt) +#' mat <- fcm(tokens(txt)) #' as.igraph(mat, min_freq = 1, omit_isolated = FALSE) #' } as.igraph.fcm <- function(x, min_freq = 0.5, omit_isolated = TRUE, ...) { diff --git a/R/textplot_wordcloud.R b/R/textplot_wordcloud.R index 4ed36f1..82038ee 100644 --- a/R/textplot_wordcloud.R +++ b/R/textplot_wordcloud.R @@ -57,11 +57,11 @@ #' # plot the features (without stopwords) from Obama's inaugural addresses #' set.seed(10) #' library("quanteda") -#' dfmat1 <- data_corpus_inaugural %>% -#' corpus_subset(President == "Obama") %>% -#' tokens(remove_punct = TRUE) %>% -#' tokens_remove(stopwords("en")) %>% -#' dfm() %>% +#' dfmat1 <- data_corpus_inaugural |> +#' corpus_subset(President == "Obama") |> +#' tokens(remove_punct = TRUE) |> +#' tokens_remove(stopwords("en")) |> +#' dfm() |> #' dfm_trim(min_termfreq = 3) #' #' # basic wordcloud @@ -77,12 +77,12 @@ #' color = col, rotation = FALSE) #' #' # comparison plot of Obama v. Trump -#' dfmat2 <- data_corpus_inaugural %>% -#' corpus_subset(President %in% c("Obama", "Trump")) %>% -#' tokens(remove_punct = TRUE) %>% -#' tokens_remove(stopwords("en")) %>% +#' dfmat2 <- data_corpus_inaugural |> +#' corpus_subset(President %in% c("Obama", "Trump")) |> +#' tokens(remove_punct = TRUE) |> +#' tokens_remove(stopwords("en")) |> #' dfm() -#' dfmat2 <- dfm_group(dfmat2, dfmat2$President) %>% +#' dfmat2 <- dfm_group(dfmat2, dfmat2$President) |> #' dfm_trim(min_termfreq = 3) #' #' textplot_wordcloud(dfmat2, comparison = TRUE, max_words = 100, @@ -90,10 +90,10 @@ #' #' \dontrun{ #' # for keyness -#' tstat <- data_corpus_inaugural[c(1, 3)] %>% -#' tokens(remove_punct = TRUE) %>% -#' tokens_remove(stopwords("en")) %>% -#' dfm() %>% +#' tstat <- data_corpus_inaugural[c(1, 3)] |> +#' tokens(remove_punct = TRUE) |> +#' tokens_remove(stopwords("en")) |> +#' dfm() |> #' quanteda.textstats::textstat_keyness() #' textplot_wordcloud(tstat, min_count = 2) #' textplot_wordcloud(tstat, min_count = 2, comparison = FALSE) @@ -367,9 +367,7 @@ wordcloud <- function(x, min_size, max_size, min_count, max_words, #' @param min.freq deprecated argument #' @param max.words deprecated argument #' @param random.order deprecated argument -#' @param random.color deprecated argument #' @param rot.per deprecated argument -#' @param ordered.colors deprecated argument #' @param use.r.layout deprecated argument #' @param title.size deprecated argument #' @importFrom quanteda dfm_trim dfm_weight diff --git a/R/textplot_xray.R b/R/textplot_xray.R index 6effa01..dece41d 100644 --- a/R/textplot_xray.R +++ b/R/textplot_xray.R @@ -24,26 +24,28 @@ #' `textplot_xray`. #' @examples #' library("quanteda") -#' corp <- corpus_subset(data_corpus_inaugural, Year > 1970) +#' toks <- data_corpus_inaugural |> +#' corpus_subset(Year > 1970) |> +#' tokens() #' # compare multiple documents -#' textplot_xray(kwic(corp, pattern = "american")) -#' textplot_xray(kwic(corp, pattern = "american"), scale = "absolute") +#' textplot_xray(kwic(toks, pattern = "american")) +#' textplot_xray(kwic(toks, pattern = "american"), scale = "absolute") #' #' # compare multiple terms across multiple documents -#' textplot_xray(kwic(corp, pattern = "america*"), -#' kwic(corp, pattern = "people")) +#' textplot_xray(kwic(toks, pattern = "america*"), +#' kwic(toks, pattern = "people")) #' #' \dontrun{ #' # how to modify the ggplot with different options #' library("ggplot2") -#' tplot <- textplot_xray(kwic(corp, pattern = "american"), -#' kwic(corp, pattern = "people")) +#' tplot <- textplot_xray(kwic(toks, pattern = "american"), +#' kwic(toks, pattern = "people")) #' tplot + aes(color = keyword) + scale_color_manual(values = c('red', 'blue')) #' #' # adjust the names of the document names -#' docnames(corp) <- apply(docvars(corp, c("Year", "President")), 1, paste, collapse = ", ") -#' textplot_xray(kwic(corp, pattern = "america*"), -#' kwic(corp, pattern = "people")) +#' docnames(toks) <- apply(docvars(toks, c("Year", "President")), 1, paste, collapse = ", ") +#' textplot_xray(kwic(toks, pattern = "america*"), +#' kwic(toks, pattern = "people")) #' } #' @export #' @keywords textplot diff --git a/cran-comments.md b/cran-comments.md index e431ab1..1c29c3a 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,12 +1,12 @@ # Submission notes -Resubmitting to reset the flag for the noSuggests, because a package that **quanteda.textplots** suggests was off CRAN for a week. +Fixes NOTEs warned about by CRAN on 22-Jan-2024. # Checks ## Test environments -* local macOS 13.2.1, R 4.2.3 +* local macOS 14.2.1, R 4.2.3 * Ubuntu 22.04 LTS, R 4.2.3 * Windows release via devtools::check_win_release() * Windows devel via devtools::check_win_devel() diff --git a/man/textplot_keyness.Rd b/man/textplot_keyness.Rd index 8245abd..831ae84 100644 --- a/man/textplot_keyness.Rd +++ b/man/textplot_keyness.Rd @@ -55,10 +55,10 @@ using \code{\link[quanteda.textstats:textstat_keyness]{quanteda.textstats::texts \dontrun{ library("quanteda") # compare Trump speeches to other Presidents by chi^2 -dfmat1 <- data_corpus_inaugural \%>\% - corpus_subset(Year > 1980) \%>\% - tokens(remove_punct = TRUE) \%>\% - tokens_remove(stopwords("en")) \%>\% +dfmat1 <- data_corpus_inaugural |> + corpus_subset(Year > 1980) |> + tokens(remove_punct = TRUE) |> + tokens_remove(stopwords("en")) |> dfm() dfmat1 <- dfm_group(dfmat1, groups = dfmat1$President) tstat1 <- quanteda.textstats::textstat_keyness(dfmat1, target = "Trump") @@ -67,13 +67,13 @@ tstat1 <- quanteda.textstats::textstat_keyness(dfmat1, target = "Trump") textplot_keyness(tstat1, margin = 0.2, n = 10) # compare contemporary Democrats v. Republicans -corp <- data_corpus_inaugural \%>\% +corp <- data_corpus_inaugural |> corpus_subset(Year > 1960) corp$party <- ifelse(docvars(corp, "President") \%in\% c("Nixon", "Reagan", "Bush", "Trump"), "Republican", "Democrat") -dfmat2 <- corp \%>\% - tokens(remove_punct = TRUE) \%>\% - tokens_remove(stopwords("en")) \%>\% +dfmat2 <- corp |> + tokens(remove_punct = TRUE) |> + tokens_remove(stopwords("en")) |> dfm() tstat2 <- quanteda.textstats::textstat_keyness(dfm_group(dfmat2, groups = dfmat2$party), target = "Democrat", measure = "lr") diff --git a/man/textplot_network.Rd b/man/textplot_network.Rd index 4bff6c8..033bb40 100644 --- a/man/textplot_network.Rd +++ b/man/textplot_network.Rd @@ -77,19 +77,22 @@ own plotting function using \code{\link[=as.network.fcm]{as.network()}}. \examples{ set.seed(100) library("quanteda") -toks <- data_char_ukimmig2010 \%>\% - tokens(remove_punct = TRUE) \%>\% - tokens_tolower() \%>\% +toks <- data_char_ukimmig2010 |> + tokens(remove_punct = TRUE) |> + tokens_tolower() |> tokens_remove(pattern = stopwords("english"), padding = FALSE) fcmat <- fcm(toks, context = "window", tri = FALSE) -feat <- names(topfeatures(fcmat, 30)) -fcm_select(fcmat, pattern = feat) \%>\% +feat <- colSums(fcmat) |> + sort(decreasing = TRUE) |> + head(30) |> + names() +fcm_select(fcmat, pattern = feat) |> textplot_network(min_freq = 0.5) -fcm_select(fcmat, pattern = feat) \%>\% +fcm_select(fcmat, pattern = feat) |> textplot_network(min_freq = 0.8) -fcm_select(fcmat, pattern = feat) \%>\% +fcm_select(fcmat, pattern = feat) |> textplot_network(min_freq = 0.8, vertex_labelcolor = rep(c('gray40', NA), 15)) -fcm_select(fcmat, pattern = feat) \%>\% +fcm_select(fcmat, pattern = feat) |> textplot_network(vertex_labelsize = 10) fcm_30 <- fcm_select(fcmat, pattern = feat) textplot_network(fcm_30, @@ -102,7 +105,7 @@ textplot_network(fcm_30, # as.igraph if (requireNamespace("igraph", quietly = TRUE)) { txt <- c("a a a b b c", "a a c e", "a c e f g") - mat <- fcm(txt) + mat <- fcm(tokens(txt)) as.igraph(mat, min_freq = 1, omit_isolated = FALSE) } } diff --git a/man/textplot_wordcloud.Rd b/man/textplot_wordcloud.Rd index dc34516..76ee2b0 100644 --- a/man/textplot_wordcloud.Rd +++ b/man/textplot_wordcloud.Rd @@ -98,11 +98,11 @@ documents by some document variable. # plot the features (without stopwords) from Obama's inaugural addresses set.seed(10) library("quanteda") -dfmat1 <- data_corpus_inaugural \%>\% - corpus_subset(President == "Obama") \%>\% - tokens(remove_punct = TRUE) \%>\% - tokens_remove(stopwords("en")) \%>\% - dfm() \%>\% +dfmat1 <- data_corpus_inaugural |> + corpus_subset(President == "Obama") |> + tokens(remove_punct = TRUE) |> + tokens_remove(stopwords("en")) |> + dfm() |> dfm_trim(min_termfreq = 3) # basic wordcloud @@ -118,12 +118,12 @@ textplot_wordcloud(dfmat1, adjust = 0.5, random_order = FALSE, color = col, rotation = FALSE) # comparison plot of Obama v. Trump -dfmat2 <- data_corpus_inaugural \%>\% - corpus_subset(President \%in\% c("Obama", "Trump")) \%>\% - tokens(remove_punct = TRUE) \%>\% - tokens_remove(stopwords("en")) \%>\% +dfmat2 <- data_corpus_inaugural |> + corpus_subset(President \%in\% c("Obama", "Trump")) |> + tokens(remove_punct = TRUE) |> + tokens_remove(stopwords("en")) |> dfm() -dfmat2 <- dfm_group(dfmat2, dfmat2$President) \%>\% +dfmat2 <- dfm_group(dfmat2, dfmat2$President) |> dfm_trim(min_termfreq = 3) textplot_wordcloud(dfmat2, comparison = TRUE, max_words = 100, @@ -131,10 +131,10 @@ textplot_wordcloud(dfmat2, comparison = TRUE, max_words = 100, \dontrun{ # for keyness -tstat <- data_corpus_inaugural[c(1, 3)] \%>\% - tokens(remove_punct = TRUE) \%>\% - tokens_remove(stopwords("en")) \%>\% - dfm() \%>\% +tstat <- data_corpus_inaugural[c(1, 3)] |> + tokens(remove_punct = TRUE) |> + tokens_remove(stopwords("en")) |> + dfm() |> quanteda.textstats::textstat_keyness() textplot_wordcloud(tstat, min_count = 2) textplot_wordcloud(tstat, min_count = 2, comparison = FALSE) diff --git a/man/textplot_xray.Rd b/man/textplot_xray.Rd index 3d3343f..1b1531e 100644 --- a/man/textplot_xray.Rd +++ b/man/textplot_xray.Rd @@ -43,26 +43,28 @@ is to produce one kwic per dictionary key, and send them as a list to \examples{ library("quanteda") -corp <- corpus_subset(data_corpus_inaugural, Year > 1970) +toks <- data_corpus_inaugural |> + corpus_subset(Year > 1970) |> + tokens() # compare multiple documents -textplot_xray(kwic(corp, pattern = "american")) -textplot_xray(kwic(corp, pattern = "american"), scale = "absolute") +textplot_xray(kwic(toks, pattern = "american")) +textplot_xray(kwic(toks, pattern = "american"), scale = "absolute") # compare multiple terms across multiple documents -textplot_xray(kwic(corp, pattern = "america*"), - kwic(corp, pattern = "people")) +textplot_xray(kwic(toks, pattern = "america*"), + kwic(toks, pattern = "people")) \dontrun{ # how to modify the ggplot with different options library("ggplot2") -tplot <- textplot_xray(kwic(corp, pattern = "american"), - kwic(corp, pattern = "people")) +tplot <- textplot_xray(kwic(toks, pattern = "american"), + kwic(toks, pattern = "people")) tplot + aes(color = keyword) + scale_color_manual(values = c('red', 'blue')) # adjust the names of the document names -docnames(corp) <- apply(docvars(corp, c("Year", "President")), 1, paste, collapse = ", ") -textplot_xray(kwic(corp, pattern = "america*"), - kwic(corp, pattern = "people")) +docnames(toks) <- apply(docvars(toks, c("Year", "President")), 1, paste, collapse = ", ") +textplot_xray(kwic(toks, pattern = "america*"), + kwic(toks, pattern = "people")) } } \keyword{textplot} diff --git a/man/wordcloud_comparison.Rd b/man/wordcloud_comparison.Rd index cb98984..c20b155 100644 --- a/man/wordcloud_comparison.Rd +++ b/man/wordcloud_comparison.Rd @@ -89,10 +89,6 @@ aspect ratio only supported if rotation = 0.} \item{...}{additional parameters. Only used to make it compatible with \pkg{wordcloud}} - -\item{random.color}{deprecated argument} - -\item{ordered.colors}{deprecated argument} } \description{ This function implements wordcloud that compares documents. Code is adopted diff --git a/tests/testthat/test-textplot_keyness.R b/tests/testthat/test-textplot_keyness.R index 40947e4..90f2ff6 100644 --- a/tests/testthat/test-textplot_keyness.R +++ b/tests/testthat/test-textplot_keyness.R @@ -4,9 +4,9 @@ pdf(file = tempfile(".pdf"), width = 10, height = 10) test_that("test textplot_keyness: show_reference works correctly ", { skip("until quanteda.textstats is updated") - presdfm <- corpus_subset(data_corpus_inaugural, President %in% c("Obama", "Trump")) %>% - tokens(remove_punct = TRUE) %>% - tokens_remove(stopwords("en")) %>% + presdfm <- corpus_subset(data_corpus_inaugural, President %in% c("Obama", "Trump")) |> + tokens(remove_punct = TRUE) |> + tokens_remove(stopwords("en")) |> dfm() presdfm <- dfm_group(presdfm, groups = presdfm$President) result <- quanteda.textstats::textstat_keyness(presdfm, target = "Trump") diff --git a/tests/testthat/test-textplot_wordcloud.R b/tests/testthat/test-textplot_wordcloud.R index 74a4d88..66674f9 100644 --- a/tests/testthat/test-textplot_wordcloud.R +++ b/tests/testthat/test-textplot_wordcloud.R @@ -8,10 +8,10 @@ test_that("test textplot_wordcloud works for dfm objects", { test_that("test textplot_wordcloud works for keyness objects", { skip("until quanteda.textstats is updated") - tstat <- head(data_corpus_inaugural, 2) %>% - tokens() %>% - tokens_remove(stopwords("en")) %>% - dfm() %>% + tstat <- head(data_corpus_inaugural, 2) |> + tokens() |> + tokens_remove(stopwords("en")) |> + dfm() |> quanteda.textstats::textstat_keyness(target = 1) expect_silent(textplot_wordcloud(tstat, max_words = 50)) expect_silent(textplot_wordcloud(tstat, comparison = FALSE, max_words = 50)) @@ -25,7 +25,7 @@ test_that("test textplot_wordcloud comparison works", { set.seed(1) docvars(testcorp, "label") <- sample(c("A", "B"), size = ndoc(testcorp), replace = TRUE) docnames(testcorp) <- paste0("text", 1:ndoc(testcorp)) - testdfm <- dfm(tokens(testcorp)) %>% + testdfm <- dfm(tokens(testcorp)) |> dfm_remove(stopwords("en")) testdfm_grouped <- dfm_group(testdfm, groups = testdfm$label) @@ -45,8 +45,8 @@ test_that("test textplot_wordcloud comparison works", { ) dfmsmall <- dfm(tokens(data_corpus_inaugural[1:9], remove_punct = TRUE)) - dfmsmall <- dfm_group(dfmsmall, groups = dfmsmall$President) %>% - dfm_remove(stopwords("en")) %>% + dfmsmall <- dfm_group(dfmsmall, groups = dfmsmall$President) |> + dfm_remove(stopwords("en")) |> dfm_trim(min_termfreq = 20) expect_silent(textplot_wordcloud(dfmsmall, comparison = TRUE)) expect_silent(textplot_wordcloud(dfmsmall, color = 1:5))