From 583c00902c89d5c6fa142bdf6f13b7db7b4aa6f0 Mon Sep 17 00:00:00 2001 From: JBGruber Date: Wed, 18 Dec 2019 20:43:57 +0100 Subject: [PATCH] Added default encoding and changed to readLines instead of stringi --- R/rwhatsapp.R | 13 ++++++++----- README.md | 2 +- inst/WORDLIST | 1 + man/rwa_read.Rd | 7 +++++-- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/R/rwhatsapp.R b/R/rwhatsapp.R index ab18207..572cc2e 100755 --- a/R/rwhatsapp.R +++ b/R/rwhatsapp.R @@ -16,7 +16,9 @@ #' \link[stringi]{stri_datetime_parse} for guidance. #' @param verbose A logical flag indicating whether information should be #' printed to the screen. -#' @param ... Further arguments passed to \link[stringi]{stri_read_lines}. +#' @param encoding Input encoding. Should usually be "UTF-8" if files haven't +#' changed since export from WhatsApp. +#' @param ... Further arguments passed to \link[base]{readLines}. #' #' @return A tibble with the information parsed from the history file. #' @export @@ -31,6 +33,7 @@ rwa_read <- function(x, tz = NULL, format = NULL, verbose = FALSE, + encoding = "UTF-8", ...) { if (verbose) { @@ -40,7 +43,7 @@ rwa_read <- function(x, start_time <- NULL } - chat_raw <- rwa_read_lines(x, verbose, start_time, ...) + chat_raw <- rwa_read_lines(x, verbose, start_time, encoding, ...) chat_raw <- chat_raw[!chat_raw == ""] time <- stri_extract_first_regex( @@ -126,7 +129,7 @@ rwa_read <- function(x, #' @inherit rwa_read #' @import stringi #' @noRd -rwa_read_lines <- function(x, verbose, start_time = NULL, ...) { +rwa_read_lines <- function(x, verbose, start_time = NULL, encoding, ...) { # get files zps <- grep(".zip$", x, ignore.case = TRUE) temp <- NULL @@ -144,7 +147,7 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) { if (f_exist_s(x)) { if (length(x) == 1) { - chat_raw <- stri_read_lines(x, ...) + chat_raw <- readLines(x, encoding = encoding, ...) 
names(chat_raw) <- rep(x, length(chat_raw)) if (verbose) { message(" one log file...") @@ -152,7 +155,7 @@ rwa_read_lines <- function(x, verbose, start_time = NULL, ...) { } } else { chat_raw <- unlist(lapply(x, function(t) { - cr <- stri_read_lines(t)#, ...) + cr <- readLines(t, encoding = encoding, ...) names(cr) <- rep(t, length(cr)) return(cr) })) diff --git a/README.md b/README.md index 2373de6..73da2c1 100755 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.re ![Downloads](https://cranlogs.r-pkg.org/badges/grand-total/rwhatsapp) [![Travis-CI Build Status](https://travis-ci.org/JBGruber/rwhatsapp.svg?branch=master)](https://travis-ci.org/JBGruber/rwhatsapp) -[![Say + ## Motivation diff --git a/inst/WORDLIST b/inst/WORDLIST index d86c505..c9be653 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -19,6 +19,7 @@ Macbook ne oman prozessor +readLines stopwords stri tf diff --git a/man/rwa_read.Rd b/man/rwa_read.Rd index b0f5779..1b4a0d8 100644 --- a/man/rwa_read.Rd +++ b/man/rwa_read.Rd @@ -4,7 +4,7 @@ \alias{rwa_read} \title{Read WhatsApp history into R} \usage{ -rwa_read(x, tz = NULL, format = NULL, verbose = FALSE, ...) +rwa_read(x, tz = NULL, format = NULL, verbose = FALSE, encoding = "UTF-8", ...) } \arguments{ \item{x}{Path to a txt or zip file of a WhatsApp history or the history @@ -21,7 +21,10 @@ problems you can provide a custom format here. Refer to \item{verbose}{A logical flag indicating whether information should be printed to the screen.} -\item{...}{Further arguments passed to \link[stringi]{stri_read_lines}.} +\item{encoding}{Input encoding. Should usually be "UTF-8" if files haven't +changed since export from WhatsApp.} + +\item{...}{Further arguments passed to \link[base]{readLines}.} } \value{ A tibble with the information parsed from the history file.