diff --git a/DESCRIPTION b/DESCRIPTION index 6d618e3..c822767 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,12 +17,12 @@ Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 Imports: - methods, - utils + lubridate Suggests: knitr, rmarkdown, ncdf4, + ncdfCF, RNetCDF, testthat (>= 3.0.0), stringr @@ -30,3 +30,17 @@ URL: https://github.com/pvanlaake/CFtime BugReports: https://github.com/pvanlaake/CFtime/issues VignetteBuilder: knitr Config/testthat/edition: 3 +Collate: + 'api.R' + 'CFCalendar.R' + 'CFCalendar360.R' + 'CFCalendar365.R' + 'CFCalendar366.R' + 'CFCalendarJulian.R' + 'CFCalendarProleptic.R' + 'CFCalendarStandard.R' + 'CFtime-package.R' + 'CFtime.R' + 'deprecated.R' + 'helpers.R' + 'zzz.R' diff --git a/NAMESPACE b/NAMESPACE index fc5cf62..67da2a2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,35 +1,38 @@ # Generated by roxygen2: do not edit by hand -S3method(str,CFdatum) +S3method("+",CFTime) +S3method("==",CFTime) +S3method(as.character,CFTime) +S3method(cut,CFTime) +S3method(length,CFTime) +S3method(range,CFTime) +export("bounds<-") +export(CFTime) export(CFcomplete) -export(CFdatum) export(CFfactor) export(CFfactor_coverage) export(CFfactor_units) export(CFmonth_days) export(CFparse) +export(CFsubset) export(CFtime) export(CFtimestamp) export(as_timestamp) +export(bounds) export(calendar) export(definition) +export(indexOf) export(is_complete) export(month_days) export(offsets) export(origin) +export(parse_timestamps) export(resolution) export(slab) export(timezone) export(unit) -exportClasses(CFdatum) -exportClasses(CFtime) -exportMethods("+") -exportMethods("==") -exportMethods("bounds<-") -exportMethods(as.character) -exportMethods(bounds) -exportMethods(cut) -exportMethods(format) -exportMethods(indexOf) -exportMethods(length) -exportMethods(range) +importFrom(lubridate,days) +importFrom(lubridate,make_date) +importFrom(lubridate,mday) +importFrom(lubridate,month) +importFrom(lubridate,year) diff --git a/NEWS.md b/NEWS.md index e18a6a4..6d09653 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,14 @@ * Do not drop degenerate dimension on bounds when only 1 offset is included in subsetting. +* `standard` calendar now uses mixed Gregorian/Julian calendar as defined in the +CF Metadata Conventions. `proleptic_gregorian` is now a separate calendar with +its own code base. +* Negative offsets from a calendar origin are allowed. +* Code is refactored to R6. R6 class CFTime replaces S4 class CFtime (note the +difference in case). S4 class CFdatum has been replaced by hierarchy of +R6 CFCalendar classes, with various non-exported functions converted into +methods of CFCalendar. The code is now much cleaner and easier to extend. # CFtime 1.4.1 diff --git a/R/CFCalendar.R b/R/CFCalendar.R new file mode 100644 index 0000000..8ed2e09 --- /dev/null +++ b/R/CFCalendar.R @@ -0,0 +1,350 @@ +#' @title Basic CF calendar +#' +#' @description This class represents a basic CF calendar. It should not be +#' instantiated directly; instead, use one of the descendant classes. +#' +#' This internal class stores the information to represent date and time +#' values using the CF conventions. An instance is created by the exported +#' [CFTime] class, which also exposes the relevant properties of this class. +#' +#' The following calendars are supported: +#' +#' \itemize{ +#' \item [`gregorian\standard`][CFCalendarStandard], the international standard calendar for civil use. +#' \item [`proleptic_gregorian`][CFCalendarProleptic], the standard calendar but extending before 1582-10-15 +#' when the Gregorian calendar was adopted. +#' \item [`julian`][CFCalendarJulian], every fourth year is a leap year (so including the years 1700, 1800, 1900, 2100, etc). +#' \item [`noleap\365_day`][CFCalendar365], all years have 365 days. +#' \item [`all_leap\366_day`][CFCalendar366], all years have 366 days. +#' \item [`360_day`][CFCalendar360], all years have 360 days, divided over 12 months of 30 days. +#' } +#' @references +#' https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#calendar +#' @docType class +CFCalendar <- R6::R6Class("CFCalendar", + public = list( + #' @field name Descriptive name of the calendar, as per the CF Metadata + #' Conventions. + name = "", + + #' @field definition The string that defines the units and the origin, as + #' per the CF Metadata Conventions. + definition = "", + + #' @field unit The numeric id of the unit of the calendar. + unit = -1L, + + #' @field origin `data.frame` with fields for the origin of the calendar. + origin = data.frame(), + + #' @description Create a new CF calendar. + #' @param nm The name of the calendar. This must follow the CF Metadata + #' Conventions. + #' @param definition The string that defines the units and the origin, as + #' per the CF Metadata Conventions. + initialize = function(nm, definition) { + stopifnot(length(definition) == 1L, length(nm) == 1L) + self$name <- tolower(nm) + self$definition <- definition + + parts <- strsplit(definition, " ")[[1L]] + if ((length(parts) < 3L) || !(tolower(parts[2L]) %in% c("since", "after", "from", "ref", "per"))) + stop("Definition string does not appear to be a CF-compliant time coordinate description", call. = FALSE) + u <- which(CFt$CFunits$unit == tolower(parts[1L])) + if (length(u) == 0L) stop("Unsupported unit: ", parts[1L], call. = FALSE) + self$unit <- CFt$CFunits$id[u] + + dt <- self$parse(paste(parts[3L:length(parts)], collapse = " ")) + if (is.na(dt$year[1L])) + stop("Definition string does not appear to be a CF-compliant time coordinate description: invalid base date specification", call. = FALSE) + self$origin <- dt + }, + + #' @description Print information about the calendar to the console. + #' @param ... Ignored. + #' @return `self`, invisibly. + print = function(...) { + tz <- self$timezone + if (tz == "+0000") tz <- "" + cat("CF calendar:", + "\n Origin : ", self$origin_date, " ", self$origin_time, tz, + "\n Units : ", CFt$units$name[self$unit], + "\n Type : ", self$name, "\n", + sep = "") + invisible(self) + }, + + #' @description Indicate which of the supplied dates are valid. + #' @param ymd `data.frame` with dates parsed into their parts in columns + #' `year`, `month` and `day`. Any other columns are disregarded. + #' @return `NULL`. A warning will be generated to the effect that a + #' descendant class should be used for this method. + valid_days = function(ymd) { + warning("Use a descendant class from `CFCalendar` to call this method.", call. = FALSE) + NULL + }, + + #' @description Indicate if the time series described using this calendar + #' can be safely converted to a standard date-time type (`POSIXct`, + #' `POSIXlt`, `Date`). + #' + #' Only the 'standard' calendar and the 'proleptic_gregorian' calendar + #' when all dates in the time series are more recent than 1582-10-15 + #' (inclusive) can be safely converted, so this method returns `FALSE` by + #' default to cover the majority of cases. + #' @param offsets The offsets from the CFtime instance. + #' @return `FALSE` by default. + POSIX_compatible = function(offsets) { + FALSE + }, + + #' @description This method tests if the `CFCalendar` instance in argument + #' `cal` is compatible with `self`, meaning that they are of the same + #' class and have the same unit. Calendars "standard", and "gregorian" are + #' compatible, as are the pairs of "365_day" and "no_leap", and "366_day" + #' and "all_leap". + #' @param cal Instance of a descendant of the `CFCalendar` class. + #' @return `TRUE` if the instance in argument `cal` is compatible with + #' `self`, `FALSE` otherwise. + is_compatible = function(cal) { + self$unit == cal$unit && class(self)[1L] == class(cal)[1L] + }, + + #' @description This method tests if the `CFCalendar` instance in argument + #' `cal` is equivalent to `self`, meaning that they are of the same class, + #' have the same unit, and equivalent origins. Calendars "standard", and + #' "gregorian" are equivalent, as are the pairs of "365_day" and + #' "no_leap", and "366_day" and "all_leap". + #' + #' Note that the origins need not be identical, but their parsed values + #' have to be. "2000-01" is parsed the same as "2000-01-01 00:00:00", for + #' instance. + #' @param cal Instance of a descendant of the `CFCalendar` class. + #' @return `TRUE` if the instance in argument `cal` is equivalent to + #' `self`, `FALSE` otherwise. + is_equivalent = function(cal) { + sum(self$origin[1L,1L:6L] == cal$origin[1L,1L:6L]) == 6L && # Offset column is NA + self$is_compatible(cal) + }, + + #' @description Parsing a vector of date-time character strings into parts. + #' @param d character. A character vector of date-times. + #' @return A `data.frame` with columns year, month, day, hour, minute, + #' second, time zone, and offset. Invalid input data will appear as `NA`. + parse = function(d) { + # Parsers + + # UDUNITS broken timestamp definition, with some changes + # broken_timestamp {broken_date}({space|T}+{broken_clock})? -- T not in definition but present in lexer code + # broken_date {year}-{month}(-{day})? + # year [+-]?[0-9]{1,4} + # month 0?[1-9]|1[0-2] + # day 0?[1-9]|[1-2][0-9]|30|31 + # broken_clock {hour}:{minute}(:{second})? + # hour [0-1]?[0-9]|2[0-3] -- sign on hour not allowed, but see timezone + # minute [0-5]?[0-9] + # second {minute}? -- leap second not supported + # fractional part (\.[0-9]*)? + # timezone [+-]?{hour}(:{minute})? -- added, present in lexer code + broken <- paste0( + "^", # anchor string at start + "([+-]?[0-9]{1,4})", # year, with optional sign + "-(0?[1-9]|1[012])", # month + "(?:-(0?[1-9]|[12][0-9]|3[01]))?", # day, optional + "(?:[T ]", # if a time is following, separate with a single whitespace character or a "T" + "([01]?[0-9]|2[0-3])", # hour + ":([0-5]?[0-9])", # minute + "(?::([0-5]?[0-9]))?", # second, optional + "(?:\\.([0-9]*))?", # optional fractional part of the smallest specified unit + ")?", # close optional time capture group + "(?:\\s", # if a time zone offset is following, separate with a single whitespace character + "([+-])?([01]?[0-9]|2[0-3])", # tz hour, with optional sign + "(?::(00|15|30|45))?", # optional tz minute, only 4 possible values + ")?", # close optional timezone capture group + "$" # anchor string at end + ) + + iso8601 <- paste0( + "^", + "([0-9]{4})", + "-(0[1-9]|1[012])", + "-(0[1-9]|[12][0-9]|3[01])?", + "(?:", + "[T ]([01][0-9]|2[0-3])", + "(?::([0-5][0-9]))?", + "(?::([0-5][0-9]))?", + "(?:\\.([0-9]*))?", + ")?", + "(?:([Z+-])([01][0-9]|2[0-3])?(?::(00|15|30|45))?", ## FIXME: Z?, smaller number of captures + ")?$" + ) + + # UDUNITS packed timestamp definition - NOT YET USED + # packed_timestamp {packed_date}({space|T}+{packed_clock})? -- T and space only allowed in packed time follows + # packed_date {year}({month}{day}?)? -- must be YYYYMMDD or else format is ambiguous, as per lexer code + # packed_clock {hour}({minute}{second}?)? -- must be HHMMSS to be unambiguous + # timezone [+-]?{hour}({minute})? -- added, present in lexer code, must be HHMM + # packed <- stringi::stri_join( + # "^", # anchor string at start + # "([+-]?[0-9]{4})", # year, with optional sign + # "(0[1-9]|1[012])?", # month, optional + # "(0[1-9]|[12][0-9]|3[01])?", # day, optional + # "(?:[T,\\s]", # if a time is following, separate with a single whitespace character or a "T" + # "([01][0-9]|2[0-3])?", # hour + # "([0-5][0-9])?", # minute, optional + # "([0-5]?[0-9](?:\\.[0-9]*)?)?", # second, optional, with optional fractional part + # ")?", # close optional time capture group + # "(?:\\s", # if a time zone offset is following, separate with a single whitespace character + # "([+-]?[01][0-9]|2[0-3])?", # hour, with optional sign + # "(00|15|30|45)?", # minute, only 4 possible values + # ")?", # close optional timezone capture group + # "$" # anchor string at end + # ) + + parse <- data.frame(year = integer(), month = integer(), day = integer(), + hour = integer(), minute = integer(), second = numeric(), frac = character(), + tz_sign = character(), tz_hour = character(), tz_min = character()) + + # Drop "UTC", if given + d <- trimws(gsub("UTC$", "", d)) + + cap <- utils::strcapture(iso8601, d, parse) + missing <- which(is.na(cap$year)) + if (length(missing) > 0L) + cap[missing,] <- utils::strcapture(broken, d[missing], parse) + + # Assign any fraction to the appropriate time part + cap$frac[is.na(cap$frac)] <- "0" + frac <- as.numeric(paste0("0.", cap$frac)) + if (sum(frac) > 0) { + ndx <- which(!(is.na(cap$second)) & frac > 0) + if (length(ndx) > 0L) cap$second[ndx] <- cap$second[ndx] + frac[ndx] + ndx <- which(!(is.na(cap$minute)) & is.na(cap$second) & frac > 0) + if (length(ndx) > 0L) cap$second[ndx] <- 60L * frac[ndx] + ndx <- which(!(is.na(cap$hour)) & is.na(cap$minute) & frac > 0) + if (length(ndx) > 0L) { + secs <- 3600 * frac + cap$minute[ndx] <- secs[ndx] %/% 60 + cap$second[ndx] <- secs[ndx] %% 60 + } + } + cap$frac <- NULL + + # Convert NA time parts to 0 - in CF default time is 00:00:00 when not specified + cap$hour[is.na(cap$hour)] <- 0L + cap$minute[is.na(cap$minute)] <- 0L + cap$second[is.na(cap$second)] <- 0L + + # Set timezone to default value where needed + ndx <- which(cap$tz_sign == "Z") + if (length(ndx) > 0L) { + cap$tz_sign[ndx] <- "+" + cap$tz_hour[ndx] <- "00" + cap$tz_min[ndx] <- "00" + } + cap$tz <- paste0(ifelse(cap$tz_sign == "", "+", cap$tz_sign), + ifelse(cap$tz_hour == "", "00", cap$tz_hour), + ifelse(cap$tz_min == "", "00", cap$tz_min)) + cap$tz_sign <- cap$tz_hour <- cap$tz_min <- NULL + + # Set optional date parts to 1 if not specified + cap$month[is.na(cap$month)] <- 1L + cap$day[is.na(cap$day)] <- 1L + + # Check date validity + invalid <- !self$valid_days(cap) + if (sum(invalid, na.rm = TRUE) > 0L) cap[invalid,] <- rep(NA, 7) + + # Calculate offsets + if (nrow(self$origin) == 0L) { # if there's no origin yet, don't calculate offsets + cap$offset <- rep(0, nrow(cap)) # this happens, f.i., when a CFCalendar is created + } else { + days <- self$date2offset(cap) + cap$offset <- round((days * 86400 + (cap$hour - self$origin$hour[1]) * 3600 + + (cap$minute - self$origin$minute[1]) * 60 + + cap$second - self$origin$second) / CFt$units$seconds[self$unit], 3) + } + cap + }, + + #' @description Decompose a vector of offsets, in units of the calendar, to + #' their timestamp values. This adds a specified amount of time to the + #' origin of a `CFTime` object. + #' + #' This method may introduce inaccuracies where the calendar unit is + #' "months" or "years", due to the ambiguous definition of these units. + #' @param offsets Vector of numeric offsets to add to the origin of the + #' calendar. + #' @return A `data.frame` with columns for the timestamp elements and as + #' many rows as there are offsets. + offsets2time = function(offsets) { + len <- length(offsets) + if(len == 0L) return(data.frame(year = integer(), month = integer(), day = integer(), + hour = integer(), minute = integer(), second = numeric(), + tz = character(), offset = numeric())) + + if (self$unit <= 4L) { # Days, hours, minutes, seconds + # First add time: convert to seconds first, then recompute time parts + secs <- offsets * CFt$units$seconds[self$unit] + + self$origin$hour * 3600L + self$origin$minute * 60L + self$origin$second + days <- secs %/% 86400L # overflow days + secs <- round(secs %% 86400L, 3L) # drop overflow days from time, round down to milli-seconds to avoid errors + + # Time elements for output + hrs <- secs %/% 3600L + mins <- (secs %% 3600L) %/% 60L + secs <- secs %% 60L + + # Now add days using the calendar + out <- if (any(days != 0L)) + self$offset2date(days) + else + data.frame(year = rep(self$origin$year, len), + month = rep(self$origin$month, len), + day = rep(self$origin$day, len)) + + # Put it all back together again + out$hour <- hrs + out$minute <- mins + out$second <- secs + out$tz <- rep(self$timezone, len) + } else { # Months, years + out <- self$origin[rep(1L, len), ] + if (self$unit == 5L) { # Offsets are months + months <- out$month + offsets - 1L + out$month <- months %% 12L + 1L + out$year <- out$year + months %/% 12L + } else { # Offsets are years + out$year <- out$year + offsets + } + } + out$offset <- offsets + out + } + + ), + active = list( + #' @field origin_date (read-only) Character string with the date of the + #' calendar. + origin_date = function(value) { + if (missing(value)) { + sprintf("%04d-%02d-%02d", self$origin$year, self$origin$month, self$origin$day) + } + }, + + #' @field origin_time (read-only) Character string with the time of the + #' calendar. + origin_time = function(value) { + if (missing(value)) { + .format_time(self$origin) + } + }, + + #' @field timezone (read-only) Character string with the time zone of the + #' origin of the calendar. + timezone = function(value) { + if (missing(value)) + self$origin$tz + } + ) +) diff --git a/R/CFCalendar360.R b/R/CFCalendar360.R new file mode 100644 index 0000000..c73bd3a --- /dev/null +++ b/R/CFCalendar360.R @@ -0,0 +1,93 @@ +#' @title 360-day CF calendar +#' +#' @description This class represents a CF calendar of 360 days per year, evenly +#' divided over 12 months of 30 days. This calendar is obviously not +#' compatible with the standard POSIXt calendar. +#' +#' This calendar supports dates before year 1 and includes the year 0. +#' +#' @aliases CFCalendar360 +#' @docType class +CFCalendar360 <- R6::R6Class("CFCalendar360", + inherit = CFCalendar, + public = list( + #' @description Create a new CF calendar. + #' @param nm The name of the calendar. This must be "360_day". This argument + #' is superfluous but maintained to be consistent with the initialization + #' methods of the parent and sibling classes. + #' @param definition The string that defines the units and the origin, as + #' per the CF Metadata Conventions. + #' @return A new instance of this class. + initialize = function(nm, definition) { + super$initialize(nm, definition) + }, + + #' @description Indicate which of the supplied dates are valid. + #' @param ymd `data.frame` with dates parsed into their parts in columns + #' `year`, `month` and `day`. Any other columns are disregarded. + #' @return Logical vector with the same length as argument `ymd` has rows + #' with `TRUE` for valid days and `FALSE` for invalid days, or `NA` where + #' the row in argument `ymd` has `NA` values. + valid_days = function(ymd) { + ymd$year & ymd$month >= 1L & ymd$month <= 12L & ymd$day >= 1L & ymd$day <= 30L + }, + + #' @description Determine the number of days in the month of the calendar. + #' @param ymd `data.frame` with dates parsed into their parts in columns + #' `year`, `month` and `day`. Any other columns are disregarded. + #' @return A vector indicating the number of days in each month for the + #' dates supplied as argument `ymd`. If no dates are supplied, the number + #' of days per month for the calendar as a vector of length 12. + month_days = function(ymd = NULL) { + if (is.null(ymd)) return(rep(30L, 12L)) + + res <- rep(30L, nrow(ymd)) + res[which(is.na(ymd$year))] <- NA + res + }, + + #' @description Indicate which years are leap years. + #' @param yr Integer vector of years to test. + #' @return Logical vector with the same length as argument `yr`. Since this + #' calendar does not use leap days, all values will be `FALSE`, or `NA` + #' where argument `yr` is `NA`. + leap_year = function(yr) { + res <- rep(FALSE, length(yr)) + res[which(is.na(yr))] <- NA + res + }, + + #' @description Calculate difference in days between a `data.frame` of time + #' parts and the origin. + #' + #' @param x `data.frame`. Dates to calculate the difference for. + #' + #' @return Integer vector of a length equal to the number of rows in + #' argument `x` indicating the number of days between `x` and the `origin`, + #' or `NA` for rows in `x` with `NA` values. + date2offset = function(x) { + (x$year - self$origin$year) * 360L + (x$month - self$origin$month) * 30L + x$day - self$origin$day + }, + + #' @description Calculate date parts from day differences from the origin. + #' This only deals with days as these are impacted by the calendar. + #' Hour-minute-second timestamp parts are handled in [CFCalendar]. + #' + #' @param x Integer vector of days to add to the origin. + #' + #' @return A `data.frame` with columns 'year', 'month' and 'day' and as many + #' rows as the length of vector `x`. + offset2date = function(x) { + y <- self$origin$year + x %/% 360L + m <- self$origin$month + (x %% 360L) %/% 30L + d <- self$origin$day + x %% 30L + over <- which(d > 30L) + d[over] <- d[over] - 30L + m[over] <- m[over] + 1L + over <- which(m > 12L) + m[over] <- m[over] - 12L + y[over] <- y[over] + 1L + data.frame(year = y, month = m, day = d, row.names = NULL) + } + ) +) diff --git a/R/CFCalendar365.R b/R/CFCalendar365.R new file mode 100644 index 0000000..8101ede --- /dev/null +++ b/R/CFCalendar365.R @@ -0,0 +1,101 @@ +#' @title 365-day CF calendar +#' +#' @description This class represents a CF calendar of 365 days per year, having +#' no leap days in any year. This calendar is not compatible with the standard +#' POSIXt calendar. +#' +#' This calendar supports dates before year 1 and includes the year 0. +#' +#' @aliases CFCalendar365 +#' @docType class +CFCalendar365 <- R6::R6Class("CFCalendar365", + inherit = CFCalendar, + public = list( + #' @description Create a new CF calendar of 365 days per year. + #' @param nm The name of the calendar. This must be "365_day" or "noleap". + #' @param definition The string that defines the units and the origin, as + #' per the CF Metadata Conventions. + #' @return A new instance of this class. + initialize = function(nm, definition) { + super$initialize(nm, definition) + }, + + #' @description Indicate which of the supplied dates are valid. + #' @param ymd `data.frame` with dates parsed into their parts in columns + #' `year`, `month` and `day`. Any other columns are disregarded. + #' @return Logical vector with the same length as argument `ymd` has rows + #' with `TRUE` for valid days and `FALSE` for invalid days, or `NA` where + #' the row in argument `ymd` has `NA` values. + valid_days = function(ymd) { + ymd$year & ymd$month >= 1L & ymd$month <= 12L & ymd$day >= 1L & + ymd$day <= c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month] + }, + + #' @description Determine the number of days in the month of the calendar. + #' @param ymd `data.frame`, optional, with dates parsed into their parts. + #' @return A vector indicating the number of days in each month for the + #' dates supplied as argument `ymd`. If no dates are supplied, the number + #' of days per month for the calendar as a vector of length 12. + month_days = function(ymd = NULL) { + if (is.null(ymd)) return(c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)) + + res <- c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month] + res[which(is.na(ymd$year))] <- NA + res + }, + + #' @description Indicate which years are leap years. + #' @param yr Integer vector of years to test. + #' @return Logical vector with the same length as argument `yr`. Since this + #' calendar does not use leap days, all values will be `FALSE`, or `NA` + #' where argument `yr` is `NA`. + leap_year = function(yr) { + res <- rep(FALSE, length(yr)) + res[which(is.na(yr))] <- NA + res + }, + + #' @description Calculate difference in days between a `data.frame` of time + #' parts and the origin. + #' + #' @param x `data.frame`. Dates to calculate the difference for. + #' + #' @return Integer vector of a length equal to the number of rows in + #' argument `x` indicating the number of days between `x` and the `origin`, + #' or `NA` for rows in `x` with `NA` values. + date2offset = function(x) { + yd0 <- c(0L, 31L, 59L, 90L, 120L, 151L, 181L, 212L, 243L, 273L, 304L, 334L) # days diff of 1st of month to 1 January + (x$year - self$origin$year) * 365L + yd0[x$month] - yd0[self$origin$month] + x$day - self$origin$day + }, + + #' @description Calculate date parts from day differences from the origin. This + #' only deals with days as these are impacted by the calendar. + #' Hour-minute-second timestamp parts are handled in [CFCalendar]. + #' + #' @param x Integer vector of days to add to the origin. + #' @return A `data.frame` with columns 'year', 'month' and 'day' and as many + #' rows as the length of vector `x`. + offset2date = function(x) { + month <- c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L) + + # First process full years over the vector + yr <- self$origin$year + (x %/% 365L) + x <- x %% 365L + + # Remaining portion relative to the origin + x <- x + self$origin$day + ymd <- mapply(function(y, m, d) { + while (d > month[m]) { + d <- d - month[m] + m <- m + 1L + if (m == 13L) { + y <- y + 1L + m <- 1L + } + } + return(c(y, m, d)) + }, yr, self$origin$month, x) + data.frame(year = ymd[1L,], month = ymd[2L,], day = ymd[3L,], row.names = NULL) + } + ) +) diff --git a/R/CFCalendar366.R b/R/CFCalendar366.R new file mode 100644 index 0000000..4db2853 --- /dev/null +++ b/R/CFCalendar366.R @@ -0,0 +1,101 @@ +#' @title 366-day CF calendar +#' +#' @description This class represents a CF calendar of 366 days per year, having +#' leap days in every year. This calendar is not compatible with the standard +#' POSIXt calendar. +#' +#' This calendar supports dates before year 1 and includes the year 0. +#' +#' @aliases CFCalendar366 +#' @docType class +CFCalendar366 <- R6::R6Class("CFCalendar366", + inherit = CFCalendar, + public = list( + #' @description Create a new CF calendar of 366 days per year. + #' @param nm The name of the calendar. This must be "366_day" or "all_leap". + #' @param definition The string that defines the units and the origin, as + #' per the CF Metadata Conventions. + #' @return A new instance of this class. + initialize = function(nm, definition) { + super$initialize(nm, definition) + }, + + #' @description Indicate which of the supplied dates are valid. + #' @param ymd `data.frame` with dates parsed into their parts in columns + #' `year`, `month` and `day`. Any other columns are disregarded. + #' @return Logical vector with the same length as argument `ymd` has rows + #' with `TRUE` for valid days and `FALSE` for invalid days, or `NA` where + #' the row in argument `ymd` has `NA` values. + valid_days = function(ymd) { + ymd$year & ymd$month >= 1L & ymd$month <= 12L & ymd$day >= 1L & + ymd$day <= c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month] + }, + + #' @description Determine the number of days in the month of the calendar. + #' @param ymd `data.frame`, optional, with dates parsed into their parts. + #' @return A vector indicating the number of days in each month for the + #' dates supplied as argument `ymd`. If no dates are supplied, the number + #' of days per month for the calendar as a vector of length 12. + month_days = function(ymd = NULL) { + if (is.null(ymd)) return(c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)) + + res <- c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month] + res[which(is.na(ymd$year))] <- NA + res + }, + + #' @description Indicate which years are leap years. + #' @param yr Integer vector of years to test. + #' @return Logical vector with the same length as argument `yr`. Since in + #' this calendar all years have a leap day, all values will be `TRUE`, or + #' `NA` where argument `yr` is `NA`. + leap_year = function(yr) { + res <- rep(TRUE, length(yr)) + res[which(is.na(yr))] <- NA + res + }, + + #' @description Calculate difference in days between a `data.frame` of time + #' parts and the origin. + #' + #' @param x `data.frame`. Dates to calculate the difference for. + #' + #' @return Integer vector of a length equal to the number of rows in + #' argument `x` indicating the number of days between `x` and the `origin`, + #' or `NA` for rows in `x` with `NA` values. + date2offset = function(x) { + yd0 <- c(0L, 31L, 60L, 91L, 121L, 152L, 182L, 213L, 244L, 274L, 305L, 335L) # days diff of 1st of month to 1 January + (x$year - self$origin$year) * 366L + yd0[x$month] - yd0[self$origin$month] + x$day - self$origin$day + }, + + #' @description Calculate date parts from day differences from the origin. This + #' only deals with days as these are impacted by the calendar. + #' Hour-minute-second timestamp parts are handled in [CFCalendar]. + #' + #' @param x Integer vector of days to add to the origin. + #' @return A `data.frame` with columns 'year', 'month' and 'day' and as many + #' rows as the length of vector `x`. + offset2date = function(x) { + month <- c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L) + + # First process full years over the vector + yr <- self$origin$year + (x %/% 366L) + x <- x %% 366L + + # Remaining portion relative to the origin + x <- x + self$origin$day + ymd <- mapply(function(y, m, d) { + while (d > month[m]) { + d <- d - month[m] + m <- m + 1L + if (m == 13L) { + y <- y + 1L + m <- 1L + } + } + return(c(y, m, d)) + }, yr, self$origin$month, x) + data.frame(year = ymd[1L,], month = ymd[2L,], day = ymd[3L,], row.names = NULL) + } + ) +) diff --git a/R/CFCalendarJulian.R b/R/CFCalendarJulian.R new file mode 100644 index 0000000..af6b8f0 --- /dev/null +++ b/R/CFCalendarJulian.R @@ -0,0 +1,157 @@ +#' @title Julian CF calendar +#' +#' @description This class represents a Julian calendar of 365 days per year, +#' with every fourth year being a leap year of 366 days. The months and the +#' year align with the standard calendar. This calendar is not compatible with +#' the standard POSIXt calendar. +#' +#' This calendar starts on 1 January of year 1: 0001-01-01 00:00:00. Any dates +#' before this will generate an error. +#' +#' @aliases CFCalendarJulian +#' @docType class +CFCalendarJulian <- R6::R6Class("CFCalendarJulian", + inherit = CFCalendar, + public = list( + #' @description Create a new CF calendar. + #' @param nm The name of the calendar. This must be "julian". This argument + #' is superfluous but maintained to be consistent with the initialization + #' methods of the parent and sibling classes. + #' @param definition The string that defines the units and the origin, as + #' per the CF Metadata Conventions. + #' @return A new instance of this class. + initialize = function(nm, definition) { + super$initialize(nm, definition) + }, + + #' @description Indicate which of the supplied dates are valid. + #' @param ymd `data.frame` with dates parsed into their parts in columns + #' `year`, `month` and `day`. Any other columns are disregarded. + #' @return Logical vector with the same length as argument `ymd` has rows + #' with `TRUE` for valid days and `FALSE` for invalid days, or `NA` where + #' the row in argument `ymd` has `NA` values. + valid_days = function(ymd) { + ymd$year >= 1L & ymd$month >= 1L & ymd$month <= 12L & ymd$day >= 1L & + ifelse(self$leap_year(ymd$year), + ymd$day <= c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)[ymd$month], + ymd$day <= c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)[ymd$month]) + }, + + #' @description Determine the number of days in the month of the calendar. + #' @param ymd `data.frame`, optional, with dates parsed into their parts. + #' @return A vector indicating the number of days in each month for the + #' dates supplied as argument `ymd`. If no dates are supplied, the number + #' of days per month for the calendar as a vector of length 12, for a + #' regular year without a leap day. + month_days = function(ymd = NULL) { + if (is.null(ymd)) return(c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)) + + ifelse(self$leap_year(ymd$year), + c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)[ymd$month], + c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)[ymd$month]) + }, + + #' @description Indicate which years are leap years. + #' @param yr Integer vector of years to test. + #' @return Logical vector with the same length as argument `yr`. `NA` is + #' returned where elements in argument `yr` are `NA`. + leap_year = function(yr) { + yr %% 4L == 0L + }, + + #' @description Calculate difference in days between a `data.frame` of time + #' parts and the origin. + #' + #' @param x `data.frame`. Dates to calculate the difference for. + #' @return Integer vector of a length equal to the number of rows in + #' argument `x` indicating the number of days between `x` and the origin + #' of the calendar, or `NA` for rows in `x` with `NA` values. + date2offset = function(x) { + .julian_date2offset(x, self$origin) + }, + + #' @description Calculate date parts from day differences from the origin. This + #' only deals with days as these are impacted by the calendar. + #' Hour-minute-second timestamp parts are handled in [CFCalendar]. + #' + #' @param x Integer vector of days to add to the origin. + #' @return A `data.frame` with columns 'year', 'month' and 'day' and as many + #' rows as the length of vector `x`. + offset2date = function(x) { + common_days <- c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L) + leap_days <- c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L) + + # Is the leap day to consider ahead in the year from the base date (offset = 0) or in the next year (offset = 1) + offset <- as.integer(self$origin$month > 2L) + + # First process 4-year cycles of 1,461 days over the vector + yr <- self$origin$year + (x %/% 1461L) * 4L + x <- x %% 1461L + + # Remaining portion relative to the origin + x <- x + self$origin$day + ymd <- mapply(function(y, m, d) { + repeat { + leap <- (y + offset) %% 4L == 0L + ydays <- 365L + as.integer(leap) + + if (d <= 0L) { + d <- d + ydays + y <- y - 1L + if (d > 0L) break + } else if (d > ydays) { + d <- d - ydays + y <- y + 1L + } else break + } + + month <- if (leap) leap_days else common_days + + while (d > month[m]) { + d <- d - month[m] + m <- m + 1L + if (m == 13L) { + y <- y + 1L + m <- 1L + } + } + return(c(y, m, d)) + }, yr, self$origin$month, x) + data.frame(year = ymd[1L,], month = ymd[2L,], day = ymd[3L,], row.names = NULL) + } + ) +) + +# Internal function to calculate dates from offsets. This function is here +# because it is used by CFCalendarStandard. See further description in the +# methods. +.julian_date2offset <- function(x, origin) { + # days diff of 1st of month to 1 January in normal year + yd0 <- c(0L, 31L, 59L, 90L, 120L, 151L, 181L, 212L, 243L, 273L, 304L, 334L) + + origin_year <- origin$year + days_into_year <- yd0[origin$month] + origin$day + if (origin$month <= 2L && origin_year %% 4L == 0L) + days_into_year <- days_into_year - 1L + + mapply(function(y, m, d) { + if (is.na(y)) return(NA_integer_) + + # Adjust for where the leap day falls + if (y >= origin_year) + days <- if (m <= 2L && y %% 4L == 0L) -1L else 0L + else + days <- if (m > 2L && y %% 4L == 0L) 0L else -1L + + repeat { + if (y > origin_year) { + days <- days + 365L + as.integer(y %% 4L == 0L) + y <- y - 1L + } else if (y < origin_year) { + days <- days - 365L - as.integer(y %% 4L == 0L) + y <- y + 1L + } else break + } + days + yd0[m] + d - days_into_year + }, x$year, x$month, x$day) +} diff --git a/R/CFCalendarProleptic.R b/R/CFCalendarProleptic.R new file mode 100644 index 0000000..d5ceebb --- /dev/null +++ b/R/CFCalendarProleptic.R @@ -0,0 +1,96 @@ +#' @title Proleptic Gregorian CF calendar +#' +#' @description This class represents a standard CF calendar, but with the +#' Gregorian calendar extended backwards to before the introduction of the +#' Gregorian calendar. This calendar is compatible with the standard POSIXt +#' calendar, but note that daylight savings time is not considered. +#' +#' This calendar includes dates 1582-10-14 to 1582-10-05 (the gap between the +#' Gregorian and Julian calendars, which is observed by the standard +#' calendar), and extends to years before the year 1, including year 0. +#' +#' @aliases CFCalendarProleptic +#' @docType class +CFCalendarProleptic <- R6::R6Class("CFCalendarProleptic", + inherit = CFCalendar, + public = list( + #' @description Create a new CF calendar. + #' @param nm The name of the calendar. This must be "proleptic_gregorian". + #' This argument is superfluous but maintained to be consistent with the + #' initialization methods of the parent and sibling classes. + #' @param definition The string that defines the units and the origin, as + #' per the CF Metadata Conventions. + #' @return A new instance of this class. + initialize = function(nm, definition) { + super$initialize(nm, definition) + }, + + #' @description Indicate which of the supplied dates are valid. + #' @param ymd `data.frame` with dates parsed into their parts in columns + #' `year`, `month` and `day`. Any other columns are disregarded. + #' @return Logical vector with the same length as argument `ymd` has rows + #' with `TRUE` for valid days and `FALSE` for invalid days, or `NA` where + #' the row in argument `ymd` has `NA` values. + valid_days = function(ymd) { + ymd$year & ymd$month >= 1L & ymd$month <= 12L & ymd$day >= 1L & + ifelse(self$leap_year(ymd$year), + ymd$day <= c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)[ymd$month], + ymd$day <= c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)[ymd$month]) + }, + + #' @description Determine the number of days in the month of the calendar. + #' @param ymd `data.frame`, optional, with dates parsed into their parts. + #' @return Integer vector indicating the number of days in each month for + #' the dates supplied as argument `ymd`. If no dates are supplied, the + #' number of days per month for the calendar as a vector of length 12, for + #' a regular year without a leap day. + month_days = function(ymd = NULL) { + if (is.null(ymd)) return(c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)) + + ifelse(self$leap_year(ymd$year), + c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month], + c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month]) + }, + + #' @description Indicate which years are leap years. + #' @param yr Integer vector of years to test. + #' @return Logical vector with the same length as argument `yr`. `NA` is + #' returned where elements in argument `yr` are `NA`. + leap_year = function(yr) { + ((yr %% 4L == 0L) & (yr %% 100L > 0L)) | (yr %% 400L == 0L) + }, + + #' @description Indicate if the time series described using this calendar + #' can be safely converted to a standard date-time type (`POSIXct`, + #' `POSIXlt`, `Date`). + #' @param offsets The offsets from the CFtime instance. + #' @return `TRUE`. + POSIX_compatible = function(offsets) { + TRUE + }, + + #' @description Calculate difference in days between a `data.frame` of time + #' parts and the origin. + #' + #' @param x `data.frame`. Dates to calculate the difference for. + #' @return Integer vector of a length equal to the number of rows in + #' argument `x` indicating the number of days between `x` and the `origin`, + #' or `NA` for rows in `x` with `NA` values. + date2offset = function(x) { + origin <- lubridate::make_date(self$origin$year, self$origin$month, self$origin$day) + as.integer(lubridate::make_date(x$year, x$month, x$day) - origin) + }, + + #' @description Calculate date parts from day differences from the origin. This + #' only deals with days as these are impacted by the calendar. + #' Hour-minute-second timestamp parts are handled in [CFCalendar]. + #' + #' @param x Integer vector of days to add to the origin. + #' @return A `data.frame` with columns 'year', 'month' and 'day' and as many + #' rows as the length of vector `x`. + offset2date = function(x) { + dt <- lubridate::make_date(self$origin$year, self$origin$month, self$origin$day) + lubridate::days(x) + data.frame(year = lubridate::year(dt), month = lubridate::month(dt), day = lubridate::mday(dt), row.names = NULL) + } + ) +) diff --git a/R/CFCalendarStandard.R b/R/CFCalendarStandard.R new file mode 100644 index 0000000..00ac967 --- /dev/null +++ b/R/CFCalendarStandard.R @@ -0,0 +1,209 @@ +#' @title Standard CF calendar +#' +#' @description This class represents a standard calendar of 365 or 366 days per +#' year. This calendar is compatible with the standard POSIXt calendar for +#' periods after the introduction of the Gregorian calendar, 1582-10-15 +#' 00:00:00. The calendar starts at 0001-01-01 00:00:00, e.g. the start of the +#' Common Era. +#' +#' Note that this calendar, despite its name, is not the same as that used in +#' ISO8601 or many computer systems for periods prior to the introduction of +#' the Gregorian calendar. Use of the "proleptic_gregorian" calendar is +#' recommended for periods before or straddling the introduction date, as that +#' calendar is compatible with POSIXt on most OSes. +#' +#' @importFrom lubridate make_date year month mday days +#' @aliases CFCalendarStandard +#' @docType class +CFCalendarStandard <- R6::R6Class("CFCalendarStandard", + inherit = CFCalendar, + public = list( + #' @field gap The integer offset for 1582-10-15 00:00:00, when the Gregorian + #' calendar started, or 1582-10-05, when the gap between Julian and + #' Gregorian calendars started. The former is set when the calendar origin + #' is more recent, the latter when the origin is prior to the gap. + gap = -1L, + + #' @description Create a new CF calendar. + #' @param nm The name of the calendar. This must be "standard" or + #' "gregorian" (deprecated). + #' @param definition The string that defines the units and the origin, as + #' per the CF Metadata Conventions. + #' @return A new instance of this class. + initialize = function(nm, definition) { + super$initialize(nm, definition) + + self$gap <- if (self$is_gregorian_date(self$origin)) + as.integer(lubridate::make_date(1582, 10, 15) - + lubridate::make_date(self$origin$year, self$origin$month, self$origin$day)) + else + .julian_date2offset(data.frame(year = 1582, month = 10, day = 5), self$origin) + }, + + #' @description Indicate which of the supplied dates are valid. + #' @param ymd `data.frame` with dates parsed into their parts in columns + #' `year`, `month` and `day`. Any other columns are disregarded. + #' @return Logical vector with the same length as argument `ymd` has rows + #' with `TRUE` for valid days and `FALSE` for invalid days, or `NA` where + #' the row in argument `ymd` has `NA` values. + valid_days = function(ymd) { + ymd$year >= 1L & ymd$month >= 1L & ymd$month <= 12L & ymd$day >= 1L & + ifelse(self$is_gregorian_date(ymd), + # Gregorian calendar + ifelse(self$leap_year(ymd$year), + ymd$day <= c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month], + ymd$day <= c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month]), + # Julian calendar + ifelse(ymd$year == 1582L & ymd$month == 10L & ymd$day > 4L, + FALSE, # days 1582-10-05 - 1582-10-14 do not exist + ifelse(ymd$year %% 4L == 0L, + ymd$day <= c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month], + ymd$day <= c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month]) + ) + ) + }, + + #' @description Indicate which of the supplied dates are in the Gregorian + #' part of the calendar, e.g. 1582-10-15 or after. + #' @param ymd `data.frame` with dates parsed into their parts in columns + #' `year`, `month` and `day`. Any other columns are disregarded. + #' @return Logical vector with the same length as argument `ymd` has rows + #' with `TRUE` for days in the Gregorian part of the calendar and `FALSE` + #' otherwise, or `NA` where the row in argument `ymd` has `NA` values. + is_gregorian_date = function(ymd) { + ymd$year > 1582L | (ymd$year == 1582L & (ymd$month > 10L | (ymd$month == 10L & ymd$day >= 15L))) + }, + + #' @description Indicate if the time series described using this calendar + #' can be safely converted to a standard date-time type (`POSIXct`, + #' `POSIXlt`, `Date`). This is only the case if all offsets are for + #' timestamps fall on or after the start of the Gregorian calendar, + #' 1582-10-15 00:00:00. + #' @param offsets The offsets from the CFtime instance. + #' @return `TRUE`. + POSIX_compatible = function(offsets) { + all(offsets >= self$gap) + }, + + #' @description Determine the number of days in the month of the calendar. + #' @param ymd `data.frame`, optional, with dates parsed into their parts. + #' @return A vector indicating the number of days in each month for the + #' dates supplied as argument `ymd`. If no dates are supplied, the number + #' of days per month for the calendar as a vector of length 12, for a + #' regular year without a leap day. + month_days = function(ymd = NULL) { + if (is.null(ymd)) return(c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)) + + ifelse(self$leap_year(ymd$year), + c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month], + c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[ymd$month]) + }, + + #' @description Indicate which years are leap years. + #' @param yr Integer vector of years to test. + #' @return Logical vector with the same length as argument `yr`. `NA` is + #' returned where elements in argument `yr` are `NA`. + leap_year = function(yr) { + ifelse(yr <= 1582L, + yr %% 4L == 0L, + ((yr %% 4L == 0L) & (yr %% 100L > 0L)) | (yr %% 400L == 0L) + ) + }, + + #' @description Calculate difference in days between a `data.frame` of time + #' parts and the origin. + #' + #' @param x `data.frame`. Dates to calculate the difference for. + #' @return Integer vector of a length equal to the number of rows in + #' argument `x` indicating the number of days between `x` and the origin + #' of the calendar, or `NA` for rows in `x` with `NA` values. + date2offset = function(x) { + if (self$gap > 0L) { + # self$origin in Julian calendar part + greg0 <- lubridate::make_date(1582, 10, 15) + ifelse(self$is_gregorian_date(x), + # Calculate Gregorian dates from 1582-10-15, add gap + as.integer(lubridate::make_date(x$year, x$month, x$day) - greg0) + self$gap, + # Calculate julian days from self$origin + .julian_date2offset(x, self$origin) + ) + } else { + # self$origin in Gregorian calendar part + self_origin <- lubridate::make_date(self$origin$year, self$origin$month, self$origin$day) + julian0 <- data.frame(year = 1582L, month = 10L, day = 5L) + ifelse(self$is_gregorian_date(x), + # Calculate Gregorian dates from self$origin + as.integer(lubridate::make_date(x$year, x$month, x$day) - self_origin), + # Calculate julian days from 1582-10-05, add gap + .julian_date2offset(x, julian0) + self$gap + ) + } + }, + + #' @description Calculate date parts from day differences from the origin. This + #' only deals with days as these are impacted by the calendar. + #' Hour-minute-second timestamp parts are handled in [CFCalendar]. + #' + #' @param x Integer vector of days to add to the origin. + #' @return A `data.frame` with columns 'year', 'month' and 'day' and as many + #' rows as the length of vector `x`. + offset2date = function(x) { + if (self$gap <= 0L && all(x >= self$gap, na.rm = TRUE)) { + # If self$origin and all offsets are in the Gregorian calendar, use + # lubridate. Presumed to cover the majority of cases. + dt <- lubridate::make_date(self$origin$year, self$origin$month, self$origin$day) + lubridate::days(x) + data.frame(year = lubridate::year(dt), month = lubridate::month(dt), day = lubridate::mday(dt), row.names = NULL) + } else { + # Manage cases where self$origin is in the Julian calendar and/or `x` + # values straddle the Julian/Gregorian boundary. + common_days <- c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L) + leap_days <- c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L) + + # Is the leap day to consider ahead in the year from the base date (offset = 0) or in the next year (offset = 1) + offset <- as.integer(self$origin$month > 2L) + + # Correct `x` values that straddle the gap from self$origin + if (self$gap <= 0L) { # Gregorian origin + ndx <- which(x < self$gap) + x[ndx] <- x[ndx] - 10L + } else { # Julian origin + ndx <- which(x >= self$gap) + if (length(ndx)) + x[ndx] <- x[ndx] + 10L + } + + x <- x + self$origin$day + ymd <- mapply(function(y, m, d) { + repeat { + test <- y + offset + leap <- if (test <= 1582) (test) %% 4L == 0L + else ((test %% 4L == 0L) && (test %% 100L > 0L)) || (test %% 400L == 0L) + ydays <- 365L + as.integer(leap) + + if (d <= 0L) { + d <- d + ydays + y <- y - 1L + if (d > 0L) break + } else if (d > ydays) { + d <- d - ydays + y <- y + 1L + } else break + } + + month <- if (leap) leap_days else common_days + + while (d > month[m]) { + d <- d - month[m] + m <- m + 1L + if (m == 13L) { + y <- y + 1L + m <- 1L + } + } + return(c(y, m, d)) + }, self$origin$year, self$origin$month, x) + data.frame(year = ymd[1L,], month = ymd[2L,], day = ymd[3L,], row.names = NULL) + } + } + ) +) diff --git a/R/CFbounds.R b/R/CFbounds.R deleted file mode 100644 index fc148d2..0000000 --- a/R/CFbounds.R +++ /dev/null @@ -1,115 +0,0 @@ -#' Indicates if the time series has equidistant time steps -#' -#' This function returns `TRUE` if the time series has uniformly distributed -#' time steps between the extreme values, `FALSE` otherwise. First test without -#' sorting; this should work for most data sets. If not, only then offsets are -#' sorted. For most data sets that will work but for implied resolutions of -#' month, season, year, etc based on a "days" or finer datum unit this will fail -#' due to the fact that those coarser units have a variable number of days per -#' time step, in all calendars except for `360_day`. For now, an approximate -#' solution is used that should work in all but the most non-conformal exotic -#' arrangements. -#' -#' This function should only be called after offsets have been added. -#' -#' This is an internal function that should not be used outside of the CFtime -#' package. -#' -#' @param x CFtime. The time series to operate on. -#' -#' @returns `TRUE` if all time steps are equidistant, `FALSE` otherwise. -#' -#' @noRd -.ts_equidistant <- function(x) { - out <- all(diff(x@offsets) == x@resolution) - if (!out) { - doff <- diff(sort(x@offsets)) - out <- all(doff == x@resolution) - if (!out) { - # Don't try to make sense of totally non-standard arrangements such as - # datum units "years" or "months" describing sub-daily time steps. - # Also, 360_day calendar should be well-behaved so we don't want to get here. - if (x@datum@unit > 4L || x@datum@cal_id == 3L) return(FALSE) - - # Check if we have monthly or yearly data on a finer-scale datum - # This is all rather approximate but should be fine in most cases - # This accommodates middle-of-the-time-period offsets as per the CF Metadata Conventions - # Please report problems at https://github.com/pvanlaake/CFtime/issues - ddays <- range(doff) * CFt$units$per_day[x@datum@unit] - return((ddays[1] >= 28 && ddays[2] <= 31) || # months - (ddays[1] >= 8 && ddays[2] <= 11) || # dekads - (ddays[1] >= 90 && ddays[2] <= 92) || # seasons, quarters - (ddays[1] >= 365 && ddays[2] <= 366)) # years - } - } - out -} - -#' Set the bounds of a CFtime instance -#' -#' @param cf The CFtime instance -#' @param value The bounds to set. Either an array (2, length(cf)) or a logical -#' -#' @returns Returns `cf` invisibly -#' @noRd -.set_bounds <- function(cf, value) { - if (isFALSE(value)) cf@bounds <- FALSE - else if (isTRUE(value)) cf@bounds <- TRUE - else { - off <- cf@offsets - len <- length(off) - - if (len == 0L) - stop("Cannot set bounds when there are no offsets") - - if (is.matrix(value) && is.numeric(value)) { - if (!all(dim(value) == c(2L, len))) - stop("Replacement value has incorrect dimensions") - } else stop("Replacement value must be a numeric matrix or a single logical value") - - if (!(all(value[2L,] >= off) && all(off >= value[1L,]))) - stop("Values of the replacement value must surround the offset values") - - # Compress array to `TRUE`, if regular - if (len > 1L && identical(value[1L,2L:len], value[2L,1L:(len-1L)]) && - diff(range(diff(value[1L,]))) == 0) value <- TRUE - - cf@bounds <- value - } - invisible(cf) -} - -#' Return bounds -#' -#' @param cf The CFtime instance -#' @param format Optional. A string specifying a format for output -#' -#' @returns An array with dims(2, length(offsets)) with values for the bounds. -#' `NULL` if the bounds have not been set. -#' @noRd -.get_bounds <- function (cf, format) { - len <- length(cf@offsets) - if (len == 0L) return(NULL) - - bnds <- cf@bounds - if (is.logical(bnds)) { - if (!bnds) return(NULL) - - b <- seq(from = cf@offsets[1L] - cf@resolution * 0.5, - by = cf@resolution, - length.out = len + 1L) - if (!missing(format)) { - ts <- .offsets2time(b, cf@datum) - b <- .format_format(ts, tz(cf@datum), format) - } - return(rbind(b[1L:len], b[2L:(len+1L)])) - } - - # bnds is a matrix - if (missing(format)) return(bnds) - - ts <- .offsets2time(as.vector(bnds), cf@datum) - b <- .format_format(ts, tz(cf@datum), format) - dim(b) <- c(2L, len) - b -} diff --git a/R/CFdatum.R b/R/CFdatum.R deleted file mode 100644 index 7182f44..0000000 --- a/R/CFdatum.R +++ /dev/null @@ -1,117 +0,0 @@ -#' CFdatum class -#' -#' This internal class stores the information to represent date and time values using -#' the CF conventions. This class is not supposed to be used by end-users directly. -#' An instance is created by the exported `CFtime` class, which also exposes the -#' relevant properties of this class. -#' -#' The following calendars are supported: -#' -#' \itemize{ -#' \item `gregorian` or `standard`, the international standard calendar for civil use. -#' \item `proleptic_gregorian`, the standard calendar but extending before 1582-10-15 -#' when the Gregorian calendar was adopted. -#' \item `noleap` or `365_day`, all years have 365 days. -#' \item `all_leap` or `366_day`, all years have 366 days. -#' \item `360_day`, all years have 360 days, divided over 12 months of 30 days. -#' \item `julian`, every fourth year is a leap year (so including the years 1700, 1800, 1900, 2100, etc). -#' } -#' -#' @slot definition character. The string that defines the time unit and base date/time. -#' @slot unit numeric. The unit of time in which offsets are expressed. -#' @slot origin data.frame. Data frame with 1 row that defines the origin time. -#' @slot calendar character. The CF-calendar for the instance. -#' @slot cal_id numeric. The internal identifier of the CF-calendar to use. -#' -#' @returns An object of class CFdatum -#' @export -setClass("CFdatum", - slots = c( - definition = "character", - unit = "numeric", - origin = "data.frame", - calendar = "character", - cal_id = "numeric" - )) - -#' Create a CFdatum object -#' -#' This function creates an instance of the `CFdatum` class. After creation the -#' instance is read-only. The parameters to the call are typically read from a -#' CF-compliant data file with climatological observations or predictions. -#' -#' @param definition character. A character string describing the time coordinate -#' of a CF-compliant data file. -#' @param calendar character. A character string describing the calendar to use -#' with the time dimension definition string. -#' -#' @returns An object of the `CFdatum` class. -#' @export -CFdatum <- function(definition, calendar) { - stopifnot(length(definition) == 1L, length(calendar) == 1L) - calendar <- tolower(calendar) - - parts <- strsplit(definition, " ")[[1L]] - if ((length(parts) < 3L) || !(tolower(parts[2L]) %in% c("since", "after", "from", "ref", "per"))) - stop("Definition string does not appear to be a CF-compliant time coordinate description") - u <- which(CFt$CFunits$unit == tolower(parts[1L])) - if (length(u) == 0L) stop("Unsupported unit: ", parts[1L]) - - cal <- CFt$calendars$id[which(calendar == CFt$calendars$name)] - if (length(cal) == 0L) stop("Invalid calendar specification") - - nw <- methods::new("CFdatum", definition = definition, unit = CFt$CFunits$id[u], origin = data.frame(), calendar = calendar, cal_id = cal) - - dt <- .parse_timestamp(nw, paste(parts[3L:length(parts)], collapse = " ")) - if (is.na(dt$year[1L])) - stop("Definition string does not appear to be a CF-compliant time coordinate description: invalid base date specification") - nw@origin <- dt - - return(nw) -} - -setMethod("show", "CFdatum", function(object) { - if (object@origin$tz[1L] == "+0000") tz = "" else tz = object@origin$tz[1L] - cat("CF datum of origin:", - "\n Origin : ", origin_date(object), " ", origin_time(object), tz, - "\n Units : ", CFt$units$name[object@unit], - "\n Calendar: ", object@calendar, "\n", - sep = "") -}) - -#' Equivalence of CFdatum objects -#' -#' This function can be used to test if two `CFdatum` objects represent the same datum -#' for CF-convention time coordinates. Two `CFdatum` objects are considered equivalent -#' if they have the same definition string and the same calendar. Calendars -#' "standard", "gregorian" and "proleptic_gregorian" are considered equivalent, -#' as are the pairs of "365_day" and "no_leap", and "366_day" and "all_leap". -#' -#' @param e1,e2 CFdatum Instances of the CFdatum class. -#' -#' @returns `TRUE` if the `CFdatum` objects are equivalent, `FALSE` otherwise. -#' @noRd -.datum_equivalent <- function(e1, e2) { - sum(e1@origin[1L,1L:6L] != e2@origin[1L,1L:6L]) == 0L && # Offset column is NA - e1@unit == e2@unit && - e1@cal_id == e2@cal_id -} - -#' Compatibility of CFdatum objects -#' -#' This function can be used to test if two `CFdatum` objects have the same unit -#' and calendar for CF-convention time coordinates. Calendars "standard", -#' "gregorian" and "proleptic_gregorian" are considered compatible, as are the -#' pairs of "365_day" and "no_leap", and "366_day" and "all_leap". -#' -#' @param e1,e2 CFdatum Instances of the CFdatum class. -#' -#' @returns `TRUE` if the `CFdatum` objects are compatible, `FALSE` otherwise. -#' @noRd -.datum_compatible <- function(e1, e2) e1@unit == e2@unit && e1@cal_id == e2@cal_id - -origin_date <- function(x) sprintf("%04d-%02d-%02d", x@origin$year[1L], x@origin$month[1L], x@origin$day[1L]) - -origin_time <- function(x) .format_time(x@origin) - -tz <- function(x) x@origin$tz[1L] diff --git a/R/CFfactor.R b/R/CFfactor.R deleted file mode 100644 index 0f2009a..0000000 --- a/R/CFfactor.R +++ /dev/null @@ -1,416 +0,0 @@ -#' Create a factor from the offsets in an CFtime instance -#' -#' With this function a factor can be generated for the time series, or a part -#' thereof, contained in the `CFtime` instance. This is specifically interesting -#' for creating factors from the date part of the time series that aggregate the -#' time series into longer time periods (such as month) that can then be used to -#' process daily CF data sets using, for instance, `tapply()`. -#' -#' The factor will respect the calendar of the datum that the time series is -#' built on. For `period`s longer than a day this will result in a factor where -#' the calendar is no longer relevant (because calendars impacts days, not -#' dekads, months, quarters, seasons or years). -#' -#' The factor will be generated in the order of the offsets of the `CFtime` -#' instance. While typical CF-compliant data sources use ordered time series -#' there is, however, no guarantee that the factor is ordered as multiple -#' `CFtime` objects may have been merged out of order. -#' -#' If the `epoch` parameter is specified, either as a vector of years to include -#' in the factor, or as a list of such vectors, the factor will only consider -#' those values in the time series that fall within the list of years, inclusive -#' of boundary values. Other values in the factor will be set to `NA`. The years -#' need not be contiguous, within a single vector or among the list items, or in -#' order. -#' -#' The following periods are supported by this function: -#' -#' \itemize{ -#' \item `year`, the year of each offset is returned as "YYYY". -#' \item `season`, the meteorological season of each offset is returned as -#' "Sx", with x being 1-4, preceeded by "YYYY" if no `epoch` is -#' specified. Note that December dates are labeled as belonging to the -#' subsequent year, so the date "2020-12-01" yields "2021S1". This implies -#' that for standard CMIP files having one or more full years of data the -#' first season will have data for the first two months (January and -#' February), while the final season will have only a single month of data -#' (December). -#' \item `quarter`, the calendar quarter of each offset is returned as "Qx", -#' with x being 1-4, preceeded by "YYYY" if no `epoch` is specified. -#' \item `month`, the month of each offset is returned as "01" to -#' "12", preceeded by "YYYY-" if no `epoch` is specified. This is the default -#' period. -#' \item `dekad`, ten-day periods are returned as -#' "Dxx", where xx runs from "01" to "36", preceeded by "YYYY" if no `epoch` -#' is specified. Each month is subdivided in dekads as follows: 1- days 01 - -#' 10; 2- days 11 - 20; 3- remainder of the month. -#' \item `day`, the month and day of each offset are returned as "MM-DD", -#' preceeded by "YYYY-" if no `epoch` is specified. -#' } -#' -#' It is not possible to create a factor for a period that is shorter than the -#' temporal resolution of the source data set from which the `cf` argument -#' derives. As an example, if the source data set has monthly data, a dekad or -#' day factor cannot be created. -#' -#' Creating factors for other periods is not supported by this function. Factors -#' based on the timestamp information and not dependent on the calendar can -#' trivially be constructed from the output of the [as_timestamp()] function. -#' -#' For non-epoch factors the attribute 'CFtime' of the result contains a CFtime -#' instance that is valid for the result of applying the factor to a data set -#' that the `cf` argument is associated with. In other words, if CFtime instance -#' 'Acf' describes the temporal dimension of data set 'A' and a factor 'Af' is -#' generated from 'Acf', then `attr(Af, "CFtime")` describes the temporal -#' dimension of the result of, say, `apply(A, 1:2, tapply, Af, FUN)`. The -#' 'CFtime' attribute is `NULL` for epoch factors. -#' -#' @param cf CFtime. An instance of the `CFtime` class whose offsets will -#' be used to construct the factor. -#' @param period character. A character string with one of the values -#' "year", "season", "quarter", "month" (the default), "dekad" or "day". -#' @param epoch numeric or list, optional. Vector of years for which to -#' construct the factor, or a list whose elements are each a vector of years. -#' If `epoch` is not specified, the factor will use the entire time series for -#' the factor. -#' -#' @returns If `epoch` is a single vector or not specified, a factor with a -#' length equal to the number of offsets in `cf`. If `epoch` is a list, a list -#' with the same number of elements and names as `epoch`, each containing a -#' factor. Elements in the factor will be set to `NA` for time series values -#' outside of the range of specified years. -#' -#' The factor, or factors in the list, have attributes 'period', 'epoch' and -#' 'CFtime'. Attribute 'period' holds the value of the `period` argument. -#' Attribute 'epoch' indicates the number of years that are included in the -#' epoch, or -1 if no `epoch` is provided. Attribute 'CFtime' holds an -#' instance of CFtime that has the same definition as `cf`, but with offsets -#' corresponding to the mid-point of non-epoch factor levels; if the `epoch` -#' argument is specified, attribute 'CFtime' is `NULL`. -#' @seealso [cut()] creates a non-epoch factor for arbitrary cut points. -#' @export -#' -#' @examples -#' cf <- CFtime("days since 1949-12-01", "360_day", 19830:54029) -#' -#' # Create a dekad factor for the whole time series -#' f <- CFfactor(cf, "dekad") -#' -#' # Create three monthly factors for early, mid and late 21st century epochs -#' ep <- CFfactor(cf, epoch = list(early = 2021:2040, mid = 2041:2060, late = 2061:2080)) -CFfactor <- function(cf, period = "month", epoch = NULL) { - if (!(methods::is(cf, "CFtime"))) stop("First argument to CFfactor() must be an instance of the `CFtime` class") - if (length(cf@offsets) < 10L) stop("Cannot create a factor for very short time series") - - period <- tolower(period) - if (!((length(period) == 1L) && (period %in% CFt$factor_periods))) - stop("Period specifier must be a single value of a supported period") - - # No fine-grained period factors for coarse source data - timestep <- CFt$units$seconds[cf@datum@unit] * cf@resolution; - if ((period == "year") && (timestep > 86400 * 366) || - (period %in% c("season", "quarter")) && (timestep > 86400 * 90) || # Somewhat arbitrary - (period == "month") && (timestep > 86400 * 31) || - (period == "dekad") && (timestep > 86400) || # Must be constructed from daily or finer data - (period == "day") && (timestep > 86400)) # Must be no longer than a day - stop("Cannot produce a short period factor from source data with long time interval") - - time <- .offsets2time(cf@offsets, cf@datum) - months <- c("01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12") - - if (is.null(epoch)) { - # Create the factor for the specified period as well as bounds dates for a - # new CFtime instance for the factor. Lower bounds for the factor level is - # easy, upper bound of last level takes effort. - switch(period, - "year" = { - out <- as.factor(sprintf("%04d", time$year)) - l <- levels(out) - dt <- c(paste0(l, "-01-01"), sprintf("%04d-01-01", as.integer(l[nlevels(out)]) + 1L)) - }, - "season" = { - if (!requireNamespace("stringr")) - stop("Must install package `stringr` to use this functionality.") - - out <- as.factor( - ifelse(time$month == 12L, sprintf("%04dS1", time$year + 1L), - sprintf("%04dS%d", time$year, time$month %/% 3L + 1L))) - l <- levels(out) - dt <- ifelse(substr(l, 6L, 6L) == "1", paste0(as.integer(substr(l, 1L, 4L)) - 1L, "-12-01"), - stringr::str_replace_all(l, c("S2" = "-03-01", "S3" = "-06-01", "S4" = "-09-01"))) - ll <- l[nlevels(out)] - lp <- as.integer(substr(ll, 6L, 6L)) - if (lp == 1L) - dt <- c(dt, sprintf("%04d-03-01", as.integer(substr(ll, 1L, 4L)) + 1L)) - else dt <- c(dt, sprintf("%s-%02d-01", substr(ll, 1L, 4L), lp * 3L)) - }, - "quarter" = { - if (!requireNamespace("stringr")) - stop("Must install package `stringr` to use this functionality.") - - out <- as.factor(sprintf("%04dQ%d", time$year, (time$month - 1L) %/% 3L + 1L)) - l <- levels(out) - dt <- stringr::str_replace_all(l, c("Q1" = "-01-01", "Q2" = "-04-01", "Q3" = "-07-01", "Q4" = "-10-01")) - ll <- l[nlevels(out)] - lp <- as.integer(substr(ll, 6L, 6L)) - if (lp == 4L) - dt <- c(dt, sprintf("%04d-01-01", as.integer(substr(ll, 1L, 4L)) + 1L)) - else dt <- c(dt, sprintf("%s-%02d-01", substr(ll, 1L, 4L), lp * 3L + 1L)) - }, - "month" = { - out <- as.factor(sprintf("%04d-%s", time$year, months[time$month])) - l <- levels(out) - dt <- paste0(l, "-01") - ll <- l[nlevels(out)] - lp <- as.integer(substr(ll, 6L, 7L)) - if (lp == 12L) - dt <- c(dt, sprintf("%04d-01-01", as.integer(substr(ll, 1L, 4L)) + 1L)) - else dt <- c(dt, sprintf("%s-%02d-01", substr(ll, 1L, 4L), lp + 1L)) - }, - "dekad" = { - out <- as.factor(sprintf("%04dD%02d", time$year, (time$month - 1L) * 3L + pmin.int((time$day - 1L) %/% 10L + 1L, 3L))) - l <- levels(out) - dk <- as.integer(substr(l, 6L, 7L)) - 1L - dt <- sprintf("%s-%02d-%s", substr(l, 1L, 4L), dk %/% 3L + 1L, c("01", "11", "21")[dk %% 3L + 1L]) - ll <- l[nlevels(out)] - lp <- as.integer(substr(ll, 6L, 7L)) - yr <- as.integer(substr(lp, 1L, 4L)) - if (lp == 36L) - dt <- c(dt, sprintf("%04d-01-01", yr + 1L)) - else dt <- c(dt, sprintf("%04d-%02d-%s", yr, (lp + 1L) %/% 3L + 1L, c("01", "11", "21")[(lp + 1L) %% 3L + 1L])) - }, - "day" = { - out <- as.factor(sprintf("%04d-%02d-%02d", time$year, time$month, time$day)) - l <- levels(out) - lp <- l[nlevels(out)] - last <- .offsets2time(.parse_timestamp(cf@datum, lp)$offset, cf@datum) - dt <- c(l, sprintf("%04d-%02d-%02d", last$year, last$month, last$day)) - } - ) - - # Convert bounds dates to an array of offsets, find mid-points, create new CFtime instance - off <- .parse_timestamp(cf@datum, dt)$offset - off[is.na(off)] <- 0 # This can happen only when the time series starts at or close to the datum origin, for seasons - noff <- length(off) - bnds <- rbind(off[1L:(noff - 1L)], off[2L:noff]) - off <- bnds[1L,] + (bnds[2L,] - bnds[1L,]) * 0.5 - new_cf <- CFtime(cf@datum@definition, cf@datum@calendar, off) - bounds(new_cf) <- TRUE - - # Bind attributes to the factor - attr(out, "epoch") <- -1L - attr(out, "period") <- period - attr(out, "CFtime") <- new_cf - return(out) - } - - # Epoch factor - if (is.numeric(epoch)) ep <- list(epoch) - else if ((is.list(epoch) && all(unlist(lapply(epoch, is.numeric))))) ep <- epoch - else stop("When specified, the `epoch` parameter must be a numeric vector or a list thereof") - - out <- lapply(ep, function(years) { - f <- switch(period, - "year" = ifelse(time$year %in% years, sprintf("%04d", time$year), NA_character_), - "season" = ifelse((time$month == 12L) & ((time$year + 1L) %in% years), "S1", - ifelse((time$month < 12L) & (time$year %in% years), sprintf("S%d", time$month %/% 3L + 1L), NA_character_)), - "quarter" = ifelse(time$year %in% years, sprintf("Q%d", (time$month - 1L) %/% 3L + 1L), NA_character_), - "month" = ifelse(time$year %in% years, months[time$month], NA_character_), - "dekad" = ifelse(time$year %in% years, sprintf("D%02d", (time$month - 1L) * 3L + pmin.int((time$day - 1L) %/% 10L + 1L, 3L)), NA_character_), - "day" = ifelse(time$year %in% years, sprintf("%s-%02d", months[time$month], time$day), NA_character_) - ) - f <- as.factor(f) - attr(f, "epoch") <- length(years) - attr(f, "period") <- period - attr(f, "CFtime") <- NULL - f - }) - if (is.numeric(epoch)) out <- out[[1L]] - else names(out) <- names(epoch) - return(out) -} - -#' Number of base time units in each factor level -#' -#' Given a factor as returned by [CFfactor()] and the `CFtime` instance from -#' which the factor was derived, this function will return a numeric vector with -#' the number of time units in each level of the factor. -#' -#' The result of this function is useful to convert between absolute and -#' relative values. Climate change anomalies, for instance, are usually computed -#' by differencing average values between a future period and a baseline period. -#' Going from average values back to absolute values for an aggregate period -#' (which is typical for temperature and precipitation, among other variables) -#' is easily done with the result of this function, without having to consider -#' the specifics of the calendar of the data set. -#' -#' If the factor `f` is for an epoch (e.g. spanning multiple years and the -#' levels do not indicate the specific year), then the result will indicate the -#' number of time units of the period in a regular single year. In other words, -#' for an epoch of 2041-2060 and a monthly factor on a standard calendar with a -#' `days` unit, the result will be `c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)`. -#' Leap days are thus only considered for the `366_day` and `all_leap` calendars. -#' -#' Note that this function gives the number of time units in each level of the -#' factor - the actual number of data points in the `cf` instance per factor -#' level may be different. Use [CFfactor_coverage()] to determine the actual -#' number of data points or the coverage of data points relative to the factor -#' level. -#' -#' @param cf CFtime. An instance of CFtime. -#' @param f factor or list. A factor or a list of factors derived from the -#' parameter `cf`. The factor or list thereof should generally be generated by -#' the function [CFfactor()]. -#' -#' @returns If `f` is a factor, a numeric vector with a length equal to the -#' number of levels in the factor, indicating the number of time units in each -#' level of the factor. If `f` is a list of factors, a list with each element -#' a numeric vector as above. -#' @export -#' -#' @examples -#' cf <- CFtime("days since 2001-01-01", "365_day", 0:364) -#' f <- CFfactor(cf, "dekad") -#' CFfactor_units(cf, f) -CFfactor_units <- function(cf, f) { - if (!(methods::is(cf, "CFtime"))) stop("First argument to `CFfactor_units()` must be an instance of the `CFtime` class") - - if (is.list(f)) factors <- f else factors <- list(f) - if (!(all(unlist(lapply(factors, function(x) is.factor(x) && is.numeric(attr(x, "epoch")) && - attr(x, "period") %in% CFt$factor_periods))))) - stop("Argument `f` must be a factor generated by the function `CFfactor()`") - - cal <- cf@datum@cal_id - upd <- CFt$units$per_day[cf@datum@unit] - out <- lapply(factors, function(fac) .factor_units(fac, cal, upd)) - if (is.factor(f)) out <- out[[1L]] - return(out) -} - -#' Calculate time units in factors -#' -#' This is an internal function that should not generally be used outside of -#' the CFtime package. -#' -#' @param f factor. Factor as generated by `CFfactor()`. -#' @param cal numeric. Calendar id of the `CFtime()` instance. -#' @param upd numeric. Number of units per day, from the `CFt` environment. -#' -#' @returns A vector as long as the number of levels in the factor. -#' @noRd -.factor_units <- function(f, cal, upd) { - period <- attr(f, "period") - if (cal == 3L) { - res <- rep(c(360L, 90L, 90L, 30L, 10L, 1L)[which(CFt$factor_periods == period)], nlevels(f)) - } else { - if (attr(f, "epoch") > 0L) { - if (cal %in% c(1L, 2L, 4L)) { - res <- switch(period, - "year" = rep(365L, nlevels(f)), - "season" = c(90L, 92L, 92L, 91L)[as.integer(substr(levels(f), 2, 2))], - "quarter" = c(90L, 91L, 92L, 92L)[as.integer(substr(levels(f), 2, 2))], - "month" = c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[as.integer(levels(f))], - "dekad" = { - dk <- as.integer(substr(levels(f), 2L, 3L)) - ifelse(dk %% 3L > 0L | dk %in% c(12L, 18L, 27L, 33L), 10L, - ifelse(dk %in% c(3L, 9L, 15L, 21L, 24L, 30L, 36L), 11L, 8L)) - }, - "day" = rep(1L, nlevels(f)) - ) - } else if (cal == 5L) { - res <- switch(period, - "year" = rep(366L, nlevels(f)), - "season" = c(91L, 92L, 92L, 91L)[as.integer(substr(levels(f), 2, 2))], - "quarter" = c(91L, 91L, 92L, 92L)[as.integer(levels(f))], - "month" = c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[as.integer(levels(f))], - "dekad" = { - dk <- as.integer(substr(levels(f), 2L, 3L)) - ifelse(dk %% 3L > 0L | dk %in% c(12L, 18L, 27L, 33L), 10L, - ifelse(dk %in% c(3L, 9L, 15L, 21L, 24L, 30L, 36L), 11L, 9L)) - }, - "day" = rep(1L, nlevels(f)) - ) - } - } else { # not an epoch factor - res <- switch(period, - "year" = ifelse(.is_leap_year(as.integer(levels(f)), cal), 366L, 365L), - "season" = { - year <- as.integer(substr(levels(f), 1L, 4L)) - season <- as.integer(substr(levels(f), 6L, 6L)) - ifelse(.is_leap_year(year, cal), c(91L, 92L, 92L, 91L)[season], c(90L, 92L, 92L, 91L)[season]) - }, - "quarter" = { - year <- as.integer(substr(levels(f), 1L, 4L)) - qtr <- as.integer(substr(levels(f), 6L, 6L)) - ifelse(.is_leap_year(year, cal), c(91L, 91L, 92L, 92L)[qtr], c(90L, 91L, 92L, 92L)[qtr]) - }, - "month" = { - year <- as.integer(substr(levels(f), 1L, 4L)) - month <- as.integer(substr(levels(f), 6L, 7L)) - ifelse(.is_leap_year(year, cal), c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[month], - c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[month]) - }, - "dekad" = { - year <- as.integer(substr(levels(f), 1L, 4L)) - dk <- as.integer(substr(levels(f), 6L, 7L)) - ifelse(dk %% 3L > 0L | dk %in% c(12L, 18L, 27L, 33L), 10L, - ifelse(dk %in% c(3L, 9L, 15L, 21L, 24L, 30L, 36L), 11L, - ifelse(.is_leap_year(year, cal), 9L, 8L))) - }, - "day" = rep(1L, nlevels(f)) - ) - } - } - return(res * upd) -} - -#' Coverage of time elements for each factor level -#' -#' This function calculates the number of time elements, or the relative -#' coverage, in each level of a factor generated by [CFfactor()]. -#' -#' @param cf CFtime. An instance of CFtime. -#' @param f factor or list. A factor or a list of factors derived from the -#' parameter `cf`. The factor or list thereof should generally be generated by -#' the function [CFfactor()]. -#' @param coverage "absolute" or "relative". -#' -#' @returns If `f` is a factor, a numeric vector with a length equal to the -#' number of levels in the factor, indicating the number of units from the -#' time series in `cf` contained in each level of the factor when -#' `coverage = "absolute"` or the proportion of units present relative to the -#' maximum number when `coverage = "relative"`. If `f` is a list of factors, a -#' list with each element a numeric vector as above. -#' @export -#' -#' @examples -#' cf <- CFtime("days since 2001-01-01", "365_day", 0:364) -#' f <- CFfactor(cf, "dekad") -#' CFfactor_coverage(cf, f, "absolute") -CFfactor_coverage <- function(cf, f, coverage = "absolute") { - if (!(methods::is(cf, "CFtime"))) stop("First argument to `CFfactor_coverage()` must be an instance of the `CFtime` class") - - if (is.list(f)) factors <- f else factors <- list(f) - if (!(all(unlist(lapply(factors, function(x) is.factor(x) && is.numeric(attr(x, "epoch")) && - attr(x, "period") %in% CFt$factor_periods))))) - stop("Argument `f` must be a factor generated by the function `CFfactor()`") - - if (!(is.character(coverage) && coverage %in% c("absolute", "relative"))) - stop("Argument `coverage` must be a chaarcter string with a value of \"absolute\" or \"relative\"") - - if (coverage == "relative") { - cal <- cf@datum@cal_id - upd <- CFt$units$per_day[cf@datum@unit] - out <- lapply(factors, function(fac) { - res <- tabulate(fac) / .factor_units(fac, cal, upd) - yrs <- attr(fac, "epoch") - if (yrs > 0) res <- res / yrs - return(res) - }) - } else { - out <- lapply(factors, tabulate) - } - - if (is.factor(f)) out <- out[[1L]] - return(out) -} diff --git a/R/CFformat.R b/R/CFformat.R deleted file mode 100644 index bd29e1d..0000000 --- a/R/CFformat.R +++ /dev/null @@ -1,140 +0,0 @@ -#' Create a vector that represents CF timestamps -#' -#' This function generates a vector of character strings or `POSIXct`s that -#' represent the date and time in a selectable combination for each offset. -#' -#' The character strings use the format `YYYY-MM-DDThh:mm:ss±hhmm`, depending on -#' the `format` specifier. The date in the string is not necessarily compatible -#' with `POSIXt` - in the `360_day` calendar `2017-02-30` is valid and -#' `2017-03-31` is not. -#' -#' For the "standard", "gregorian" and "proleptic_gregorian" calendars the -#' output can also be generated as a vector of `POSIXct` values by specifying -#' `asPOSIX = TRUE`. -#' -#' @param cf CFtime. The `CFtime` instance that contains the offsets to use. -#' @param format character. A character string with either of the values "date" or -#' "timestamp". If the argument is not specified, the format used is -#' "timestamp" if there is time information, "date" otherwise. -#' @param asPOSIX logical. If `TRUE`, for "standard", "gregorian" and -#' "proleptic_gregorian" calendars the output is a vector of `POSIXct` - for -#' other calendars the result is `NULL`. Default value is `FALSE`. -#' -#' @seealso The [CFtime::format()] function gives greater flexibility through -#' the use of strptime-like format specifiers. -#' @returns A character vector where each element represents a moment in time -#' according to the `format` specifier. -#' @export -#' -#' @examples -#' cf <- CFtime("hours since 2020-01-01", "standard", seq(0, 24, by = 0.25)) -#' as_timestamp(cf, "timestamp") -#' -#' cf2 <- CFtime("days since 2002-01-21", "standard", 0:20) -#' tail(as_timestamp(cf2, asPOSIX = TRUE)) -#' -#' tail(as_timestamp(cf2)) -#' -#' tail(as_timestamp(cf2 + 1.5)) -as_timestamp <- function(cf, format = NULL, asPOSIX = FALSE) { - if (!(methods::is(cf, "CFtime"))) - stop("First argument to `as_timestamp()` must be an instance of the `CFtime` class") - if (asPOSIX && cf@datum@cal_id != 1L) - stop("Cannot make a POSIX timestamp on a non-standard calendar") - - time <- .offsets2time(cf@offsets, cf@datum) - if (nrow(time) == 0L) return() - - if (is.null(format)) - format <- ifelse(cf@datum@unit < 4L || .has_time(time), "timestamp", "date") - else if (!(format %in% c("date", "timestamp"))) - stop("Format specifier not recognized") - - if (asPOSIX) { - if (format == "date") ISOdate(time$year, time$month, time$day, 0L) - else ISOdatetime(time$year, time$month, time$day, time$hour, time$minute, time$second, "UTC") - } else .format_format(time, timezone(cf), format) -} - -#' Formatting of time strings from time elements -#' -#' This is an internal function that should not generally be used outside of -#' the CFtime package. -#' -#' @param t data.frame. A data.frame representing timestamps. -#' -#' @returns A vector of character strings with a properly formatted time. If any -#' timestamp has a fractional second part, then all time strings will report -#' seconds at milli-second precision. -#' @noRd -.format_time <- function(t) { - fsec <- t$second %% 1L - if (any(fsec > 0L)) { - paste0(sprintf("%02d:%02d:", t$hour, t$minute), ifelse(t$second < 10, "0", ""), sprintf("%.3f", t$second)) - } else { - sprintf("%02d:%02d:%02d", t$hour, t$minute, t$second) - } -} - -#' Do the time elements have time-of-day information? -#' -#' If any time information > 0, then `TRUE` otherwise `FALSE` -#' -#' This is an internal function that should not generally be used outside of -#' the CFtime package. -#' -#' @param t data.frame. A data.frame representing timestamps. -#' -#' @returns `TRUE` if any timestamp has time-of-day information, `FALSE` otherwise. -#' @noRd -.has_time <- function(t) { - any(t$hour > 0) || any(t$minute > 0) || any(t$second > 0) -} - -#' Do formatting of timestamps with format specifiers -#' -#' Internal function -#' -#' @param ts data.frame of decomposed offsets. -#' @param tz character. Time zone character string. -#' @param format character. A character string with the format specifiers, or -#' "date" or "timestamp". -#' @returns Character vector of formatted timestamps. -#' @noRd -.format_format <- function(ts, tz, format) { - if (format == "") format <- "timestamp" - if (format == "timestamp" && sum(ts$hour, ts$minute, ts$second) == 0) - format <- "date" - - if (format == "date") return(sprintf("%04d-%02d-%02d", ts$year, ts$month, ts$day)) - else if (format == "timestamp") return(sprintf("%04d-%02d-%02d %s", ts$year, ts$month, ts$day, .format_time(ts))) - - # Expand any composite specifiers - format <- stringr::str_replace_all(format, c("%F" = "%Y-%m-%d", "%R" = "%H:%M", "%T" = "%H:%M:%S")) - - # Splice in timestamp values for specifiers - # nocov start - if (grepl("%b|%h", format[1])) { - mon <- strftime(ISOdatetime(2024, 1:12, 1, 0, 0, 0), "%b") - format <- stringr::str_replace_all(format, "%b|%h", mon[ts$month]) - } - if (grepl("%B", format[1])) { - mon <- strftime(ISOdatetime(2024, 1:12, 1, 0, 0, 0), "%B") - format <- stringr::str_replace_all(format, "%B", mon[ts$month]) - } - # nocov end - format <- stringr::str_replace_all(format, "%[O]?d", sprintf("%02d", ts$day)) - format <- stringr::str_replace_all(format, "%e", sprintf("%2d", ts$day)) - format <- stringr::str_replace_all(format, "%[O]?H", sprintf("%02d", ts$hour)) - format <- stringr::str_replace_all(format, "%[O]?I", sprintf("%02d", ts$hour %% 12)) - # "%j" = ??? - format <- stringr::str_replace_all(format, "%[O]?m", sprintf("%02d", ts$month)) - format <- stringr::str_replace_all(format, "%[O]?M", sprintf("%02d", ts$minute)) - format <- stringr::str_replace_all(format, "%p", ifelse(ts$hour < 12, "AM", "PM")) - format <- stringr::str_replace_all(format, "%S", sprintf("%02d", as.integer(ts$second))) - format <- stringr::str_replace_all(format, "%[E]?Y", sprintf("%04d", ts$year)) - format <- stringr::str_replace_all(format, "%z", tz) - format <- stringr::str_replace_all(format, "%%", "%") - format -} - diff --git a/R/CFparse.R b/R/CFparse.R deleted file mode 100644 index 46fbcae..0000000 --- a/R/CFparse.R +++ /dev/null @@ -1,345 +0,0 @@ -#' Parse series of timestamps in CF format to date-time elements -#' -#' This function will parse a vector of timestamps in ISO8601 or UDUNITS format -#' into a data frame with columns for the elements of the timestamp: year, -#' month, day, hour, minute, second, time zone. Those timestamps that could not -#' be parsed or which represent an invalid date in the indicated `CFtime` -#' instance will have `NA` values for the elements of the offending timestamp -#' (which will generate a warning). -#' -#' The supported formats are the *broken timestamp* format from the UDUNITS -#' library and ISO8601 *extended*, both with minor changes, as suggested by the -#' CF Metadata Conventions. In general, the format is `YYYY-MM-DD hh:mm:ss.sss -#' hh:mm`. The year can be from 1 to 4 digits and is interpreted literally, so -#' `79-10-24` is the day Mount Vesuvius erupted and destroyed Pompeii, not -#' `1979-10-24`. The year and month are mandatory, all other fields are -#' optional. There are defaults for all missing values, following the UDUNITS -#' and CF Metadata Conventions. Leading zeros can be omitted in the UDUNITS -#' format, but not in the ISO8601 format. The optional fractional part can have -#' as many digits as the precision calls for and will be applied to the smallest -#' specified time unit. In the result of this function, if the fraction is -#' associated with the minute or the hour, it is converted into a regular -#' `hh:mm:ss.sss` format, i.e. any fraction in the result is always associated -#' with the second, rounded down to milli-second accuracy. The separator between -#' the date and the time can be a single whitespace character or a `T`. -#' -#' The time zone is optional and should have at least the hour or `Z` if -#' present, the minute is optional. The time zone hour can have an optional -#' sign. In the UDUNITS format the separator between the time and the time zone -#' must be a single whitespace character, in ISO8601 there is no separation -#' between the time and the timezone. Time zone names are not supported (as -#' neither UDUNITS nor ISO8601 support them) and will cause parsing to fail when -#' supplied, with one exception: the designator "UTC" is silently dropped (i.e. -#' interpreted as "00:00"). -#' -#' Currently only the extended formats (with separators between the elements) -#' are supported. The vector of timestamps may have any combination of ISO8601 -#' and UDUNITS formats. -#' -#' Timestamps that are prior to the datum are not allowed. The corresponding row -#' in the result will have `NA` values. -#' -#' @param cf CFtime. An instance of `CFtime` indicating the CF calendar and -#' datum to use when parsing the date. -#' @param x character. Vector of character strings representing timestamps in -#' ISO8601 extended or UDUNITS broken format. -#' -#' @returns A data frame with constituent elements of the parsed timestamps in -#' numeric format. The columns are year, month, day, hour, minute, second -#' (with an optional fraction), time zone (character string), and the -#' corresponding offset value from the datum. Invalid input data will appear -#' as `NA` - if this is the case, a warning message will be displayed - other -#' missing information on input will use default values. -#' @export -#' @examples -#' cf <- CFtime("days since 0001-01-01", "proleptic_gregorian") -#' -#' # This will have `NA`s on output and generate a warning -#' timestamps <- c("2012-01-01T12:21:34Z", "12-1-23", "today", -#' "2022-08-16T11:07:34.45-10", "2022-08-16 10.5+04") -#' CFparse(cf, timestamps) -CFparse <- function(cf, x) { - stopifnot(is.character(x), methods::is(cf, "CFtime")) - if (cf@datum@unit > 4) stop("Parsing of timestamps on a \"month\" or \"year\" datum is not supported.") - - out <- .parse_timestamp(cf@datum, x) - if (anyNA(out$year)) - warning("Some dates could not be parsed. Result contains `NA` values.") - if (length(unique(out$tz)) > 1) - warning("Timestamps have multiple time zones. Some or all may be different from the datum time zone.") - else if (out$tz[1] != timezone(cf)) - warning("Timestamps have time zone that is different from the datum.") - return(out) -} - -#' Parsing a vector of date-time strings, using a CFtime specification -#' -#' This is an internal function that should not generally be used outside of -#' the CFtime package. -#' -#' @param datum CFdatum. The `CFdatum` instance that is the datum for the dates. -#' @param d character. A vector of strings of dates and times. -#' -#' @returns A data frame with columns year, month, day, hour, minute, second, -#' time zone, and offset. Invalid input data will appear as `NA`. -#' @noRd -.parse_timestamp <- function(datum, d) { - # Parsers - - # UDUNITS broken timestamp definition, with some changes - # broken_timestamp {broken_date}({space|T}+{broken_clock})? -- T not in definition but present in lexer code - # broken_date {year}-{month}(-{day})? - # year [+-]?[0-9]{1,4} - # month 0?[1-9]|1[0-2] - # day 0?[1-9]|[1-2][0-9]|30|31 - # broken_clock {hour}:{minute}(:{second})? - # hour [0-1]?[0-9]|2[0-3] -- sign on hour not allowed, but see timezone - # minute [0-5]?[0-9] - # second {minute}? -- leap second not supported - # fractional part (\.[0-9]*)? - # timezone [+-]?{hour}(:{minute})? -- added, present in lexer code - broken <- paste0( - "^", # anchor string at start - "([+-]?[0-9]{1,4})", # year, with optional sign - "-(0?[1-9]|1[012])", # month - "(?:-(0?[1-9]|[12][0-9]|3[01]))?", # day, optional - "(?:[T ]", # if a time is following, separate with a single whitespace character or a "T" - "([01]?[0-9]|2[0-3])", # hour - ":([0-5]?[0-9])", # minute - "(?::([0-5]?[0-9]))?", # second, optional - "(?:\\.([0-9]*))?", # optional fractional part of the smallest specified unit - ")?", # close optional time capture group - "(?:\\s", # if a time zone offset is following, separate with a single whitespace character - "([+-])?([01]?[0-9]|2[0-3])", # tz hour, with optional sign - "(?::(00|15|30|45))?", # optional tz minute, only 4 possible values - ")?", # close optional timezone capture group - "$" # anchor string at end - ) - - iso8601 <- paste0( - "^", - "([0-9]{4})", - "-(0[1-9]|1[012])", - "-(0[1-9]|[12][0-9]|3[01])?", - "(?:", - "[T ]([01][0-9]|2[0-3])", - "(?::([0-5][0-9]))?", - "(?::([0-5][0-9]))?", - "(?:\\.([0-9]*))?", - ")?", - "(?:([Z+-])([01][0-9]|2[0-3])?(?::(00|15|30|45))?", ## FIXME: Z?, smaller number of captures - ")?$" - ) - - # UDUNITS packed timestamp definition - NOT YET USED - # packed_timestamp {packed_date}({space|T}+{packed_clock})? -- T and space only allowed in packed time follows - # packed_date {year}({month}{day}?)? -- must be YYYYMMDD or else format is ambiguous, as per lexer code - # packed_clock {hour}({minute}{second}?)? -- must be HHMMSS to be unambiguous - # timezone [+-]?{hour}({minute})? -- added, present in lexer code, must be HHMM - # packed <- stringi::stri_join( - # "^", # anchor string at start - # "([+-]?[0-9]{4})", # year, with optional sign - # "(0[1-9]|1[012])?", # month, optional - # "(0[1-9]|[12][0-9]|3[01])?", # day, optional - # "(?:[T,\\s]", # if a time is following, separate with a single whitespace character or a "T" - # "([01][0-9]|2[0-3])?", # hour - # "([0-5][0-9])?", # minute, optional - # "([0-5]?[0-9](?:\\.[0-9]*)?)?", # second, optional, with optional fractional part - # ")?", # close optional time capture group - # "(?:\\s", # if a time zone offset is following, separate with a single whitespace character - # "([+-]?[01][0-9]|2[0-3])?", # hour, with optional sign - # "(00|15|30|45)?", # minute, only 4 possible values - # ")?", # close optional timezone capture group - # "$" # anchor string at end - # ) - - parse <- data.frame(year = integer(), month = integer(), day = integer(), - hour = integer(), minute = integer(), second = numeric(), frac = character(), - tz_sign = character(), tz_hour = character(), tz_min = character()) - - # Drop "UTC", if given - d <- trimws(gsub("UTC$", "", d)) - - cap <- utils::strcapture(iso8601, d, parse) - missing <- which(is.na(cap$year)) - if (length(missing) > 0) - cap[missing,] <- utils::strcapture(broken, d[missing], parse) - - # Assign any fraction to the appropriate time part - cap$frac[is.na(cap$frac)] <- "0" - frac <- as.numeric(paste0("0.", cap$frac)) - if (sum(frac) > 0) { - ndx <- which(!(is.na(cap$second)) & frac > 0) - if (length(ndx) > 0) cap$second[ndx] <- cap$second[ndx] + frac[ndx] - ndx <- which(!(is.na(cap$minute)) & is.na(cap$second) & frac > 0) - if (length(ndx) > 0) cap$second[ndx] <- 60 * frac[ndx] - ndx <- which(!(is.na(cap$hour)) & is.na(cap$minute) & frac > 0) - if (length(ndx) > 0) { - secs <- 3600 * frac - cap$minute[ndx] <- secs[ndx] %/% 60 - cap$second[ndx] <- secs[ndx] %% 60 - } - } - cap$frac <- NULL - - # Convert NA time parts to 0 - in CF default time is 00:00:00 when not specified - cap$hour[is.na(cap$hour)] <- 0 - cap$minute[is.na(cap$minute)] <- 0 - cap$second[is.na(cap$second)] <- 0 - - # Set timezone to default value where needed - ndx <- which(cap$tz_sign == "Z") - if (length(ndx) > 0) { - cap$tz_sign[ndx] <- "+" - cap$tz_hour[ndx] <- "00" - cap$tz_min[ndx] <- "00" - } - cap$tz <- paste0(ifelse(cap$tz_sign == "", "+", cap$tz_sign), - ifelse(cap$tz_hour == "", "00", cap$tz_hour), - ifelse(cap$tz_min == "", "00", cap$tz_min)) - cap$tz_sign <- cap$tz_hour <- cap$tz_min <- NULL - - # Set optional date parts to 1 if not specified - cap$month[is.na(cap$month)] <- 1 - cap$day[is.na(cap$day)] <- 1 - - # Check date validity - invalid <- mapply(function(y, m, d) {!.is_valid_calendar_date(y, m, d, datum@cal_id)}, - cap$year, cap$month, cap$day) - if (nrow(datum@origin) > 0) { - earlier <- mapply(function(y, m, d, dy, dm, dd) { - if (is.na(y)) return(TRUE) - if (y < dy) return(TRUE) - if (y == dy){ - if (m < dm) return(TRUE) - if (m == dm && d < dd) return(TRUE) - } - return(FALSE) - }, cap$year, cap$month, cap$day, datum@origin[1, 1], datum@origin[1, 2], datum@origin[1, 3]) - invalid <- invalid | earlier - } - if (sum(invalid) > 0) cap[invalid,] <- rep(NA, 7) - - # Calculate offsets - if (nrow(datum@origin) == 0) { # if there's no datum yet, don't calculate offsets - cap$offset <- rep(0, nrow(cap)) # this happens, f.i., when a CFdatum is created - } else { - days <- switch(datum@cal_id, - .date2offset_standard(cap, datum@origin), - .date2offset_julian(cap, datum@origin), - .date2offset_360day(cap, datum@origin), - .date2offset_365day(cap, datum@origin), - .date2offset_366day(cap, datum@origin) - ) - cap$offset <- round((days * 86400 + (cap$hour - datum@origin$hour[1]) * 3600 + - (cap$minute - datum@origin$minute[1]) * 60 + - cap$second - datum@origin$second) / CFt$units$seconds[datum@unit], 3) - } - return(cap) -} - -#' Calculate difference in days between a data.frame of time parts and a datum -#' -#' This is an internal function that should not generally be used outside of -#' the CFtime package. -#' -#' @param x data.frame. Dates to calculate the difference for. -#' @param origin data.frame. The origin to calculate the difference against. -#' -#' @returns Vector of days between `x` and the `origin`, using the `standard` calendar. -#' @noRd -.date2offset_standard <- function(x, origin) { - yd0 <- c(0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334) # days diff of 1st of month to 1 January in normal year - - datum_year <- origin[1, 1] - datum_days_in_year <- yd0[origin[1, 2]] + origin[1, 3] - if ((origin[1, 2] <= 2) && ((datum_year %% 4 == 0 && datum_year %% 100 > 0) || datum_year %% 400 == 0)) - datum_days_in_year <- datum_days_in_year - 1 - - mapply(function(y, m, d) { - if (is.na(y)) return(NA_integer_) - if (m <= 2 && ((y %% 4 == 0 && y %% 100 > 0) || y %% 400 == 0)) days <- -1 else days <- 0 # -1 if in a leap year up to the leap day, 0 otherwise - repeat { - if (y > datum_year) { - days <- days + 365 + as.integer((y %% 4 == 0 && y %% 100 > 0) || y %% 400 == 0) - y <- y - 1 - } else break - } - days + yd0[m] + d - datum_days_in_year - }, x$year, x$month, x$day) -} - -#' Calculate difference in days between a data.frame of time parts and a datum -#' -#' This is an internal function that should not generally be used outside of -#' the CFtime package. -#' -#' @param x data.frame. Dates to calculate the difference for. -#' @param origin data.frame. The origin to calculate the difference against. -#' -#' @returns Vector of days between `x` and the `origin`, using the `julian` calendar. -#' @noRd -.date2offset_julian <- function(x, origin) { - yd0 <- c(0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334) # days diff of 1st of month to 1 January in normal year - - datum_year <- origin[1, 1] - datum_days_in_year <- yd0[origin[1, 2]] + origin[1, 3] - if (origin[1, 2] <= 2 && datum_year %% 4 == 0) - datum_days_in_year <- datum_days_in_year - 1 - - mapply(function(y, m, d) { - if (is.na(y)) return(NA_integer_) - if (m <= 2 && y %% 4 == 0) days <- -1 else days <- 0 # -1 if in a leap year up to the leap day, 0 otherwise - repeat { - if (y > datum_year) { - days <- days + 365 + as.integer(y %% 4 == 0) - y <- y - 1 - } else break - } - days + yd0[m] + d - datum_days_in_year - }, x$year, x$month, x$day) -} - -#' Calculate difference in days between a data.frame of time parts and a datum -#' -#' This is an internal function that should not generally be used outside of -#' the CFtime package. -#' -#' @param x data.frame. Dates to calculate the difference for. -#' @param origin data.frame. The origin to calculate the difference against. -#' -#' @returns Vector of days between `x` and the `origin`, using the `360_day` calendar. -#' @noRd -.date2offset_360day <- function(x, origin) { - (x$year - origin[1, 1]) * 360 + (x$month - origin[1, 2]) * 30 + x$day - origin[1, 3] -} - -#' Calculate difference in days between a data.frame of time parts and a datum -#' -#' This is an internal function that should not generally be used outside of -#' the CFtime package. -#' -#' @param x data.frame. Dates to calculate the difference for. -#' @param origin data.frame. The origin to calculate the difference against. -#' -#' @returns Vector of days between `x` and the `origin`, using the `365_day` calendar. -#' @noRd -.date2offset_365day <- function(x, origin) { - yd0 <- c(0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334) # days diff of 1st of month to 1 January - (x$year - origin[1, 1]) * 365 + yd0[x$month] - yd0[origin[1, 2]] + x$day - origin[1, 3] -} - -#' Calculate difference in days between a data.frame of time parts and a datum -#' -#' This is an internal function that should not generally be used outside of -#' the CFtime package. -#' -#' @param x data.frame. Dates to calculate the difference for. -#' @param origin data.frame. The origin to calculate the difference against. -#' -#' @returns Vector of days between `x` and the `origin`, using the `366_day` calendar. -#' @noRd -.date2offset_366day <- function(x, origin) { - yd0 <- c(0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335) # days diff of 1st of month to 1 January - (x$year - origin[1, 1]) * 366 + yd0[x$month] - yd0[origin[1, 2]] + x$day - origin[1, 3] -} diff --git a/R/CFtime-package.R b/R/CFtime-package.R index fabd339..1ab8cfe 100644 --- a/R/CFtime-package.R +++ b/R/CFtime-package.R @@ -10,24 +10,29 @@ #' POSIXt). The CF time coordinate is formally defined in the #' [CF Metadata Conventions document](https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#time-coordinate). #' -#' The package can create a `CFtime` instance from scratch or, more commonly, it -#' can use the dimension attributes and dimension variable values from a NetCDF +#' The package can create a [CFTime] instance from scratch or, more commonly, it +#' can use the dimension attributes and dimension variable values from a netCDF #' resource. The package does not actually do any of the reading and the user is -#' free to use their NetCDF package of preference (with the two main options -#' being [RNetCDF](https://cran.r-project.org/package=RNetCDF) and +#' free to use their netCDF package of preference. The recommended package to +#' use (with any netCDF resources) is [ncdfCF](https://cran.r-project.org/package=ncdfCF). +#' `ncdfCF` will automatically use this package to manage the "time" dimension +#' of any netCDF resource. As with this package, it reads and interprets the +#' attributes of the resource to apply the CF Metadata Conventions, supporting +#' axes, auxiliary coordinate variables, coordinate reference systems, etc. +#' Alternatively, for more basic netCDF reading and writing, the two main options +#' are [RNetCDF](https://cran.r-project.org/package=RNetCDF) and #' [ncdf4](https://cran.r-project.org/package=ncdf4)). #' #' **Create, modify, inquire** -#' * [CFtime()]: Create a CFtime instance -#' * [`Properties`][properties] of the CFtime instance -#' * [CFparse()]: Parse a vector of character timestamps into CFtime elements -#' * [`Compare`][CFtime-equivalent] two CFtime instances -#' * [`Merge`][CFtime-merge] two CFtime instances -#' * [`Append`][CFtime-append] additional time steps to a CFtime instance -#' * [as_timestamp()] and [format()]: Generate a vector of character or `POSIXct` timestamps from a CFtime instance +#' * [CFtime()]: Create a [CFTime] instance +#' * [`Properties`][properties] of the `CFTime` instance +#' * [parse_timestamps()]: Parse a vector of character timestamps into `CFTime` elements +#' * [`Compare`][CFtime-equivalent] two `CFTime` instances +#' * [`Merge`][CFtime-merge] two `CFTime` instances or append additional time steps to a `CFTime` instance +#' * [as_timestamp()] and [format()]: Generate a vector of character or `POSIXct` timestamps from a `CFTime` instance #' * [range()]: Timestamps of the two endpoints in the time series -#' * [is_complete()]: Does the CFtime instance have a complete time series between endpoints? -#' * [month_days()]: How many days are there in a month using the CFtime calendar? +#' * [is_complete()]: Does the `CFTime` instance have a complete time series between endpoints? +#' * [month_days()]: How many days are there in a month using the calendar of the `CFTime` instance? #' #' **Factors and coverage** #' * [CFfactor()] and [cut()]: Create factors for different time periods diff --git a/R/CFtime.R b/R/CFtime.R index 67ae06d..76d6b28 100644 --- a/R/CFtime.R +++ b/R/CFtime.R @@ -1,994 +1,837 @@ -#' CF Metadata Conventions time representation +#' @title CFTime class #' -#' @slot datum CFdatum. The origin upon which the `offsets` are based. -#' @slot resolution numeric. The average number of time units between offsets. -#' @slot offsets numeric. A vector of offsets from the datum. -#' @slot bounds Optional, the bounds for the offsets. If not set, it is the -#' logical value `FALSE`. If set, it is the logical value `TRUE` if the bounds -#' are regular with respect to the regularly spaced offsets (e.g. successive -#' bounds are contiguous and at mid-points between the offsets); otherwise a -#' `matrix` with columns for `offsets` and low values in the first row, high -#' values in the second row. +#' @description This class manages the "time" dimension of netCDF files that +#' follow the CF Metadata Conventions, and its productive use in R. #' -#' @returns An object of class CFtime. -#' @export -setClass("CFtime", - slots = c( - datum = "CFdatum", - resolution = "numeric", - offsets = "numeric", - bounds = "ANY" - )) - -#' Create a CFtime object -#' -#' This function creates an instance of the `CFtime` class. The arguments to -#' the call are typically read from a CF-compliant data file with climatological -#' observations or climate projections. Specification of arguments can also be -#' made manually in a variety of combinations. -#' -#' @param definition character. A character string describing the time coordinate -#' of a CF-compliant data file. -#' @param calendar character. A character string describing the calendar to use -#' with the time dimension definition string. Default value is "standard". -#' @param offsets numeric or character, optional. When numeric, a vector of -#' offsets from the origin in the time series. When a character vector, -#' timestamps in ISO8601 or UDUNITS format. When a character string, a -#' timestamp in ISO8601 or UDUNITS format and then a time series will be -#' generated with a separation between steps equal to the unit of measure in -#' the definition, inclusive of the definition timestamp. The unit of measure -#' of the offsets is defined by the time series definition. -#' -#' @returns An instance of the `CFtime` class. -#' @export -#' -#' @examples -#' CFtime("days since 1850-01-01", "julian", 0:364) -#' -#' CFtime("hours since 2023-01-01", "360_day", "2023-01-30T23:00") -CFtime <- function(definition, calendar = "standard", offsets = NULL) { - if (is.null(calendar)) calendar <- "standard" # This may occur when "calendar" attribute is not defined in the NC file - datum <- CFdatum(definition, calendar) - - if (is.array(offsets)) dim(offsets) <- NULL - - if (is.null(offsets)) { - methods::new("CFtime", datum = datum, resolution = NA_real_, offsets = numeric(), bounds = FALSE) - } else if (is.numeric(offsets)) { - stopifnot(.validOffsets(offsets, CFt$units$per_day[datum@unit])) - - if (length(offsets) > 1L) { - resolution <- (max(offsets) - min(offsets)) / (length(offsets) - 1L) - } else { - resolution <- NA_real_ - } - methods::new("CFtime", datum = datum, resolution = resolution, offsets = offsets, bounds = FALSE) - } else if (is.character(offsets)) { - time <- .parse_timestamp(datum, offsets) - if (anyNA(time$year)) stop("Offset argument contains invalid timestamps") - - if (length(offsets) == 1L) { - off <- seq(0L, time$offset[1L]) - resolution <- 1 - } else { - off <- time$offset - resolution <- (max(time$offset) - min(time$offset)) / (length(time$offset) - 1L) - } - methods::new("CFtime", datum = datum, resolution = resolution, offsets = off, bounds = FALSE) - } else stop("Invalid offsets for CFtime object") -} - -#' @aliases properties -#' @title Properties of a CFtime object -#' -#' @description These functions return the properties of an instance of the -#' `CFtime` class. The properties are all read-only, but offsets can be added -#' using the `+` operator. -#' -#' @param cf CFtime. An instance of `CFtime`. -#' -#' @returns `calendar()` and `unit()` return a character string. -#' `origin()` returns a data frame of timestamp elements with a single row -#' of data. `timezone()` returns the datum time zone as a character -#' string. `offsets()` returns a vector of offsets or `NULL` if no offsets -#' have been set. -#' -#' @examples -#' cf <- CFtime("days since 1850-01-01", "julian", 0:364) -#' definition(cf) -#' calendar(cf) -#' unit(cf) -#' timezone(cf) -#' origin(cf) -#' offsets(cf) -#' resolution(cf) - -#' @describeIn properties The definition string of the CFtime instance -#' @export -definition <- function(cf) cf@datum@definition - -#' @describeIn properties The calendar of the CFtime instance -#' @export -calendar <- function(cf) cf@datum@calendar - -#' @describeIn properties The unit of the CFtime instance -#' @export -unit <- function(cf) CFt$units$name[cf@datum@unit] - -#' @describeIn properties The origin of the CFtime instance in timestamp elements -#' @export -origin <- function(cf) cf@datum@origin - -#' @describeIn properties The time zone of the datum of the CFtime instance as a character string -#' @export -timezone <- function(cf) tz(cf@datum) - -#' @describeIn properties The offsets of the CFtime instance as a vector -#' @export -offsets <- function(cf) cf@offsets - -#' @describeIn properties The average separation between the offsets in the CFtime instance -#' @export -resolution <- function(cf) cf@resolution - -#' Bounds of the time offsets -#' -#' CF-compliant NetCDF files store time information as a single offset value for -#' each step along the dimension, typically centered on the valid interval of -#' the data (e.g. 12-noon for day data). Optionally, the lower and upper values -#' of the valid interval are stored in a so-called "bounds" variable, as an -#' array with two rows (lower and higher value) and a column for each offset. -#' With function `bounds()<-` those bounds can be set for a CFtime instance. The -#' bounds can be retrieved with the `bounds()` function. -#' -#' @param x A `CFtime` instance -#' @param format Optional. A single string with format specifiers, see -#' [CFtime::format()] for details. -#' -#' @returns If bounds have been set, an array of bounds values with dimensions -#' (2, length(offsets)). The first row gives the lower bound, the second row -#' the upper bound, with each column representing an offset of `x`. If the -#' `format` argument is specified, the bounds values are returned as strings -#' according to the format. `NULL` when no bounds have been set. -#' @aliases bounds -#' -#' @examples -#' cf <- CFtime("days since 2024-01-01", "standard", seq(0.5, by = 1, length.out = 366)) -#' as_timestamp(cf)[1:3] -#' bounds(cf) <- rbind(0:365, 1:366) -#' bounds(cf)[, 1:3] -#' bounds(cf, "%d-%b-%Y")[, 1:3] -setGeneric("bounds", function(x, format) standardGeneric("bounds"), signature = "x") - -#' @rdname bounds -#' @export -setMethod("bounds", "CFtime", function (x, format) .get_bounds(x, format)) - -#' @rdname bounds -#' @param value A `matrix` (or `array`) with dimensions (2, length(offsets)) -#' giving the lower (first row) and higher (second row) bounds of each offset -#' (this is the format that the CF Metadata Conventions uses for storage in -#' NetCDF files). Use `FALSE` to unset any previously set bounds, `TRUE` to -#' set regular bounds at mid-points between the offsets (which must be regular -#' as well). -setGeneric("bounds<-", function(x, value) standardGeneric("bounds<-"), signature = c("x")) - -#' @rdname bounds -#' @export -setMethod("bounds<-", "CFtime", function (x, value) invisible(.set_bounds(x, value))) - -#' The length of the offsets contained in the CFtime instance. -#' -#' @param x The CFtime instance whose length will be returned -#' -#' @return The number of offsets in the specified CFtime instance. -#' @export -#' -#' @examples -#' cf <- CFtime("days since 1850-01-01", "julian", 0:364) -#' length(cf) -setMethod("length", "CFtime", function(x) length(x@offsets)) - -#' Return the timestamps contained in the CFtime instance. -#' -#' @param x The CFtime instance whose timestamps will be returned +#' The class has a field `cal` which holds a specific calendar from the +#' allowed types (9 named calendars are currently supported). The calendar is +#' also implemented as a (hidden) class which converts netCDF file encodings to +#' timestamps as character strings, and vice-versa. Bounds information (the +#' period of time over which a timestamp is valid) is used when defined in the +#' netCDF file. #' -#' @return The timestamps in the specified CFtime instance. -#' @export -#' -#' @examples -#' cf <- CFtime("days since 1850-01-01", "julian", 0:364) -#' as.character(cf) -setMethod("as.character", "CFtime", function(x) { - if (length(x@offsets) > 0) - as_timestamp(x) -}) - -setMethod("show", "CFtime", function(object) { - noff <- length(object@offsets) - if (noff == 0L) { - el <- " Elements: (no elements)\n" - b <- " Bounds : (not set)\n" - } else { - d <- .ts_extremes(object) - if (noff > 1L) { - el <- sprintf(" Elements: [%s .. %s] (average of %f %s between %d elements)\n", - d[1L], d[2L], object@resolution, CFt$units$name[object@datum@unit], noff) - } else { - el <- paste(" Elements:", d[1L], "\n") - } - if (is.logical(object@bounds)) { - if (object@bounds) b <- " Bounds : regular and consecutive\n" - else b <- " Bounds : not set\n" - } else b <- " Bounds : irregular\n" - } - cat("CF time series:\n", methods::show(object@datum), el, b, sep = "") -}) - -#' Format time elements using format specifiers -#' -#' Format timestamps using a specific format string, using the specifiers -#' defined for the [base::strptime()] function, with limitations. The only -#' supported specifiers are `bBdeFhHIjmMpRSTYz%`. Modifiers `E` and `O` are -#' silently ignored. Other specifiers, including their percent sign, are copied -#' to the output as if they were adorning text. -#' -#' The formatting is largely oblivious to locale. The reason for this is that -#' certain dates in certain calendars are not POSIX-compliant and the system -#' functions necessary for locale information thus do not work consistently. The -#' main exception to this is the (abbreviated) names of months (`bB`), which -#' could be useful for pretty printing in the local language. For separators and -#' other locale-specific adornments, use local knowledge instead of depending on -#' system locale settings; e.g. specify `%m/%d/%Y` instead of `%D`. -#' -#' Week information, including weekday names, is not supported at all as a -#' "week" is not defined for non-standard CF calendars and not generally useful -#' for climate projection data. If you are working with observed data and want -#' to get pretty week formats, use the [as_timestamp()] function to generate -#' `POSIXct` timestamps (observed data generally uses a standard calendar) and -#' then use the [base::format()] function which supports the full set of -#' specifiers. -#' -#' @param x CFtime. A CFtime instance whose offsets will be returned as -#' timestamps. -#' @param format character. A character string with strptime format -#' specifiers. If omitted, the most economical format will be used: a full -#' timestamp when time information is available, a date otherwise. -#' -#' @returns A vector of character strings with a properly formatted timestamp. -#' Any format specifiers not recognized or supported will be returned verbatim. -#' @export -#' -#' @examples -#' cf <- CFtime("days since 2020-01-01", "standard", 0:365) -#' format(cf, "%Y-%b") -#' -#' # Use system facilities on a standard calendar -#' format(as_timestamp(cf, asPOSIX = TRUE), "%A, %x") -#' -setMethod("format", "CFtime", function(x, format) { - if (!requireNamespace("stringr", quietly = TRUE)) - stop("package `stringr` is required - please install it first") # nocov - - if (missing(format)) format <- "" - else if (!is.character(format) || length(format) != 1) - stop("`format` argument must be a character string with formatting specifiers") - - ts <- .offsets2time(x@offsets, x@datum) - if (nrow(ts) == 0L) return() - - .format_format(ts, tz(x@datum), format) -}) - -#' Create a factor for a CFtime instance -#' -#' Method for [base::cut()] applied to CFtime objects. -#' -#' When `breaks` is one of `"year", "season", "quarter", "month", "dekad", -#' "day"` a factor is generated like by [CFfactor()]. -#' -#' When `breaks` is a vector of character timestamps a factor is produced with a -#' level for every interval between timestamps. The last timestamp, therefore, -#' is only used to close the interval started by the pen-ultimate timestamp - -#' use a distant timestamp (e.g. `range(x)[2]`) to ensure that all offsets to -#' the end of the CFtime time series are included, if so desired. The last -#' timestamp will become the upper bound in the CFtime instance that is returned -#' as an attribute to this function so a sensible value for the last timestamp -#' is advisable. The earliest timestamp cannot be earlier than the origin of the -#' datum of `x`. -#' -#' This method works similar to [base::cut.POSIXt()] but there are some -#' differences in the arguments: for `breaks` the set of options is different -#' and no preceding integer is allowed, `labels` are always assigned using -#' values of `breaks`, and the interval is always left-closed. -#' -#' @param x An instance of CFtime. -#' @param breaks A character string of a factor period (see [CFfactor()] for a -#' description), or a character vector of timestamps that conform to the -#' calendar of `x`, with a length of at least 2. Timestamps must be given in -#' ISO8601 format, e.g. "2024-04-10 21:31:43". -#' @param ... Ignored. +#' Additionally, this class has functions to ease use of the netCDF "time" +#' information when processing data from netCDF files. Filtering and indexing of +#' time values is supported, as is the generation of factors. #' -#' @returns A factor with levels according to the `breaks` argument, with -#' attributes 'period', 'epoch' and 'CFtime'. When `breaks` is a factor -#' period, attribute 'period' has that value, otherwise it is '"day"'. When -#' `breaks` is a character vector of timestamps, attribute 'CFtime' holds an -#' instance of CFtime that has the same definition as `x`, but with (ordered) -#' offsets generated from the `breaks`. Attribute 'epoch' is always -1. -#' @aliases cut -#' @seealso [CFfactor()] produces a factor for several fixed periods, including -#' for epochs. #' @export -#' -#' @examples -#' x <- CFtime("days since 2021-01-01", "365_day", 0:729) -#' breaks <- c("2022-02-01", "2021-12-01", "2023-01-01") -#' cut(x, breaks) -setMethod("cut", "CFtime", function (x, breaks, ...) { - if (!inherits(x, "CFtime")) - stop("Argument 'x' must be a CFtime instance") - - if (missing(breaks) || !is.character(breaks) || (len <- length(breaks)) < 1) - stop("Argument 'breaks' must be a character vector with at least 1 value") - - if(len == 1) { - breaks <- sub("s$", "", tolower(breaks)) - if (breaks %in% CFt$factor_periods) - return(CFfactor(x, breaks)) - else stop("Invalid specification of 'breaks'") - } - - # breaks is a character vector of multiple timestamps - if (x@datum@unit > 4L) stop("Factorizing on a 'month' or 'year' datum is not supported") - time <- .parse_timestamp(x@datum, breaks) - if (anyNA(time$year)) - stop("Invalid specification of 'breaks'") - sorted <- order(time$offset) - ooff <- time$offset[sorted] - intv <- findInterval(offsets(x), ooff) - intv[which(intv %in% c(0L, len))] <- NA - f <- factor(intv, labels = breaks[sorted][1L:(len-1L)]) - - # Attributes - bnds <- rbind(ooff[1L:(len-1L)], ooff[2L:len]) - off <- bnds[1L, ] + (bnds[2L, ] - bnds[1L, ]) * 0.5 - cf <- CFtime(x@datum@definition, x@datum@calendar, off) - bounds(cf) <- bnds - attr(f, "period") <- "day" - attr(f, "epoch") <- -1L - attr(f, "CFtime") <- cf - f -}) - -setGeneric("indexOf", function(x, y, ...) standardGeneric("indexOf"), signature = c("x", "y")) - -#' Find the index of timestamps in the time series -#' -#' In the CFtime instance `y`, find the index in the time series for each -#' timestamp given in argument `x`. Values of `x` that are before the earliest -#' value in `y` will be returned as `0` (except when the value is before the -#' datum of `y`, in which case the value returned is `NA`); values of `x` that -#' are after the latest values in `y` will be returned as -#' `.Machine$integer.max`. Alternatively, when `x` is a numeric vector of index -#' values, return the valid indices of the same vector, with the side effect -#' being the attribute "CFtime" associated with the result. -#' -#' Timestamps can be provided as vectors of character strings, `POSIXct` or -#' `Date.` -#' -#' Matching also returns index values for timestamps that fall between two -#' elements of the time series - this can lead to surprising results when time -#' series elements are positioned in the middle of an interval (as the CF -#' Metadata Conventions instruct us to "reasonably assume"): a time series of -#' days in January would be encoded in a netCDF file as -#' `c("2024-01-01 12:00:00", "2024-01-02 12:00:00", "2024-01-03 12:00:00", ...)` -#' so `x <- c("2024-01-01", "2024-01-02", "2024-01-03")` would result in -#' `(NA, 1, 2)` (or `(NA, 1.5, 2.5)` with `method = "linear"`) because the date -#' values in `x` are at midnight. This situation is easily avoided by ensuring -#' that `y` has bounds set (use `bounds(y) <- TRUE` as a proximate solution if -#' bounds are not stored in the netCDF file). See the Examples. -#' -#' If bounds are set, the indices are taken from those bounds. Returned indices -#' may fall in between bounds if the latter are not contiguous, with the -#' exception of the extreme values in `x`. -#' -#' Values of `x` that are not valid timestamps according to the calendar of `y` -#' will be returned as `NA`. -#' -#' `x` can also be a numeric vector of index values, in which case the valid -#' values in `x` are returned. If negative values are passed, the positive -#' counterparts will be excluded and then the remainder returned. Positive and -#' negative values may not be mixed. Using a numeric vector has -#' the side effect that the result has the attribute "CFtime" describing the -#' temporal dimension of the slice. If index values outside of the range of `y` -#' (`1:length(y)`) are provided, an error will be thrown. -#' -#' @param x Vector of character, POSIXt or Date values to find indices for, or a -#' numeric vector. -#' @param y CFtime instance. -#' @param method Single value of "constant" or "linear". If `"constant"` or when -#' bounds are set on argument `y`, return the index value for each match. If -#' `"linear"`, return the index value with any fractional value. -#' -#' @returns A numeric vector giving indices into the "time" dimension of the -#' dataset associated with `y` for the values of `x`. If there is at least 1 -#' valid index, then attribute "CFtime" -#' contains an instance of CFtime that describes the dimension of filtering -#' the dataset associated with `y` with the result of this function, excluding -#' any `NA`, `0` and `.Machine$integer.max` values. -#' @aliases indexOf -#' @export -#' -#' @examples -#' cf <- CFtime("days since 2020-01-01", "360_day", 1440:1799 + 0.5) -#' as_timestamp(cf)[1:3] -#' x <- c("2024-01-01", "2024-01-02", "2024-01-03") -#' indexOf(x, cf) -#' indexOf(x, cf, method = "linear") -#' -#' bounds(cf) <- TRUE -#' indexOf(x, cf) -#' -#' # Non-existent calendar day in a `360_day` calendar -#' x <- c("2024-03-30", "2024-03-31", "2024-04-01") -#' indexOf(x, cf) -#' -#' # Numeric x -#' indexOf(c(29, 30, 31), cf) -setMethod("indexOf", c("ANY", "CFtime"), function(x, y, method = "constant") { - stopifnot(inherits(x, c("character", "POSIXt", "Date")) || is.numeric(x), - method %in% c("constant", "linear")) - - if (is.numeric(x)) { - if (!(all(x < 0, na.rm = TRUE) || all(x > 0, na.rm = TRUE))) - stop("Cannot mix positive and negative index values") - - intv <- (1:length(y))[x] - xoff <- y@offsets[x] - } else { - if (y@datum@unit > 4L) - stop("Parsing of timestamps on a \"month\" or \"year\" datum is not supported.") - - xoff <- .parse_timestamp(y@datum, as.character(x))$offset - vals <- .get_bounds(y) - if (is.null(vals)) vals <- offsets(y) - else vals <- c(vals[1L, 1L], vals[2L, ]) - intv <- stats::approx(vals, 1L:length(vals), xoff, method = method, - yleft = 0, yright = .Machine$integer.max)$y - intv[which(intv == length(vals))] <- .Machine$integer.max - } - - valid <- which(!is.na(intv) & intv > 0 & intv < .Machine$integer.max) - if (any(valid)) { - cf <- CFtime(definition(y), calendar(y), xoff[valid]) - yb <- bounds(y) - if (!is.null(yb)) - bounds(cf) <- yb[, intv[valid], drop = FALSE] - attr(intv, "CFtime") <- cf - } - intv -}) - -#' @title Extreme time series values -#' -#' @description Character representation of the extreme values in the time series -#' -#' @param x An instance of the `CFtime` class. -#' @param format A character string with format specifiers, optional. If it is -#' missing or an empty string, the most economical ISO8601 format is chosen: -#' "date" when no time information is present in `x`, "timestamp" otherwise. -#' Otherwise a suitable format specifier can be provided. -#' @param bounds Logical to indicate if the extremes from the bounds should be -#' used, if set. Defaults to `FALSE`. -#' @param ... Ignored. -#' @param na.rm Ignored. -#' -#' @returns Vector of two character representations of the extremes of the time series. -#' @export -#' @examples -#' cf <- CFtime("days since 1850-01-01", "julian", 0:364) -#' range(cf) -#' range(cf, "%Y-%b-%e") -setMethod("range", "CFtime", function(x, format = "", bounds = FALSE, ..., na.rm = FALSE) - .ts_extremes(x, format, bounds, ..., na.rm)) - -#' Indicates if the time series is complete -#' -#' This function indicates if the time series is complete, meaning that the time -#' steps are equally spaced and there are thus no gaps in the time series. -#' -#' This function gives exact results for time series where the nominal -#' *unit of separation* between observations in the time series is exact in terms of the -#' datum unit. As an example, for a datum unit of "days" where the observations -#' are spaced a fixed number of days apart the result is exact, but if the same -#' datum unit is used for data that is on a monthly basis, the *assessment* is -#' approximate because the number of days per month is variable and dependent on -#' the calendar (the exception being the `360_day` calendar, where the -#' assessment is exact). The *result* is still correct in most cases (including -#' all CF-compliant data sets that the developers have seen) although there may -#' be esoteric constructions of CFtime and offsets that trip up this -#' implementation. -#' -#' @param x An instance of the `CFtime` class -#' -#' @returns logical. `TRUE` if the time series is complete, with no gaps; -#' `FALSE` otherwise. If no offsets have been added to the CFtime instance, -#' `NA` is returned. -#' @export -#' @examples -#' cf <- CFtime("days since 1850-01-01", "julian", 0:364) -#' is_complete(cf) -is_complete <- function(x) { - if (!methods::is(x, "CFtime")) stop("Argument must be an instance of CFtime") - if (length(x@offsets) == 0L) NA - else .ts_equidistant(x) -} - -#' Which time steps fall within two extreme values -#' -#' Given two extreme character timestamps, return a logical vector of a length -#' equal to the number of time steps in the CFtime instance with values `TRUE` -#' for those time steps that fall between the two extreme values, `FALSE` -#' otherwise. This can be used to select slices from the time series in reading -#' or analysing data. -#' -#' If bounds were set these will be preserved. -#' -#' @param x CFtime. The time series to operate on. -#' @param extremes character. Vector of two timestamps that represent the -#' extremes of the time period of interest. The timestamps must be in -#' increasing order. The timestamps need not fall in the range of the time -#' steps in the CFtime stance. -#' @param rightmost.closed Is the larger extreme value included in the result? -#' Default is `FALSE`. -#' -#' @returns A logical vector with a length equal to the number of time steps in -#' `x` with values `TRUE` for those time steps that fall between the two -#' extreme values, `FALSE` otherwise. The earlier timestamp is included, the -#' later timestamp is excluded. A specification of `c("2022-01-01", "2023-01-01")` -#' will thus include all time steps that fall in the year 2022. -#' @export -#' -#' @examples -#' cf <- CFtime("hours since 2023-01-01 00:00:00", "standard", 0:23) -#' slab(cf, c("2022-12-01", "2023-01-01 03:00")) -slab <- function(x, extremes, rightmost.closed = FALSE) { - if (!methods::is(x, "CFtime")) stop("First argument must be an instance of CFtime") - if (!is.character(extremes) || length(extremes) != 2L) - stop("Second argument must be a character vector of two timestamps") - if (extremes[2L] < extremes[1L]) extremes <- c(extremes[2L], extremes[1L]) - .ts_slab(x, extremes, rightmost.closed) -} - -#' Equivalence of CFtime objects -#' -#' This operator can be used to test if two `CFtime` objects represent the same -#' CF-convention time coordinates. Two `CFtime` objects are considered equivalent -#' if they have an equivalent datum and the same offsets. -#' -#' @param e1,e2 CFtime. Instances of the `CFtime` class. -#' -#' @returns `TRUE` if the `CFtime` objects are equivalent, `FALSE` otherwise. -#' @export -#' @aliases CFtime-equivalent -#' -#' @examples -#' e1 <- CFtime("days since 1850-01-01", "gregorian", 0:364) -#' e2 <- CFtime("days since 1850-01-01 00:00:00", "standard", 0:364) -#' e1 == e2 -setMethod("==", c("CFtime", "CFtime"), function(e1, e2) - .datum_equivalent(e1@datum, e2@datum) && - length(e1@offsets) == length(e2@offsets) && - all(e1@offsets == e2@offsets)) - -#' Merge two CFtime objects -#' -#' Two `CFtime` instances can be merged into one with this operator, provided -#' that the units and calendars of the datums of the two instances are -#' equivalent. -#' -#' If the origins of the two datums are not identical, the earlier origin is -#' preserved and the offsets of the later origin are updated in the resulting -#' CFtime instance. -#' -#' The order of the two parameters is indirectly significant. The resulting -#' `CFtime` instance will have the offsets of both instances in the order that -#' they are specified. There is no reordering or removal of duplicates. This is -#' because the time series are usually associated with a data set and the -#' correspondence between the data in the files and the CFtime instance is thus -#' preserved. When merging the data sets described by this time series, the -#' order must be identical to the merging here. -#' -#' Any bounds that were set will be removed. Use [CFtime::bounds()] to retrieve -#' the bounds of the individual `CFtime` instances and then set them again after -#' merging the two instances. -#' -#' @param e1,e2 CFtime. Instances of the `CFtime` class. -#' -#' @returns A `CFtime` object with a set of offsets composed of the offsets of -#' the instances of `CFtime` that the operator operates on. If the datum units -#' or calendars of the `CFtime` instances are not equivalent, an error is -#' thrown. -#' @export -#' @aliases CFtime-merge -#' -#' @examples -#' e1 <- CFtime("days since 1850-01-01", "gregorian", 0:364) -#' e2 <- CFtime("days since 1850-01-01 00:00:00", "standard", 365:729) -#' e1 + e2 -setMethod("+", c("CFtime", "CFtime"), function(e1, e2) { - if (!.datum_compatible(e1@datum, e2@datum)) stop('Datums not compatible') - if (all(e1@datum@origin[1:6] == e2@datum@origin[1:6])) - CFtime(e1@datum@definition, e1@datum@calendar, c(e1@offsets, e2@offsets)) - else { - diff <- .parse_timestamp(e1@datum, paste(origin_date(e2@datum), origin_time(e2@datum)))$offset - if (is.na(diff)) { - diff <- .parse_timestamp(e2@datum, paste(origin_date(e1@datum), origin_time(e1@datum)))$offset - CFtime(e2@datum@definition, e2@datum@calendar, c(e1@offsets + diff, e2@offsets)) - } else - CFtime(e1@datum@definition, e1@datum@calendar, c(e1@offsets, e2@offsets + diff)) - } -}) - -#' Extend a CFtime object with additional offsets -#' -#' A `CFtime` instance can be extended by adding additional offsets using this -#' operator. -#' -#' The resulting `CFtime` instance will have its offsets in the order that they -#' are added, meaning that the offsets from the `CFtime` instance come first and -#' those from the numeric vector follow. There is no reordering or removal of -#' duplicates. This is because the time series are usually associated with a -#' data set and the correspondence between the two is thus preserved, if and -#' only if the data sets are merged in the same order. -#' -#' Note that when adding multiple vectors of offsets to a `CFtime` instance, it -#' is more efficient to first concatenate the vectors and then do a final -#' addition to the `CFtime` instance. So avoid `CFtime(definition, calendar, e1) + CFtime(definition, calendar, e2) + CFtime(definition, calendar, e3) + ...` -#' but rather do `CFtime(definition, calendar) + c(e1, e2, e3, ...)`. It is the -#' responsibility of the operator to ensure that the offsets of the different -#' data sets are in reference to the same datum. -#' -#' Note also that `RNetCDF` and `ncdf4` packages both return the values of the -#' "time" dimension as a 1-dimensional array. You have to `dim(time_values) <- NULL` -#' to de-class the array to a vector before adding offsets to an existing CFtime -#' instance. -#' -#' Negative offsets will generate an error. -#' -#' Any bounds that were set will be removed. Use [CFtime::bounds()] to retrieve -#' the bounds of the individual `CFtime` instances and then set them again after -#' merging the two instances. -#' -#' @param e1 CFtime. Instance of the `CFtime` class. -#' @param e2 numeric. Vector of offsets to be added to the `CFtime` instance. -#' -#' @returns A `CFtime` object with offsets composed of the `CFtime` instance and -#' the numeric vector. -#' @export -#' @aliases CFtime-append -#' -#' @examples -#' e1 <- CFtime("days since 1850-01-01", "gregorian", 0:364) -#' e2 <- 365:729 -#' e1 + e2 -setMethod("+", c("CFtime", "numeric"), function(e1, e2) { - if (.validOffsets(e2, CFt$units$per_day[e1@datum@unit])) - CFtime(e1@datum@definition, e1@datum@calendar, c(e1@offsets, e2)) -}) - -#' Validate offsets passed into a CFtime instance -#' -#' This is an internal function that should not be used outside the CFtime -#' package. -#' -#' Tests the `offsets` values. Throws an error if the argument contains negative or `NA` values. -#' -#' @param offsets The offsets to test -#' -#' @returns logical. `TRUE` if the offsets are valid, throws an error otherwise. -#' @noRd -.validOffsets <- function(offsets, upd) { - if (any(is.na(offsets) | (offsets < 0))) stop("Offsets cannot contain negative or `NA` values.") - if (any(offsets > 1000000 * upd)) stop("Offset values are outside of reasonable range (year 1 - 2500).") - TRUE -} - -#' Return the extremes of the time series as character strings -#' -#' This function returns the first and last timestamp of the time series as a -#' vector. Note that the offsets do not have to be sorted. -#' -#' This is an internal function that should not be used outside of the CFtime -#' package. -#' -#' @param x CFtime. The time series to operate on. -#' @param format character. Value of "date" or "timestamp". Optionally, a -#' character string that specifies an alternate format. -#' -#' @returns Vector of two character strings that represent the starting and -#' ending timestamps in the time series. If a `format` is supplied, that -#' format will be used. Otherwise, if all of the timestamps in the time series -#' have a time component of `00:00:00` the date of the timestamp is returned, -#' otherwise the full timestamp (without any time zone information). -#' -#' @noRd -.ts_extremes <- function(x, format = "", bounds = FALSE, ..., na.rm) { - if (length(x@offsets) == 0L) return(c(NA_character_, NA_character_)) - if (!missing(format) && ((!is.character(format)) || length(format) != 1L)) - stop("`format` argument, when present, must be a character string with formatting specifiers") - if (!is.logical(bounds) || length(bounds) != 1L) - stop("`bounds` argument, when present, must be a single logical value") - - if (bounds) { - bnds <- .get_bounds(x) - if (is.null(bnds)) time <- .offsets2time(range(x@offsets), x@datum) - else time <- .offsets2time(c(bnds[1L, 1L], bnds[2L, length(x)]), x@datum) - } else time <- .offsets2time(range(x@offsets), x@datum) - - .format_format(time, tz(x@datum), format) -} - -#' Which time steps fall within two extreme values -#' -#' Given two extreme character timestamps, return a logical vector of a length -#' equal to the number of time steps in the CFtime instance with values `TRUE` -#' for those time steps that fall between the two extreme values, `FALSE` -#' otherwise. -#' -#' **NOTE** Giving crap as the earlier timestamp will set that value to 0. So -#' invalid input will still generate a result. To be addressed. Crap in later -#' timestamp is not tolerated. -#' -#' @param x CFtime. The time series to operate on. -#' @param extremes character. Vector of two timestamps that represent the -#' extremes of the time period of interest. The timestamps must be in -#' increasing order. -#' @param closed Is the right side closed, i.e. included in the result? -#' -#' @returns A logical vector with a length equal to the number of time steps in -#' `x` with values `TRUE` for those time steps that fall between the two -#' extreme values, `FALSE` otherwise. The earlier timestamp is included, the -#' later timestamp is excluded. A specification of `c("2022-01-01", "2023-01-01)` -#' will thus include all time steps that fall in the year 2022. -#' -#' An attribute 'CFtime' will have the same definition as `x` but with offsets -#' corresponding to the time steps falling between the two extremes. If there -#' are no values between the extremes, the attribute is `NULL`. -#' @noRd -.ts_slab <- function(x, extremes, closed) { - ext <- .parse_timestamp(x@datum, extremes)$offset - if (is.na(ext[1L])) ext[1L] <- 0 - off <- x@offsets - if (ext[1L] > max(off) || is.na(ext[2L])) { - out <- rep(FALSE, length(off)) - attr(out, "CFtime") <- NULL - } else { - out <- if (closed) off >= ext[1L] & off <= ext[2L] - else off >= ext[1L] & off < ext[2L] - cf <- CFtime(x@datum@definition, x@datum@calendar, off[out]) - xb <- bounds(x) - if (!is.null(xb)) - bounds(cf) <- xb[, out] - attr(out, "CFtime") <- cf - } - out -} - -#' Decompose a vector of offsets, in units of the datum, to their timestamp -#' values -#' -#' This function adds a specified amount of time to the origin of a CFts object. -#' -#' This is an internal function that should not be used outside of the CFtime -#' package. -#' -#' This functions may introduce inaccuracies where the datum unit is "months" or -#' "years", due to the ambiguous definition of these units. -#' -#' @param offsets numeric. Vector of offsets to add to the datum. -#' @param datum CFdatum. The datum that defines the unit of the offsets and the -#' origin to add the offsets to. -#' -#' @returns A data.frame with columns for the timestamp elements and as many -#' rows as there are offsets. -#' @noRd -.offsets2time <- function(offsets, datum) { - len <- length(offsets) - if(len == 0L) return(data.frame(year = integer(), month = integer(), day = integer(), - hour = integer(), minute = integer(), second = numeric(), - tz = character(), offset = numeric())) - - if (datum@unit <= 4L) { # Days, hours, minutes, seconds - # First add time: convert to seconds first, then recompute time parts - secs <- offsets * CFt$units$seconds[datum@unit] - secs <- secs + datum@origin$hour[1L] * 3600L + datum@origin$minute[1L] * 60L + datum@origin$second[1L] - days <- secs %/% 86400L # overflow days - secs <- round(secs %% 86400L, 3L) # drop overflow days from time, round down to milli-seconds avoid errors - - # Time elements for output - hrs <- secs %/% 3600L - mins <- (secs %% 3600L) %/% 60L - secs <- secs %% 60L - - # Now add days using the calendar of the datum - origin <- unlist(datum@origin[1L,1L:3L]) # origin ymd as a named vector - if (any(days > 0)) { - switch (datum@cal_id, - out <- .offset2date_standard(days, origin), - out <- .offset2date_julian(days, origin), - out <- .offset2date_360(days, origin), - out <- .offset2date_fixed(days, origin, c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31), 365), - out <- .offset2date_fixed(days, origin, c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31), 366)) - } else { - out <- data.frame(year = rep(origin[1L], len), month = rep(origin[2L], len), day = rep(origin[3L], len)) - } - - # Put it all back together again - out$hour <- hrs - out$minute <- mins - out$second <- secs - out$tz <- rep(tz(datum), len) - } else { # Months, years - out <- datum@origin[rep(1L, len), ] - if (datum@unit == 5L) { # Offsets are months - months <- out$month + offsets - 1L - out$month <- months %% 12L + 1L - out$year <- out$year + months %/% 12L - } else { # Offsets are years - out$year <- out$year + offsets - } - } - out$offset <- offsets - return(out) -} - -#' 360_day, use integer arithmetic -#' This is an internal function that should not be used outside of the CFtime package. -#' -#' @param x integer. Vector of days to add to the origin. -#' @param origin integer. Vector of year, month, day and seconds to add days to. -#' -#' @returns A data frame with time elements year, month and day in columns and as -#' many rows as the length of vector `x`. -#' @noRd -.offset2date_360 <- function(x, origin) { - y <- origin[1L] + x %/% 360L - m <- origin[2L] + (x %% 360L) %/% 30L - d <- origin[3L] + x %% 30L - over <- which(d > 30L) - d[over] <- d[over] - 30L - m[over] <- m[over] + 1L - over <- which(m > 12L) - m[over] <- m[over] - 12L - y[over] <- y[over] + 1L - data.frame(year = y, month = m, day = d, row.names = NULL) -} - -#' Fixed year length, either 365_day or 366_day -#' -#' This is an internal function that should not be used outside of the CFtime package. -#' -#' @param x numeric. Vector of days to add to the origin. -#' @param origin numeric. Vector of year, month, day and seconds to add days to. -#' @param month numeric. Vector of days per month in the year. -#' @param ydays numeric. Number of days per year, either 365 or 366. -#' -#' @returns A data frame with time elements year, month and day in columns and as -#' many rows as the length of vector `x`. -#' @noRd -.offset2date_fixed <- function(x, origin, month, ydays) { - # First process full years over the vector - yr <- origin[1L] + (x %/% ydays) - x <- x %% ydays - - # Remaining portion per datum - x <- x + origin[3L] - ymd <- mapply(function(y, m, d) { - while (d > month[m]) { - d <- d - month[m] - m <- m + 1L - if (m == 13L) { - y <- y + 1L - m <- 1L +#' @references +#' https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#time-coordinate +#' @docType class +CFTime <- R6::R6Class("CFTime", + public = list( + #' @field cal The calendar of this `CFTime` instance, a descendant of the + #' [CFCalendar] class. + cal = NULL, + + #' @field offsets A numeric vector of offsets from the origin of the + #' calendar. + offsets = numeric(), + + #' @field resolution The average number of time units between offsets. + resolution = NA_real_, + + #' @field bounds Optional, the bounds for the offsets. If not set, it is the + #' logical value `FALSE`. If set, it is the logical value `TRUE` if the + #' bounds are regular with respect to the regularly spaced offsets (e.g. + #' successive bounds are contiguous and at mid-points between the + #' offsets); otherwise a `matrix` with columns for `offsets` and low + #' values in the first row, high values in the second row. + bounds = FALSE, + + #' @description Create a new instance of this class. + #' @param definition Character string of the units and origin of the + #' calendar. + #' @param calendar Character string of the calendar to use. Must be one of + #' the values permitted by the CF Metadata Conventions. If `NULL`, the + #' "standard" calendar will be used. + #' @param offsets Numeric or character vector, optional. When numeric, a + #' vector of offsets from the origin in the time series. When a character + #' vector of length 2 or more, timestamps in ISO8601 or UDUNITS format. + #' When a character string, a timestamp in ISO8601 or UDUNITS format and + #' then a time series will be generated with a separation between steps + #' equal to the unit of measure in the definition, inclusive of the + #' definition timestamp. The unit of measure of the offsets is defined by + #' the `definition` argument. + initialize = function(definition, calendar, offsets) { + if (is.null(calendar)) calendar <- "standard" # This may occur when "calendar" attribute is not defined in the NC file + self$cal <- switch(calendar, + "standard" = CFCalendarStandard$new(calendar, definition), + "gregorian" = CFCalendarStandard$new(calendar, definition), + "proleptic_gregorian" = CFCalendarProleptic$new(calendar, definition), + "julian" = CFCalendarJulian$new(calendar, definition), + "360_day" = CFCalendar360$new(calendar, definition), + "365_day" = CFCalendar365$new(calendar, definition), + "noleap" = CFCalendar365$new(calendar, definition), + "366_day" = CFCalendar366$new(calendar, definition), + "all_leap" = CFCalendar366$new(calendar, definition), + stop("Invalid calendar specification", call. = FALSE) + ) + + if (is.numeric(offsets)) { + dim(offsets) <- NULL + stopifnot(.validOffsets(offsets)) + + if (length(offsets) > 1L) { + self$resolution <- (max(offsets) - min(offsets)) / (length(offsets) - 1L) + if (any(diff(offsets) <= 0)) + warning("Offsets not monotonically increasing.", call. = FALSE) + } else { + self$resolution <- NA_real_ + } + self$offsets <- offsets + } else if (is.character(offsets)) { + time <- self$cal$parse(offsets) + if (anyNA(time$year)) stop("Argument `offsets` contains invalid timestamps", call. = FALSE) + + if (length(offsets) == 1L) { + self$offsets <- seq(0L, time$offset[1L]) + self$resolution <- 1 + } else { + self$offsets <- time$offset + self$resolution <- (max(self$offsets) - min(self$offsets)) / (length(self$offsets) - 1L) + if (any(diff(self$offsets) <= 0)) + warning("Offsets not monotonically increasing.", call. = FALSE) + } + } else if (!is.null(offsets)) stop("Invalid offsets for CFTime object", call. = FALSE) + }, + + #' @description Print a summary of the `CFTime` object to the console. + #' @param ... Ignored. + #' @return `self` invisibly. + print = function(...) { + noff <- length(self$offsets) + if (noff == 0L) { + el <- " Elements: (no elements)\n" + b <- " Bounds : (not set)\n" + } else { + d <- self$range() + if (noff > 1L) { + el <- sprintf(" Elements: [%s .. %s] (average of %f %s between %d elements)\n", + d[1L], d[2L], self$resolution, CFt$units$name[self$cal$unit], noff) + } else { + el <- paste(" Elements:", d[1L], "\n") + } + if (is.logical(self$bounds)) { + if (self$bounds) b <- " Bounds : regular and consecutive\n" + else b <- " Bounds : not set\n" + } else b <- " Bounds : irregular\n" + } + cal <- capture.output(self$cal$print()) + cat(paste(cal, collapse = "\n"), "\nTime series:\n", el, b, sep = "") + invisible(self) + }, + + #' @description This method returns the first and last timestamp of the time + #' series as a vector. Note that the offsets do not have to be sorted. + #' + #' @param format Value of "date" or "timestamp". Optionally, a + #' character string that specifies an alternate format. + #' @param bounds Logical to indicate if the extremes from the bounds should + #' be used, if set. Defaults to `FALSE`. + #' + #' @return Vector of two character strings that represent the starting and + #' ending timestamps in the time series. If a `format` is supplied, that + #' format will be used. Otherwise, if all of the timestamps in the time + #' series have a time component of `00:00:00` the date of the timestamp is + #' returned, otherwise the full timestamp (without any time zone + #' information). + range = function(format = "", bounds = FALSE) { + if (length(self$offsets) == 0L) return(c(NA_character_, NA_character_)) + if (!missing(format) && ((!is.character(format)) || length(format) != 1L)) + stop("`format` argument, when present, must be a character string with formatting specifiers", call. = FALSE) + if (!is.logical(bounds) || length(bounds) != 1L) + stop("`bounds` argument, when present, must be a single logical value", call. = FALSE) + + if (bounds) { + bnds <- self$get_bounds() + if (is.null(bnds)) time <- self$cal$offsets2time(base::range(self$offsets)) + else time <- self$cal$offsets2time(c(bnds[1L, 1L], bnds[2L, length(self$offsets)])) + } else time <- self$cal$offsets2time(base::range(self$offsets)) + + .format_format(time, self$cal$timezone, format) + }, + + #' @description This method generates a vector of character strings or + #' `POSIXct`s that represent the date and time in a selectable combination + #' for each offset. + #' + #' The character strings use the format `YYYY-MM-DDThh:mm:ss±hhmm`, + #' depending on the `format` specifier. The date in the string is not + #' necessarily compatible with `POSIXt` - in the `360_day` calendar + #' `2017-02-30` is valid and `2017-03-31` is not. + #' + #' For the "proleptic_gregorian" calendar the output can also be generated + #' as a vector of `POSIXct` values by specifying `asPOSIX = TRUE`. The + #' same is possible for the "standard" and "gregorian" calendars but only + #' if all timestamps fall on or after 1582-10-15. If `asPOSIX = TRUE` is + #' specified while the calendar does not support it, an error will be + #' generated. + #' + #' @param format character. A character string with either of the values + #' "date" or "timestamp". If the argument is not specified, the format + #' used is "timestamp" if there is time information, "date" otherwise. + #' @param asPOSIX logical. If `TRUE`, for "standard", "gregorian" and + #' "proleptic_gregorian" calendars the output is a vector of `POSIXct` - + #' for other calendars an error will be thrown. Default value is `FALSE`. + #' + #' @return A character vector where each element represents a moment in + #' time according to the `format` specifier. + as_timestamp = function(format = NULL, asPOSIX = FALSE) { + if (asPOSIX && !self$cal$POSIX_compatible(self$offsets)) + stop("Cannot make a POSIX timestamp with this calendar.", call. = FALSE) + if (length(self$offsets) == 0L) return() + + time <- self$cal$offsets2time(self$offsets) + + if (is.null(format)) + format <- ifelse(self$cal$unit < 4L || .has_time(time), "timestamp", "date") + else if (!(format %in% c("date", "timestamp"))) + stop("Format specifier not recognized", call. = FALSE) + + if (asPOSIX) { + if (format == "date") ISOdate(time$year, time$month, time$day, 0L) + else ISOdatetime(time$year, time$month, time$day, time$hour, time$minute, time$second, "UTC") + } else .format_format(time, self$cal$timezone, format) + }, + + #' @description Format timestamps using a specific format string, using the + #' specifiers defined for the [base::strptime()] function, with + #' limitations. The only supported specifiers are `bBdeFhHImMpRSTYz%`. + #' Modifiers `E` and `O` are silently ignored. Other specifiers, including + #' their percent sign, are copied to the output as if they were adorning + #' text. + #' + #' The formatting is largely oblivious to locale. The reason for this is + #' that certain dates in certain calendars are not POSIX-compliant and the + #' system functions necessary for locale information thus do not work + #' consistently. The main exception to this is the (abbreviated) names of + #' months (`bB`), which could be useful for pretty printing in the local + #' language. For separators and other locale-specific adornments, use + #' local knowledge instead of depending on system locale settings; e.g. + #' specify `%m/%d/%Y` instead of `%D`. + #' + #' Week information, including weekday names, is not supported at all as a + #' "week" is not defined for non-standard CF calendars and not generally + #' useful for climate projection data. If you are working with observed + #' data and want to get pretty week formats, use the [as_timestamp()] + #' method to generate `POSIXct` timestamps (observed data generally uses a + #' "standard" calendar) and then use the [base::format()] function which + #' supports the full set of specifiers. + #' + #' @param format A character string with `strptime` format specifiers. If + #' omitted, the most economical format will be used: a full timestamp when + #' time information is available, a date otherwise. + #' + #' @return A vector of character strings with a properly formatted + #' timestamp. Any format specifiers not recognized or supported will be + #' returned verbatim. + format = function(format) { + if (length(self$offsets) == 0L) return(character(0L)) + + if (!requireNamespace("stringr", quietly = TRUE)) + stop("package `stringr` is required - please install it first", call. = FALSE) # nocov + + if (missing(format)) format <- "" + else if (!is.character(format) || length(format) != 1L) + stop("`format` argument must be a character string with formatting specifiers", call. = FALSE) + + ts <- self$cal$offsets2time(self$offsets) + .format_format(ts, self$cal$timezone, format) + }, + + #' @description Find the index in the time series for each timestamp given + #' in argument `x`. Values of `x` that are before the earliest value in + #' the time series will be returned as `0`; values of `x` that are after + #' the latest values in the time series will be returned as + #' `.Machine$integer.max`. Alternatively, when `x` is a numeric vector of + #' index values, return the valid indices of the same vector, with the + #' side effect being the attribute "CFTime" associated with the result. + #' + #' Matching also returns index values for timestamps that fall between two + #' elements of the time series - this can lead to surprising results when + #' time series elements are positioned in the middle of an interval (as + #' the CF Metadata Conventions instruct us to "reasonably assume"): a time + #' series of days in January would be encoded in a netCDF file as + #' `c("2024-01-01 12:00:00", "2024-01-02 12:00:00", "2024-01-03 12:00:00", ...)` + #' so `x <- c("2024-01-01", "2024-01-02", "2024-01-03")` would + #' result in `(NA, 1, 2)` (or `(NA, 1.5, 2.5)` with `method = "linear"`) + #' because the date values in `x` are at midnight. This situation is + #' easily avoided by ensuring that this `CFTime` instance has bounds set + #' (use `bounds(y) <- TRUE` as a proximate solution if bounds are not + #' stored in the netCDF file). See the Examples. + #' + #' If bounds are set, the indices are taken from those bounds. Returned + #' indices may fall in between bounds if the latter are not contiguous, + #' with the exception of the extreme values in `x`. + #' + #' Values of `x` that are not valid timestamps according to the calendar + #' of this `CFTime` instance will be returned as `NA`. + #' + #' `x` can also be a numeric vector of index values, in which case the + #' valid values in `x` are returned. If negative values are passed, the + #' positive counterparts will be excluded and then the remainder returned. + #' Positive and negative values may not be mixed. Using a numeric vector + #' has the side effect that the result has the attribute "CFTime" + #' describing the temporal dimension of the slice. If index values outside + #' of the range of `self` are provided, an error will be thrown. + #' + #' @param x Vector of character, POSIXt or Date values to find indices for, + #' or a numeric vector. + #' @param method Single value of "constant" or "linear". If `"constant"` or + #' when bounds are set on `self`, return the index value for each + #' match. If `"linear"`, return the index value with any fractional value. + #' + #' @return A numeric vector giving indices into the "time" dimension of the + #' dataset associated with `self` for the values of `x`. If there is at + #' least 1 valid index, then attribute "CFTime" contains an instance of + #' `CFTime` that describes the dimension of filtering the dataset + #' associated with `self` with the result of this function, excluding any + #' `NA`, `0` and `.Machine$integer.max` values. + indexOf = function(x, method = "constant") { + stopifnot(inherits(x, c("character", "POSIXt", "Date")) || is.numeric(x), + method %in% c("constant", "linear")) + + if (is.numeric(x)) { + if (!(all(x < 0, na.rm = TRUE) || all(x > 0, na.rm = TRUE))) + stop("Cannot mix positive and negative index values", call. = FALSE) + + intv <- (1:length(self$offsets))[x] + xoff <- self$offsets[x] + } else { + if (self$cal$unit > 4L) + stop("Parsing of timestamps on a 'month' or 'year' time unit is not supported.", call. = FALSE) + + xoff <- self$cal$parse(as.character(x))$offset + vals <- self$get_bounds() + vals <- if (is.null(vals)) self$offsets + else c(vals[1L, 1L], vals[2L, ]) + intv <- stats::approx(vals, 1L:length(vals), xoff, method = method, + yleft = 0, yright = .Machine$integer.max)$y + intv[which(intv == length(vals))] <- .Machine$integer.max } - } - return(c(y, m, d)) - }, yr, origin[2L], x) - data.frame(year = ymd[1L,], month = ymd[2L,], day = ymd[3L,], row.names = NULL) -} - -#' Julian calendar offsetting -#' -#' This is an internal function that should not be used outside of the CFtime package. -#' -#' @param x numeric. Vector of days to add to the origin. -#' @param origin numeric. Vector of year, month, day and seconds to add days to. -#' -#' @returns A data frame with time elements year, month and day in columns and as -#' many rows as the length of vector `x`. -#' @noRd -.offset2date_julian <- function(x, origin) { - common_days <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - leap_days <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - - # Is the leap day to consider ahead in the year from the base date (offset = 0) or in the next year (offset = 1) - offset <- as.integer(origin[2L] > 2L) - - # First process 4-year cycles of 1,461 days over the vector - yr <- origin[1L] + (x %/% 1461L) * 4L - x <- x %% 1461L - - # Remaining portion per datum - x <- x + origin[3L] - ymd <- mapply(function(y, m, d) { - repeat { - leap <- (y + offset) %% 4L == 0L - ydays <- 365L + as.integer(leap) - - if (d > ydays) { - d <- d - ydays - y <- y + 1L - } else break - } - - if (leap) month <- leap_days else month <- common_days - while (d > month[m]) { - d <- d - month[m] - m <- m + 1L - if (m == 13L) { - y <- y + 1L - m <- 1L + valid <- which(!is.na(intv) & intv > 0 & intv < .Machine$integer.max) + if (any(valid)) { + t <- CFTime$new(self$cal$definition, self$cal$name, xoff[valid]) + bnds <- self$get_bounds() + if (!is.null(bnds)) + t$set_bounds(bnds[, intv[valid], drop = FALSE]) + attr(intv, "CFTime") <- t + } + intv + }, + + #' @description Return bounds. + #' + #' @param format A string specifying a format for output, optional. + #' @return An array with dims(2, length(offsets)) with values for the + #' bounds. `NULL` if the bounds have not been set. + get_bounds = function(format) { + len <- length(self$offsets) + if (len == 0L) return(NULL) + + bnds <- self$bounds + if (is.logical(bnds)) { + if (!bnds) return(NULL) + + b <- seq(from = self$offsets[1L] - self$resolution * 0.5, + by = self$resolution, + length.out = len + 1L) + if (!missing(format)) { + ts <- self$cal$offsets2time(b) + b <- .format_format(ts, self$cal$timezone, format) + } + return(rbind(b[1L:len], b[2L:(len+1L)])) } - } - return(c(y, m, d)) - }, yr, origin[2L], x) - data.frame(year = ymd[1L,], month = ymd[2L,], day = ymd[3L,], row.names = NULL) -} -#' Standard calendar offsetting -#' -#' This is an internal function that should not be used outside of the CFtime package. -#' -#' @param x numeric. Vector of days to add to the origin. -#' @param origin numeric. Vector of year, month, day and seconds to add days to. -#' -#' @returns A data frame with time elements year, month and day in columns and as -#' many rows as the length of vector `x`. -#' @noRd -.offset2date_standard <- function(x, origin) { - common_days <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - leap_days <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - - # Is the leap day to consider ahead in the year from the base date (offset = 0) or in the next year (offset = 1) - offset <- as.integer(origin[2L] > 2L) - - x <- x + origin[3L] - ymd <- mapply(function(y, m, d) { - repeat { - test <- y + offset - leap <- (test %% 4L == 0L && test %% 100L > 0L) || test %% 400L == 0L - ydays <- 365L + as.integer(leap) - - if (d > ydays) { - d <- d - ydays - y <- y + 1L - } else break - } + # bnds is a matrix + if (missing(format)) return(bnds) + + ts <- self$cal$offsets2time(as.vector(bnds)) + b <- .format_format(ts, self$cal$timezone, format) + dim(b) <- c(2L, len) + b + }, + + #' @description Set the bounds of the `CFTime` instance. + #' + #' @param value The bounds to set, in units of the offsets. Either a matrix + #' `(2, length(self$offsets))` or a logical. + #' @return `self` invisibly. + set_bounds = function(value) { + if (isFALSE(value)) self$bounds <- FALSE + else if (isTRUE(value)) self$bounds <- TRUE + else { + off <- self$offsets + len <- length(off) + + if (len == 0L) + stop("Cannot set bounds when there are no offsets", call. = FALSE) + + if (is.matrix(value) && is.numeric(value)) { + if (!all(dim(value) == c(2L, len))) + stop("Replacement value has incorrect dimensions", call. = FALSE) + } else stop("Replacement value must be a numeric matrix or a single logical value", call. = FALSE) + + if (!(all(value[2L,] >= off) && all(off >= value[1L,]))) + stop("Values of the replacement value must surround the offset values", call. = FALSE) + + # Compress array to `TRUE`, if regular + if (len > 1L && identical(value[1L,2L:len], value[2L,1L:(len-1L)]) && + diff(range(diff(value[1L,]))) == 0) value <- TRUE + + self$bounds <- value + invisible(self) + } + }, + + #' This method returns `TRUE` if the time series has uniformly distributed + #' time steps between the extreme values, `FALSE` otherwise. First test + #' without sorting; this should work for most data sets. If not, only then + #' offsets are sorted. For most data sets that will work but for implied + #' resolutions of month, season, year, etc based on a "days" or finer + #' calendar unit this will fail due to the fact that those coarser units + #' have a variable number of days per time step, in all calendars except for + #' `360_day`. For now, an approximate solution is used that should work in + #' all but the most non-conformal exotic arrangements. + #' + #' @return `TRUE` if all time steps are equidistant, `FALSE` otherwise, or + #' `NA` if no offsets have been set. + equidistant = function() { + if (length(self$offsets) == 0L) return(NA) + out <- all(diff(self$offsets) == self$resolution) + if (!out) { + doff <- diff(sort(self$offsets)) + out <- all(doff == self$resolution) + if (!out) { + # Don't try to make sense of totally non-standard arrangements such as + # calendar units "years" or "months" describing sub-daily time steps. + # Also, 360_day calendar should be well-behaved so we don't want to get here. + if (self$cal$unit > 4L || inherits(self$cal, "CFCalendar360")) return(FALSE) + + # Check if we have monthly or yearly data on a finer-scale calendar + # This is all rather approximate but should be fine in most cases + # This accommodates middle-of-the-time-period offsets as per the + # CF Metadata Conventions + # Please report problems at https://github.com/pvanlaake/CFtime/issues + ddays <- range(doff) * CFt$units$per_day[self$cal$unit] + return((ddays[1] >= 28 && ddays[2] <= 31) || # months + (ddays[1] >= 8 && ddays[2] <= 11) || # dekads + (ddays[1] >= 90 && ddays[2] <= 92) || # seasons, quarters + (ddays[1] >= 365 && ddays[2] <= 366)) # years + } + } + out + }, + + #' @description Given two extreme character timestamps, return a logical vector of a length + #' equal to the number of time steps in the time series with values `TRUE` + #' for those time steps that fall between the two extreme values, `FALSE` + #' otherwise. + #' + #' **NOTE** Giving crap as the earlier timestamp will set that value to 0. So + #' invalid input will still generate a result. To be addressed. Crap in later + #' timestamp is not tolerated. + #' + #' @param extremes Character vector of two timestamps that represent the + #' extremes of the time period of interest. + #' @param closed Is the right side closed, i.e. included in the result? + #' @return A logical vector with a length equal to the number of time steps in + #' `self` with values `TRUE` for those time steps that fall between the two + #' extreme values, `FALSE` otherwise. The earlier timestamp is included, the + #' later timestamp is excluded. A specification of `c("2022-01-01", "2023-01-01)` + #' will thus include all time steps that fall in the year 2022. + #' + #' An attribute 'CFTime' will have the same definition as `self` but with offsets + #' corresponding to the time steps falling between the two extremes. If there + #' are no values between the extremes, the attribute is `NULL`. + slab = function(extremes, closed) { + if (!is.character(extremes) || length(extremes) != 2L) + stop("Second argument must be a character vector of two timestamps", call. = FALSE) + if (extremes[2L] < extremes[1L]) extremes <- c(extremes[2L], extremes[1L]) + + ext <- self$cal$parse(extremes)$offset + if (is.na(ext[1L])) ext[1L] <- 0 + off <- self$offsets + if (ext[1L] > max(off) || is.na(ext[2L])) { + out <- rep(FALSE, length(off)) + attr(out, "CFTime") <- NULL + } else { + out <- if (closed) off >= ext[1L] & off <= ext[2L] + else off >= ext[1L] & off < ext[2L] + t <- CFTime$new(self$cal$definition, self$cal$name, off[out]) + bnds <- self$get_bounds() + if (!is.null(bnds)) + t$set_bounds(bnds[, out]) + attr(out, "CFTime") <- t + } + out + }, + + #' @description Can the time series be converted to POSIXt? + #' @return `TRUE` if the calendar support coversion to POSIXt, `FALSE` + #' otherwise. + POSIX_compatible = function() { + self$cal$POSIX_compatible(self$offsets) + }, + + #' @description Create a factor for a `CFTime` instance. + #' + #' When argument `breaks` is one of `"year", "season", "quarter", "month", + #' "dekad", "day"`, a factor is generated like by [CFfactor()]. When + #' `breaks` is a vector of character timestamps a factor is produced with + #' a level for every interval between timestamps. The last timestamp, + #' therefore, is only used to close the interval started by the + #' pen-ultimate timestamp - use a distant timestamp (e.g. `range(x)[2]`) + #' to ensure that all offsets to the end of the CFTime time series are + #' included, if so desired. The last timestamp will become the upper bound + #' in the `CFTime` instance that is returned as an attribute to this + #' function so a sensible value for the last timestamp is advisable. + #' + #' This method works similar to [base::cut.POSIXt()] but there are some + #' differences in the arguments: for `breaks` the set of options is + #' different and no preceding integer is allowed, `labels` are always + #' assigned using values of `breaks`, and the interval is always + #' left-closed. + #' + #' @param breaks A character string of a factor period (see [CFfactor()] for + #' a description), or a character vector of timestamps that conform to the + #' calendar of `x`, with a length of at least 2. Timestamps must be given + #' in ISO8601 format, e.g. "2024-04-10 21:31:43". + #' + #' @return A factor with levels according to the `breaks` argument, with + #' attributes 'period', 'era' and 'CFTime'. When `breaks` is a factor + #' period, attribute 'period' has that value, otherwise it is '"day"'. + #' When `breaks` is a character vector of timestamps, attribute 'CFTime' + #' holds an instance of `CFTime` that has the same definition as `x`, but + #' with (ordered) offsets generated from the `breaks`. Attribute 'era' + #' is always -1. + cut = function(breaks) { + if (missing(breaks) || !is.character(breaks) || (len <- length(breaks)) < 1L) + stop("Argument 'breaks' must be a character vector with at least 1 value", call. = FALSE) + + if(len == 1L) { + breaks <- sub("s$", "", tolower(breaks)) + if (breaks %in% CFt$factor_periods) + return(CFfactor(self, breaks)) # FIXME after CFfactor is done + else stop("Invalid specification of 'breaks'", call. = FALSE) + } - if (leap) month <- leap_days else month <- common_days + # breaks is a character vector of multiple timestamps + if (self$cal$unit > 4L) stop("Factorizing on a 'month' or 'year' time unit is not supported", call. = FALSE) + time <- self$cal$parse(breaks) + if (anyNA(time$year)) + stop("Invalid specification of 'breaks'", call. = FALSE) + sorted <- order(time$offset) + ooff <- time$offset[sorted] + intv <- findInterval(self$offsets, ooff) + intv[which(intv %in% c(0L, len))] <- NA + f <- factor(intv, labels = breaks[sorted][1L:(len-1L)]) + + # Attributes + bnds <- rbind(ooff[1L:(len-1L)], ooff[2L:len]) + off <- bnds[1L, ] + (bnds[2L, ] - bnds[1L, ]) * 0.5 + t <- CFTime$new(self$cal$definition, self$cal$name, off) + bounds(t) <- bnds + attr(f, "period") <- "day" + attr(f, "era") <- -1L + attr(f, "CFTime") <- t + f + }, + + #' @description Generate a factor for the offsets, or a part thereof. This is + #' specifically interesting for creating factors from the date part of the + #' time series that aggregate the time series into longer time periods (such + #' as month) that can then be used to process daily CF data sets using, for + #' instance, `tapply()`. + #' + #' The factor will respect the calendar that the time series is built on. + #' + #' The factor will be generated in the order of the offsets. While typical + #' CF-compliant data sources use ordered time series there is, however, no + #' guarantee that the factor is ordered. For most processing with a factor + #' the ordering is of no concern. + #' + #' If the `era` parameter is specified, either as a vector of years to + #' include in the factor, or as a list of such vectors, the factor will only + #' consider those values in the time series that fall within the list of + #' years, inclusive of boundary values. Other values in the factor will be + #' set to `NA`. The years need not be contiguous, within a single vector or + #' among the list items, or in order. + #' + #' The following periods are supported by this method: + #' + #' \itemize{ + #' \item `year`, the year of each offset is returned as "YYYY". + #' \item `season`, the meteorological season of each offset is returned as + #' "Sx", with x being 1-4, preceeded by "YYYY" if no `era` is + #' specified. Note that December dates are labeled as belonging to the + #' subsequent year, so the date "2020-12-01" yields "2021S1". This implies + #' that for standard CMIP files having one or more full years of data the + #' first season will have data for the first two months (January and + #' February), while the final season will have only a single month of data + #' (December). + #' \item `quarter`, the calendar quarter of each offset is returned as "Qx", + #' with x being 1-4, preceeded by "YYYY" if no `era` is specified. + #' \item `month`, the month of each offset is returned as "01" to + #' "12", preceeded by "YYYY-" if no `era` is specified. This is the default + #' period. + #' \item `dekad`, ten-day periods are returned as + #' "Dxx", where xx runs from "01" to "36", preceeded by "YYYY" if no `era` + #' is specified. Each month is subdivided in dekads as follows: 1- days 01 - + #' 10; 2- days 11 - 20; 3- remainder of the month. + #' \item `day`, the month and day of each offset are returned as "MM-DD", + #' preceeded by "YYYY-" if no `era` is specified. + #' } + #' + #' It is not possible to create a factor for a period that is shorter than + #' the temporal resolution of the calendar. As an example, if the calendar + #' has a monthly unit, a dekad or day factor cannot be created. + #' + #' Creating factors for other periods is not supported by this method. + #' Factors based on the timestamp information and not dependent on the + #' calendar can trivially be constructed from the output of the + #' [as_timestamp()] function. + #' + #' For non-era factors the attribute 'CFTime' of the result contains a + #' `CFTime` instance that is valid for the result of applying the factor to + #' a resource that this instance is associated with. In other words, if + #' `CFTime` instance 'At' describes the temporal dimension of resource 'A' + #' and a factor 'Af' is generated from `Af <- At$factor()`, then + #' `Bt <- attr(Af, "CFTime")` describes the temporal dimension of the result + #' of, say, `B <- apply(A, 1:2, tapply, Af, FUN)`. The 'CFTime' attribute is + #' `NULL` for era factors. + #' + #' @param period character. A character string with one of the values + #' "year", "season", "quarter", "month" (the default), "dekad" or "day". + #' @param era numeric or list, optional. Vector of years for which to + #' construct the factor, or a list whose elements are each a vector of + #' years. If `era` is not specified, the factor will use the entire time + #' series for the factor. + #' @return If `era` is a single vector or not specified, a factor with a + #' length equal to the number of offsets in this instance. If `era` is a + #' list, a list with the same number of elements and names as `era`, + #' each containing a factor. Elements in the factor will be set to `NA` + #' for time series values outside of the range of specified years. + #' + #' The factor, or factors in the list, have attributes 'period', 'era' + #' and 'CFTime'. Attribute 'period' holds the value of the `period` + #' argument. Attribute 'era' indicates the number of years that are + #' included in the era, or -1 if no `era` is provided. Attribute + #' 'CFTime' holds an instance of `CFTime` that has the same definition as + #' this instance, but with offsets corresponding to the mid-point of + #' non-era factor levels; if the `era` argument is specified, + #' attribute 'CFTime' is `NULL`. + factor = function(period = "month", era = NULL) { + if (length(self$offsets) < 10L) stop("Cannot create a factor for very short time series", call. = FALSE) + + period <- tolower(period) + if (!((length(period) == 1L) && (period %in% CFt$factor_periods))) + stop("Period specifier must be a single value of a supported period", call. = FALSE) + + # No fine-grained period factors for coarse source data + timestep <- CFt$units$seconds[self$cal$unit] * self$resolution; + if ((period == "year") && (timestep > 86400 * 366) || + (period %in% c("season", "quarter")) && (timestep > 86400 * 90) || # Somewhat arbitrary + (period == "month") && (timestep > 86400 * 31) || + (period == "dekad") && (timestep > 86400) || # Must be constructed from daily or finer data + (period == "day") && (timestep > 86400)) # Must be no longer than a day + stop("Cannot produce a short period factor from source data with long time interval", call. = FALSE) + + time <- self$cal$offsets2time(self$offsets) + months <- c("01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12") + + if (is.null(era)) { + # Create the factor for the specified period as well as bounds dates for a + # new CFtime instance for the factor. Lower bounds for the factor level is + # easy, upper bound of last level takes effort. + switch(period, + "year" = { + out <- as.factor(sprintf("%04d", time$year)) + l <- levels(out) + dt <- c(paste0(l, "-01-01"), sprintf("%04d-01-01", as.integer(l[nlevels(out)]) + 1L)) + }, + "season" = { + if (!requireNamespace("stringr")) + stop("Must install package `stringr` to use this functionality.", call. = FALSE) + + out <- as.factor( + ifelse(time$month == 12L, sprintf("%04dS1", time$year + 1L), + sprintf("%04dS%d", time$year, time$month %/% 3L + 1L))) + l <- levels(out) + dt <- ifelse(substr(l, 6L, 6L) == "1", paste0(as.integer(substr(l, 1L, 4L)) - 1L, "-12-01"), + stringr::str_replace_all(l, c("S2" = "-03-01", "S3" = "-06-01", "S4" = "-09-01"))) + ll <- l[nlevels(out)] + lp <- as.integer(substr(ll, 6L, 6L)) + if (lp == 1L) + dt <- c(dt, sprintf("%04d-03-01", as.integer(substr(ll, 1L, 4L)) + 1L)) + else dt <- c(dt, sprintf("%s-%02d-01", substr(ll, 1L, 4L), lp * 3L)) + }, + "quarter" = { + if (!requireNamespace("stringr")) + stop("Must install package `stringr` to use this functionality.", call. = FALSE) + + out <- as.factor(sprintf("%04dQ%d", time$year, (time$month - 1L) %/% 3L + 1L)) + l <- levels(out) + dt <- stringr::str_replace_all(l, c("Q1" = "-01-01", "Q2" = "-04-01", "Q3" = "-07-01", "Q4" = "-10-01")) + ll <- l[nlevels(out)] + lp <- as.integer(substr(ll, 6L, 6L)) + if (lp == 4L) + dt <- c(dt, sprintf("%04d-01-01", as.integer(substr(ll, 1L, 4L)) + 1L)) + else dt <- c(dt, sprintf("%s-%02d-01", substr(ll, 1L, 4L), lp * 3L + 1L)) + }, + "month" = { + out <- as.factor(sprintf("%04d-%s", time$year, months[time$month])) + l <- levels(out) + dt <- paste0(l, "-01") + ll <- l[nlevels(out)] + lp <- as.integer(substr(ll, 6L, 7L)) + if (lp == 12L) + dt <- c(dt, sprintf("%04d-01-01", as.integer(substr(ll, 1L, 4L)) + 1L)) + else dt <- c(dt, sprintf("%s-%02d-01", substr(ll, 1L, 4L), lp + 1L)) + }, + "dekad" = { + out <- as.factor(sprintf("%04dD%02d", time$year, (time$month - 1L) * 3L + pmin.int((time$day - 1L) %/% 10L + 1L, 3L))) + l <- levels(out) + dk <- as.integer(substr(l, 6L, 7L)) - 1L + dt <- sprintf("%s-%02d-%s", substr(l, 1L, 4L), dk %/% 3L + 1L, c("01", "11", "21")[dk %% 3L + 1L]) + ll <- l[nlevels(out)] + lp <- as.integer(substr(ll, 6L, 7L)) + yr <- as.integer(substr(ll, 1L, 4L)) + if (lp == 36L) + dt <- c(dt, sprintf("%04d-01-01", yr + 1L)) + else dt <- c(dt, sprintf("%04d-%02d-%s", yr, (lp + 1L) %/% 3L + 1L, c("01", "11", "21")[(lp + 1L) %% 3L + 1L])) + }, + "day" = { + out <- as.factor(sprintf("%04d-%02d-%02d", time$year, time$month, time$day)) + l <- levels(out) + lp <- l[nlevels(out)] + last <- self$cal$offsets2time(self$cal$parse(lp)$offset) + dt <- c(l, sprintf("%04d-%02d-%02d", last$year, last$month, last$day)) + } + ) + + # Convert bounds dates to an array of offsets, find mid-points, create new CFTime instance + off <- self$cal$parse(dt)$offset + off[is.na(off)] <- 0 # This can happen only when the time series starts at or close to the origin, for seasons + noff <- length(off) + bnds <- rbind(off[1L:(noff - 1L)], off[2L:noff]) + off <- bnds[1L,] + (bnds[2L,] - bnds[1L,]) * 0.5 + new_cf <- CFTime$new(self$cal$definition, self$cal$name, off) + bounds(new_cf) <- TRUE + + # Bind attributes to the factor + attr(out, "era") <- -1L + attr(out, "period") <- period + attr(out, "CFTime") <- new_cf + return(out) + } - while (d > month[m]) { - d <- d - month[m] - m <- m + 1L - if (m == 13L) { - y <- y + 1L - m <- 1L + # Era factor + if (is.numeric(era)) ep <- list(era) + else if ((is.list(era) && all(unlist(lapply(era, is.numeric))))) ep <- era + else stop("When specified, the `era` parameter must be a numeric vector or a list thereof", call. = FALSE) + + out <- lapply(ep, function(years) { + f <- switch(period, + "year" = ifelse(time$year %in% years, sprintf("%04d", time$year), NA_character_), + "season" = ifelse((time$month == 12L) & ((time$year + 1L) %in% years), "S1", + ifelse((time$month < 12L) & (time$year %in% years), sprintf("S%d", time$month %/% 3L + 1L), NA_character_)), + "quarter" = ifelse(time$year %in% years, sprintf("Q%d", (time$month - 1L) %/% 3L + 1L), NA_character_), + "month" = ifelse(time$year %in% years, months[time$month], NA_character_), + "dekad" = ifelse(time$year %in% years, sprintf("D%02d", (time$month - 1L) * 3L + pmin.int((time$day - 1L) %/% 10L + 1L, 3L)), NA_character_), + "day" = ifelse(time$year %in% years, sprintf("%s-%02d", months[time$month], time$day), NA_character_) + ) + f <- as.factor(f) + attr(f, "era") <- length(years) + attr(f, "period") <- period + attr(f, "CFTime") <- NULL + f + }) + if (is.numeric(era)) out <- out[[1L]] + else names(out) <- names(era) + out + }, + + #' @description Given a factor as produced by `CFTime$factor()`, this method + #' will return a numeric vector with the number of time units in each + #' level of the factor. + #' + #' The result of this method is useful to convert between absolute and + #' relative values. Climate change anomalies, for instance, are usually + #' computed by differencing average values between a future period and a + #' baseline period. Going from average values back to absolute values for + #' an aggregate period (which is typical for temperature and + #' precipitation, among other variables) is easily done with the result of + #' this method, without having to consider the specifics of the calendar + #' of the data set. + #' + #' If the factor `f` is for an era (e.g. spanning multiple years and the + #' levels do not indicate the specific year), then the result will + #' indicate the number of time units of the period in a regular single + #' year. In other words, for an era of 2041-2060 and a monthly factor on a + #' standard calendar with a `days` unit, the result will be + #' `c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)`. Leap days are thus + #' only considered for the `366_day` and `all_leap` calendars. + #' + #' Note that this function gives the number of time units in each level of + #' the factor - the actual number of data points in the time series per + #' factor level may be different. Use [CFfactor_coverage()] to determine + #' the actual number of data points or the coverage of data points + #' relative to the factor level. + #' + #' @param f A factor or a list of factors derived from the method + #' `CFTime$factor()`. + #' @return If `f` is a factor, a numeric vector with a length equal to the + #' number of levels in the factor, indicating the number of time units in + #' each level of the factor. If `f` is a list of factors, a list with each + #' element a numeric vector as above. + factor_units = function(f) { + if (is.list(f)) factors <- f else factors <- list(f) + if (!(all(unlist(lapply(factors, function(x) is.factor(x) && is.numeric(attr(x, "era")) && + attr(x, "period") %in% CFt$factor_periods))))) + stop("Argument `f` must be a factor generated by the method `CFTime$factor()`", call. = FALSE) + + out <- lapply(factors, function(fac) .factor_units(fac, self$cal, CFt$units$per_day[self$cal$unit])) + if (is.factor(f)) out <- out[[1L]] + out + }, + + #' @description Calculate the number of time elements, or the relative + #' coverage, in each level of a factor generated by `CFTime$factor()`. + #' + #' @param f A factor or a list of factors derived from the method + #' `CFTime$factor()`. + #' @param coverage "absolute" or "relative". + #' @return If `f` is a factor, a numeric vector with a length equal to the + #' number of levels in the factor, indicating the number of units from the + #' time series contained in each level of the factor when + #' `coverage = "absolute"` or the proportion of units present relative to the + #' maximum number when `coverage = "relative"`. If `f` is a list of factors, a + #' list with each element a numeric vector as above. + factor_coverage = function(f, coverage = "absolute") { + if (is.list(f)) factors <- f else factors <- list(f) + if (!(all(unlist(lapply(factors, function(x) is.factor(x) && is.numeric(attr(x, "era")) && + attr(x, "period") %in% CFt$factor_periods))))) + stop("Argument `f` must be a factor generated by the method `CFTime$factor()`", call. = FALSE) + + if (!(is.character(coverage) && coverage %in% c("absolute", "relative"))) + stop("Argument `coverage` must be a character string with a value of 'absolute' or 'relative'", call. = FALSE) + + if (coverage == "relative") { + out <- lapply(factors, function(fac) { + res <- tabulate(fac) / .factor_units(fac, self$cal, CFt$units$per_day[self$cal$unit]) + yrs <- attr(fac, "era") + if (yrs > 0) res <- res / yrs + return(res) + }) + } else { + out <- lapply(factors, tabulate) } - } - return(c(y, m, d)) - }, origin[1L], origin[2L], x) - data.frame(year = ymd[1L,], month = ymd[2L,], day = ymd[3L,], row.names = NULL) -} + if (is.factor(f)) out <- out[[1L]] + out + } + ) +) diff --git a/R/CFutils.R b/R/CFutils.R deleted file mode 100644 index b04ee46..0000000 --- a/R/CFutils.R +++ /dev/null @@ -1,132 +0,0 @@ -#' Return the number of days in a month given a certain CF calendar -#' -#' Given a vector of dates as strings in ISO 8601 or UDUNITS format and a `CFtime` object, -#' this function will return a vector of the same length as the dates, -#' indicating the number of days in the month according to the calendar -#' specification. If no vector of days is supplied, the function will return an -#' integer vector of length 12 with the number of days for each month of the -#' calendar (disregarding the leap day for `standard` and `julian` calendars). -#' -#' @param cf CFtime. The CFtime definition to use. -#' @param x character. An optional vector of dates as strings with format -#' `YYYY-MM-DD`. Any time part will be silently ingested. -#' -#' @returns A vector indicating the number of days in each month for the vector -#' of dates supplied as a parameter to the function. If no dates are supplied, -#' the number of days per month for the calendar as a vector of length 12. -#' Invalidly specified dates will result in an `NA` value. -#' @export -#' @seealso When working with factors generated by [CFfactor()], it is usually -#' better to use [CFfactor_units()] as that will consider leap days for -#' non-epoch factors. [CFfactor_units()] can also work with other time periods -#' and datum units, such as "hours per month", or "days per season". -#' @examples -#' dates <- c("2021-11-27", "2021-12-10", "2022-01-14", "2022-02-18") -#' cf <- CFtime("days since 1850-01-01", "standard") -#' month_days(cf, dates) -#' -#' cf <- CFtime("days since 1850-01-01", "360_day") -#' month_days(cf, dates) -#' -#' cf <- CFtime("days since 1850-01-01", "all_leap") -#' month_days(cf, dates) -#' -#' month_days(cf) -month_days <- function(cf, x = NULL) { - stopifnot(methods::is(cf, "CFtime")) - cal_id <- cf@datum@cal_id - - days_in_month <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - leapdays_in_month <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - - # No dates supplied: return standard number of days per month - if (is.null(x)) { - if (cal_id %in% c(1L, 2L, 4L)) return(days_in_month) - if (cal_id == 3L) return(rep(30L, 12L)) - return(leapdays_in_month) - } - - # Argument x supplied - if (!(is.character(x))) stop("Argument `x` must be a character vector of dates in 'YYYY-MM-DD' format") - - ymd <- .parse_timestamp(cf@datum, x) - if (anyNA(ymd$year)) warning("Some dates could not be parsed. Result contains `NA` values.") - - if (cal_id == 3L) { # 360_day - res <- rep(30L, length(x)) - res[which(is.na(ymd$year))] <- NA - return(res) - } - - if (cal_id == 4L) return(days_in_month[ymd$month]) - if (cal_id == 5L) return(leapdays_in_month[ymd$month]) - - # Standard and julian calendars - ifelse(.is_leap_year(ymd$year, cal_id), leapdays_in_month[ymd$month], days_in_month[ymd$month]) -} - -#' Check if the supplied year, month and day form a valid date in the specified -#' calendar. -#' -#' This is an internal function that should not be used outside of the CFtime package. -#' -#' @param yr numeric. The year to test, must be in range 1:9999. -#' @param mon numeric. The month to test, must be in range 1:12 -#' @param day numeric. The day to test, must be in the range permitted by the calendar. -#' @param cal_id numeric. Identifier of the calendar to use to test the validity of the date. -#' -#' @returns boolean. TRUE if the date is valid, FALSE otherwise. -#' @noRd -.is_valid_calendar_date <- function(yr, mon, day, cal_id) { - if (is.na(yr) || is.na(mon)) return(FALSE) - - # Check valid date ranges, no extended syntax - if ((yr < 1L) || (yr > 9999L)) return(FALSE) # year out of range - if ((mon < 1L) || (mon > 12L)) return(FALSE) # month out of range - if (is.na(day)) return(TRUE) # day not specified - if ((day >= 1L) && (day <= 28L)) return(TRUE) # day in safe range, 90% of valid cases - else if ((day < 1L) || day > 31L) return(FALSE) # day out of range - - # 360_day calendar: oddball case for month length - if (cal_id == 3L) return(day <= 30L) - - # Now all dates should be in regular-length months, but check for leap years - # Day is in range 29:31 because day in range 1:28 already passed - if (mon == 2L) { # February - if (day > 29L) return(FALSE) - if (cal_id == 5L) return(TRUE) # all_leap - if (cal_id == 4L) return(FALSE) # no_leap - if (cal_id == 2L) return(yr %% 4L == 0L) # julian: every 4th year is a leap year - return(((yr %% 4L == 0L) && (yr %% 100L > 0L)) || (yr %% 400L == 0L)) # standard calendar - } - return(!((mon %in% c(4L, 6L, 9L, 11L)) && (day == 31L))) # months other than February -} - -#' Flag which years are leap years, given a certain CF calendar -#' -#' This is an internal function that should not be used outside of the CFtime package. -#' -#' @param yr numeric. Vector of years to test. -#' @param cal integer. The id of the calendar. -#' -#' @returns A logical vector of the same length as argument `yr` which is `TRUE` -#' for elements that are leap years for the given calendar, `FALSE` otherwise. -#' @noRd -.is_leap_year <- function(yr, cal) { - switch (cal, - ((yr %% 4L == 0L) & (yr %% 100L > 0L)) | (yr %% 400L == 0L), - yr %% 4L == 0L, - rep(FALSE, length(yr)), - rep(FALSE, length(yr)), - rep(TRUE, length(yr))) -} - -#' Display the structure of a CFdatum instance -#' -#' @param object `CFdatum` instance to print structure of. -#' @param ... Ignored. -#' @returns Nothing. Prints information to the console. -#' @export -str.CFdatum <- function(object, ...) { - cat(" ", object@definition, " [ ", object@calendar, " calendar ]\n", sep = "") -} diff --git a/R/api.R b/R/api.R new file mode 100644 index 0000000..6eb0c11 --- /dev/null +++ b/R/api.R @@ -0,0 +1,795 @@ +#' Create a CFTime object +#' +#' This function creates an instance of the [CFTime] class. The arguments to the +#' call are typically read from a CF-compliant data file with climatological +#' observations or climate projections. Specification of arguments can also be +#' made manually in a variety of combinations. +#' +#' @param definition A character string describing the time coordinate. +#' @param calendar A character string describing the calendar to use with the +#' time dimension definition string. Default value is "standard". +#' @param offsets Numeric or character vector, optional. When numeric, a vector +#' of offsets from the origin in the time series. When a character vector of +#' length 2 or more, timestamps in ISO8601 or UDUNITS format. When a character +#' string, a timestamp in ISO8601 or UDUNITS format and then a time series +#' will be generated with a separation between steps equal to the unit of +#' measure in the definition, inclusive of the definition timestamp. The unit +#' of measure of the offsets is defined by the time series definition. +#' @returns An instance of the `CFTime` class. +#' @export +#' @name CFtime-function +#' @examples +#' CFtime("days since 1850-01-01", "julian", 0:364) +#' +#' CFtime("hours since 2023-01-01", "360_day", "2023-01-30T23:00") +CFtime <- function(definition, calendar = "standard", offsets = NULL) { + CFTime$new(definition, calendar, offsets) +} + +# ============================================================================== +# Functions to access CFTime properties and methods + +#' @aliases properties +#' @title Properties of a CFTime object +#' +#' @description These functions return the properties of an instance of the +#' [CFTime] class. The properties are all read-only, but offsets can be added +#' using the `+` operator. +#' +#' @param t An instance of `CFTime`. +#' +#' @returns `calendar()` and `unit()` return a character string. +#' `origin()` returns a data frame of timestamp elements with a single row +#' of data. `timezone()` returns the calendar time zone as a character +#' string. `offsets()` returns a vector of offsets or `NULL` if no offsets +#' have been set. +#' +#' @examples +#' t <- CFtime("days since 1850-01-01", "julian", 0:364) +#' definition(t) +#' calendar(t) +#' unit(t) +#' timezone(t) +#' origin(t) +#' offsets(t) +#' resolution(t) + +#' @describeIn properties The definition string of the `CFTime` instance. +#' @export +definition <- function(t) t$cal$definition + +#' @describeIn properties The calendar of the `CFTime` instance. +#' @export +calendar <- function(t) t$cal$name + +#' @describeIn properties The unit of the `CFTime` instance. +#' @export +unit <- function(t) CFt$units$name[t$cal$unit] + +#' @describeIn properties The origin of the `CFTime` instance in timestamp elements. +#' @export +origin <- function(t) t$cal$origin + +#' @describeIn properties The time zone of the calendar of the `CFTime` instance as a character string. +#' @export +timezone <- function(t) t$cal$timezone + +#' @describeIn properties The offsets of the `CFTime` instance as a numeric vector. +#' @export +offsets <- function(t) t$offsets + +#' @describeIn properties The average separation between the offsets in the `CFTime` instance. +#' @export +resolution <- function(t) t$resolution + +#' Bounds of the time offsets +#' +#' CF-compliant netCDF files store time information as a single offset value for +#' each step along the dimension, typically centered on the valid interval of +#' the data (e.g. 12-noon for day data). Optionally, the lower and upper values +#' of the valid interval are stored in a so-called "bounds" variable, as an +#' array with two rows (lower and higher value) and a column for each offset. +#' With function `bounds()<-` those bounds can be set for a `CFTime` instance. +#' The bounds can be retrieved with the `bounds()` function. +#' +#' @param x A `CFTime` instance. +#' @param format Optional. A single string with format specifiers, see +#' [CFtime::format()] for details. +#' @return If bounds have been set, an array of bounds values with dimensions +#' (2, length(offsets)). The first row gives the lower bound, the second row +#' the upper bound, with each column representing an offset of `x`. If the +#' `format` argument is specified, the bounds values are returned as strings +#' according to the format. `NULL` when no bounds have been set. +#' @aliases bounds +#' @export +#' @examples +#' t <- CFtime("days since 2024-01-01", "standard", seq(0.5, by = 1, length.out = 366)) +#' as_timestamp(t)[1:3] +#' bounds(t) <- rbind(0:365, 1:366) +#' bounds(t)[, 1:3] +#' bounds(t, "%d-%b-%Y")[, 1:3] +bounds <- function(x, format) { + x$get_bounds(format) +} + +#' @rdname bounds +#' @param value A `matrix` (or `array`) with dimensions (2, length(offsets)) +#' giving the lower (first row) and higher (second row) bounds of each offset +#' (this is the format that the CF Metadata Conventions uses for storage in +#' netCDF files). Use `FALSE` to unset any previously set bounds, `TRUE` to +#' set regular bounds at mid-points between the offsets (which must be regular +#' as well). +#' @export +`bounds<-` <- function(x, value) { + x$set_bounds(value) + x +} + +#' The length of the offsets contained in the `CFTime` instance. +#' +#' @param x The `CFTime` instance whose length will be returned +#' +#' @return The number of offsets in the specified `CFTime` instance. +#' @export +#' +#' @examples +#' t <- CFtime("days since 1850-01-01", "julian", 0:364) +#' length(t) +length.CFTime <- function(x) base::length(x$offsets) + +#' Return the timestamps contained in the `CFTime` instance. +#' +#' @param x The `CFTime` instance whose timestamps will be returned. +#' @param ... Ignored. +#' +#' @return The timestamps in the specified `CFTime` instance. +#' @export +#' +#' @examples +#' t <- CFtime("days since 1850-01-01", "julian", 0:364) +#' as.character(t) +as.character.CFTime <- function(x, ...) { + x$as_timestamp() +} + +#' Create a factor for a `CFTime` instance +#' +#' Method for [base::cut()] applied to [CFTime] objects. +#' +#' When `breaks` is one of `"year", "season", "quarter", "month", "dekad", +#' "day"` a factor is generated like by [CFfactor()]. +#' +#' When `breaks` is a vector of character timestamps a factor is produced with a +#' level for every interval between timestamps. The last timestamp, therefore, +#' is only used to close the interval started by the pen-ultimate timestamp - +#' use a distant timestamp (e.g. `range(x)[2]`) to ensure that all offsets to +#' the end of the CFTime time series are included, if so desired. The last +#' timestamp will become the upper bound in the `CFTime` instance that is +#' returned as an attribute to this function so a sensible value for the last +#' timestamp is advisable. +#' +#' This method works similar to [base::cut.POSIXt()] but there are some +#' differences in the arguments: for `breaks` the set of options is different +#' and no preceding integer is allowed, `labels` are always assigned using +#' values of `breaks`, and the interval is always left-closed. +#' +#' @param x An instance of `CFTime`. +#' @param breaks A character string of a factor period (see [CFfactor()] for a +#' description), or a character vector of timestamps that conform to the +#' calendar of `x`, with a length of at least 2. Timestamps must be given in +#' ISO8601 format, e.g. "2024-04-10 21:31:43". +#' @param ... Ignored. +#' @returns A factor with levels according to the `breaks` argument, with +#' attributes 'period', 'era' and 'CFTime'. When `breaks` is a factor +#' period, attribute 'period' has that value, otherwise it is '"day"'. When +#' `breaks` is a character vector of timestamps, attribute 'CFTime' holds an +#' instance of `CFTime` that has the same definition as `x`, but with (ordered) +#' offsets generated from the `breaks`. Attribute 'era' is always -1. +#' @aliases cut +#' @seealso [CFfactor()] produces a factor for several fixed periods, including +#' for eras. +#' @export +#' @examples +#' x <- CFtime("days since 2021-01-01", "365_day", 0:729) +#' breaks <- c("2022-02-01", "2021-12-01", "2023-01-01") +#' cut(x, breaks) +cut.CFTime <- function (x, breaks, ...) { + if (!inherits(x, "CFTime")) + stop("Argument 'x' must be a CFTime instance", call. = FALSE) + x$cut(breaks) +} + +#' Find the index of timestamps in the time series +#' +#' Find the index in the time series for each timestamp given in argument `x`. +#' Values of `x` that are before the earliest value in `y` will be returned as +#' `0`; values of `x` that are after the latest values in `y` will be returned +#' as `.Machine$integer.max`. Alternatively, when `x` is a numeric vector of +#' index values, return the valid indices of the same vector, with the side +#' effect being the attribute "CFTime" associated with the result. +#' +#' Timestamps can be provided as vectors of character strings, `POSIXct` or +#' `Date.` +#' +#' Matching also returns index values for timestamps that fall between two +#' elements of the time series - this can lead to surprising results when time +#' series elements are positioned in the middle of an interval (as the CF +#' Metadata Conventions instruct us to "reasonably assume"): a time series of +#' days in January would be encoded in a netCDF file as +#' `c("2024-01-01 12:00:00", "2024-01-02 12:00:00", "2024-01-03 12:00:00", ...)` +#' so `x <- c("2024-01-01", "2024-01-02", "2024-01-03")` would result in +#' `(NA, 1, 2)` (or `(NA, 1.5, 2.5)` with `method = "linear"`) because the date +#' values in `x` are at midnight. This situation is easily avoided by ensuring +#' that `y` has bounds set (use `bounds(y) <- TRUE` as a proximate solution if +#' bounds are not stored in the netCDF file). See the Examples. +#' +#' If bounds are set, the indices are taken from those bounds. Returned indices +#' may fall in between bounds if the latter are not contiguous, with the +#' exception of the extreme values in `x`. +#' +#' Values of `x` that are not valid timestamps according to the calendar of `y` +#' will be returned as `NA`. +#' +#' `x` can also be a numeric vector of index values, in which case the valid +#' values in `x` are returned. If negative values are passed, the positive +#' counterparts will be excluded and then the remainder returned. Positive and +#' negative values may not be mixed. Using a numeric vector has the side effect +#' that the result has the attribute "CFTime" describing the temporal dimension +#' of the slice. If index values outside of the range of `y` (`1:length(y)`) are +#' provided, an error will be thrown. +#' +#' @param x Vector of `character`, `POSIXt` or `Date` values to find indices +#' for, or a numeric vector. +#' @param y [CFTime] instance. +#' @param method Single value of "constant" or "linear". If `"constant"` or when +#' bounds are set on argument `y`, return the index value for each match. If +#' `"linear"`, return the index value with any fractional value. +#' +#' @returns A numeric vector giving indices into the "time" dimension of the +#' data set associated with `y` for the values of `x`. If there is at least 1 +#' valid index, then attribute "CFTime" contains an instance of `CFTime` that +#' describes the dimension of filtering the data set associated with `y` with +#' the result of this function, excluding any `NA`, `0` and +#' `.Machine$integer.max` values. +#' @export +#' +#' @examples +#' cf <- CFtime("days since 2020-01-01", "360_day", 1440:1799 + 0.5) +#' as_timestamp(cf)[1:3] +#' x <- c("2024-01-01", "2024-01-02", "2024-01-03") +#' indexOf(x, cf) +#' indexOf(x, cf, method = "linear") +#' +#' bounds(cf) <- TRUE +#' indexOf(x, cf) +#' +#' # Non-existent calendar day in a `360_day` calendar +#' x <- c("2024-03-30", "2024-03-31", "2024-04-01") +#' indexOf(x, cf) +#' +#' # Numeric x +#' indexOf(c(29, 30, 31), cf) +indexOf <- function(x, y, method = "constant") { + y$indexOf(x, method) +} + +#' Extreme time series values +#' +#' Character representation of the extreme values in the time series. +#' +#' @param x An instance of the [CFTime] class. +#' @param format A character string with format specifiers, optional. If it is +#' missing or an empty string, the most economical ISO8601 format is chosen: +#' "date" when no time information is present in `x`, "timestamp" otherwise. +#' Otherwise a suitable format specifier can be provided. +#' @param bounds Logical to indicate if the extremes from the bounds should be +#' used, if set. Defaults to `FALSE`. +#' @param ... Ignored. +#' @param na.rm Ignored. +#' @return Vector of two character representations of the extremes of the time +#' series. +#' @export +#' @examples +#' cf <- CFtime("days since 1850-01-01", "julian", 0:364) +#' range(cf) +#' range(cf, "%Y-%b-%e") +range.CFTime <- function(x, format = "", bounds = FALSE, ..., na.rm = FALSE) { + x$range(format, bounds) +} + +#' Indicates if the time series is complete +#' +#' This function indicates if the time series is complete, meaning that the time +#' steps are equally spaced and there are thus no gaps in the time series. +#' +#' This function gives exact results for time series where the nominal +#' *unit of separation* between observations in the time series is exact in +#' terms of the calendar unit. As an example, for a calendar unit of "days" where the +#' observations are spaced a fixed number of days apart the result is exact, but +#' if the same calendar unit is used for data that is on a monthly basis, the +#' *assessment* is approximate because the number of days per month is variable +#' and dependent on the calendar (the exception being the `360_day` calendar, +#' where the assessment is exact). The *result* is still correct in most cases +#' (including all CF-compliant data sets that the developers have seen) although +#' there may be esoteric constructions of CFTime and offsets that trip up this +#' implementation. +#' +#' @param x An instance of the [CFTime] class. +#' @returns logical. `TRUE` if the time series is complete, with no gaps; +#' `FALSE` otherwise. If no offsets have been added to the `CFTime` instance, +#' `NA` is returned. +#' @export +#' @examples +#' t <- CFtime("days since 1850-01-01", "julian", 0:364) +#' is_complete(t) +is_complete <- function(x) { + if (!inherits(x, "CFTime")) stop("Argument must be an instance of `CFTime`", call. = FALSE) + x$equidistant() +} + +#' Which time steps fall within two extreme values +#' +#' Given two extreme character timestamps, return a logical vector of a length +#' equal to the number of time steps in the [CFTime] instance with values `TRUE` +#' for those time steps that fall between the two extreme values, `FALSE` +#' otherwise. This can be used to select slices from the time series in reading +#' or analysing data. +#' +#' If bounds were set these will be preserved. +#' +#' @param x The `CFTime` instance to operate on. +#' @param extremes Character vector of two timestamps that represent the +#' extremes of the time period of interest. The timestamps must be in +#' increasing order. The timestamps need not fall in the range of the time +#' steps in argument `x. +#' @param rightmost.closed Is the larger extreme value included in the result? +#' Default is `FALSE`. +#' @returns A logical vector with a length equal to the number of time steps in +#' `x` with values `TRUE` for those time steps that fall between the two +#' extreme values, `FALSE` otherwise. The earlier timestamp is included, the +#' later timestamp is excluded. A specification of `c("2022-01-01", "2023-01-01")` +#' will thus include all time steps that fall in the year 2022. +#' @export +#' @examples +#' t <- CFtime("hours since 2023-01-01 00:00:00", "standard", 0:23) +#' slab(t, c("2022-12-01", "2023-01-01 03:00")) +slab <- function(x, extremes, rightmost.closed = FALSE) { + if (!inherits(x, "CFTime")) stop("First argument must be an instance of `CFTime`", call. = FALSE) + x$slab(extremes, rightmost.closed) +} + +#' Equivalence of CFTime objects +#' +#' This operator can be used to test if two [CFTime] objects represent the same +#' CF-convention time coordinates. Two `CFTime` objects are considered equivalent +#' if they have an equivalent calendar and the same offsets. +#' +#' @param e1,e2 Instances of the `CFTime` class. +#' @returns `TRUE` if the `CFTime` objects are equivalent, `FALSE` otherwise. +#' @export +#' @aliases CFtime-equivalent +#' @examples +#' e1 <- CFtime("days since 1850-01-01", "gregorian", 0:364) +#' e2 <- CFtime("days since 1850-01-01 00:00:00", "standard", 0:364) +#' e1 == e2 +"==.CFTime" <- function(e1, e2) + e1$cal$is_equivalent(e2$cal) && + length(e1$offsets) == length(e2$offsets) && + all(e1$offsets == e2$offsets) + +#' Extend a CFTime object +#' +#' A [CFTime] instance can be extended with this operator, using values from +#' another `CFTime` instance, or a vector of numeric offsets or character +#' timestamps. If the values come from another `CFTime` instance, the calendars +#' of the two instances must be compatible If the calendars of the `CFTime` +#' instances are not compatible, an error is thrown. +#' +#' The resulting `CFTime` instance will have the offsets of the original +#' `CFTime` instance, appended with offsets from argument `e2` in the order that +#' they are specified. If the new sequence of offsets is not monotonically +#' increasing a warning is generated (the COARDS metadata convention requires +#' offsets to be monotonically increasing). +#' +#' There is no reordering or removal of duplicates. This is because the time +#' series are usually associated with a data set and the correspondence between +#' the data in the files and the `CFTime` instance is thus preserved. When +#' merging the data sets described by this time series, the order must be +#' identical to the merging here. +#' +#' Note that when adding multiple vectors of offsets to a `CFTime` instance, it +#' is more efficient to first concatenate the vectors and then do a final +#' addition to the `CFTime` instance. So avoid +#' `CFtime(definition, calendar, e1) + CFtime(definition, calendar, e2) + CFtime(definition, calendar, e3) + ...` +#' but rather do `CFtime(definition, calendar) + c(e1, e2, e3, ...)`. It is the +#' responsibility of the operator to ensure that the offsets of the different +#' data sets are in reference to the same calendar. +#' +#' Note also that `RNetCDF` and `ncdf4` packages both return the values of the +#' "time" dimension as a 1-dimensional array. You have to `dim(time_values) <- +#' NULL` to de-class the array to a vector before adding offsets to an existing +#' `CFtime` instance. +#' +#' Any bounds that were set will be removed. Use [bounds()] to retrieve the +#' bounds of the individual `CFTime` instances and then set them again after +#' merging the two instances. +#' +#' @param e1 Instance of the `CFTime` class. +#' @param e2 Instance of the `CFTime` class with a calendar compatible with that +#' of argument `e1`, or a numeric vector with offsets from the origin of +#' argument `e1`, or a vector of `character` timestamps in ISO8601 or UDUNITS +#' format. +#' @returns A `CFTime` object with the offsets of argument `e1` extended by the +#' values from argument `e2`. +#' @export +#' @aliases CFtime-merge +#' @examples +#' e1 <- CFtime("days since 1850-01-01", "gregorian", 0:364) +#' e2 <- CFtime("days since 1850-01-01 00:00:00", "standard", 365:729) +#' e1 + e2 +"+.CFTime" <- function(e1, e2) { + if (inherits(e2, "CFTime")) { + if (!e1$cal$is_compatible(e2$cal)) stop("Calendars not compatible", call. = FALSE) + if (all(e1$cal$origin[1:6] == e2$cal$origin[1:6])) + CFTime$new(e1$cal$definition, e1$cal$name, c(e1$offsets, e2$offsets)) + else { + diff <- e1$cal$parse(paste(e2$cal$origin_date, e2$cal$origin_time))$offset + CFTime$new(e1$cal$definition, e1$cal$name, c(e1$offsets, e2$offsets + diff)) + } + } else if (is.numeric(e2) && .validOffsets(e2)) { + CFTime$new(e1$cal$definition, e1$cal$name, c(e1$offsets, e2)) + } else { + time <- e1$cal$parse(e2) + if (anyNA(time$year)) stop("Argument `e2` contains invalid timestamps", call. = FALSE) + CFTime$new(e1$cal$definition, e1$cal$name, c(e1$offsets, time$offset)) + } +} + +# ============================================================================== +# Factors and coverage + +#' Create a factor from the offsets in a `CFTime` instance +#' +#' With this function a factor can be generated for the time series, or a part +#' thereof, contained in the [CFTime] instance. This is specifically interesting +#' for creating factors from the date part of the time series that aggregate the +#' time series into longer time periods (such as month) that can then be used to +#' process daily CF data sets using, for instance, `tapply()`. +#' +#' The factor will respect the calendar that the time series is built on. For +#' `period`s longer than a day this will result in a factor where the calendar +#' is no longer relevant (because calendars impacts days, not dekads, months, +#' quarters, seasons or years). +#' +#' The factor will be generated in the order of the offsets of the `CFTime` +#' instance. While typical CF-compliant data sources use ordered time series +#' there is, however, no guarantee that the factor is ordered as multiple +#' `CFTime` objects may have been merged out of order. For most processing with +#' a factor the ordering is of no concern. +#' +#' If the `era` parameter is specified, either as a vector of years to include +#' in the factor, or as a list of such vectors, the factor will only consider +#' those values in the time series that fall within the list of years, inclusive +#' of boundary values. Other values in the factor will be set to `NA`. The years +#' need not be contiguous, within a single vector or among the list items, or in +#' order. +#' +#' The following periods are supported by this function: +#' +#' \itemize{ +#' \item `year`, the year of each offset is returned as "YYYY". +#' \item `season`, the meteorological season of each offset is returned as +#' "Sx", with x being 1-4, preceeded by "YYYY" if no `era` is +#' specified. Note that December dates are labeled as belonging to the +#' subsequent year, so the date "2020-12-01" yields "2021S1". This implies +#' that for standard CMIP files having one or more full years of data the +#' first season will have data for the first two months (January and +#' February), while the final season will have only a single month of data +#' (December). +#' \item `quarter`, the calendar quarter of each offset is returned as "Qx", +#' with x being 1-4, preceeded by "YYYY" if no `era` is specified. +#' \item `month`, the month of each offset is returned as "01" to +#' "12", preceeded by "YYYY-" if no `era` is specified. This is the default +#' period. +#' \item `dekad`, ten-day periods are returned as +#' "Dxx", where xx runs from "01" to "36", preceeded by "YYYY" if no `era` +#' is specified. Each month is subdivided in dekads as follows: 1- days 01 - +#' 10; 2- days 11 - 20; 3- remainder of the month. +#' \item `day`, the month and day of each offset are returned as "MM-DD", +#' preceeded by "YYYY-" if no `era` is specified. +#' } +#' +#' It is not possible to create a factor for a period that is shorter than the +#' temporal resolution of the source data set from which the `t` argument +#' derives. As an example, if the source data set has monthly data, a dekad or +#' day factor cannot be created. +#' +#' Creating factors for other periods is not supported by this function. Factors +#' based on the timestamp information and not dependent on the calendar can +#' trivially be constructed from the output of the [as_timestamp()] function. +#' +#' For non-era factors the attribute 'CFTime' of the result contains a `CFTime` +#' instance that is valid for the result of applying the factor to a data set +#' that the `t` argument is associated with. In other words, if `CFTime` +#' instance 'At' describes the temporal dimension of data set 'A' and a factor +#' 'Af' is generated like `Af <- CFfactor(At)`, then `Bt <- attr(Af, "CFTime")` +#' describes the temporal dimension of the result of, say, +#' `B <- apply(A, 1:2, tapply, Af, FUN)`. The 'CFTime' attribute is `NULL` for +#' era factors. +#' +#' @param t An instance of the `CFTime` class whose offsets will be used to +#' construct the factor. +#' @param period character. A character string with one of the values "year", +#' "season", "quarter", "month" (the default), "dekad" or "day". +#' @param era numeric or list, optional. Vector of years for which to +#' construct the factor, or a list whose elements are each a vector of years. +#' If `era` is not specified, the factor will use the entire time series for +#' the factor. +#' +#' @returns If `era` is a single vector or not specified, a factor with a +#' length equal to the number of offsets in `t`. If `era` is a list, a list +#' with the same number of elements and names as `era`, each containing a +#' factor. Elements in the factor will be set to `NA` for time series values +#' outside of the range of specified years. +#' +#' The factor, or factors in the list, have attributes 'period', 'era' and +#' 'CFTime'. Attribute 'period' holds the value of the `period` argument. +#' Attribute 'era' indicates the number of years that are included in the +#' era, or -1 if no `era` is provided. Attribute 'CFTime' holds an +#' instance of `CFTime` that has the same definition as `t`, but with offsets +#' corresponding to the mid-point of non-era factor levels; if the `era` +#' argument is specified, attribute 'CFTime' is `NULL`. +#' @seealso [cut()] creates a non-era factor for arbitrary cut points. +#' @export +#' +#' @examples +#' t <- CFtime("days since 1949-12-01", "360_day", 19830:54029) +#' +#' # Create a dekad factor for the whole time series +#' f <- CFfactor(t, "dekad") +#' +#' # Create three monthly factors for early, mid and late 21st century eras +#' ep <- CFfactor(t, era = list(early = 2021:2040, mid = 2041:2060, late = 2061:2080)) +CFfactor <- function(t, period = "month", era = NULL) { + if (!(inherits(t, "CFTime"))) stop("First argument to CFfactor() must be an instance of the `CFTime` class", call. = FALSE) + t$factor(period, era) +} + +#' Number of base time units in each factor level +#' +#' Given a factor as returned by [CFfactor()] and the [CFTime] instance from +#' which the factor was derived, this function will return a numeric vector with +#' the number of time units in each level of the factor. +#' +#' The result of this function is useful to convert between absolute and +#' relative values. Climate change anomalies, for instance, are usually computed +#' by differencing average values between a future period and a baseline period. +#' Going from average values back to absolute values for an aggregate period +#' (which is typical for temperature and precipitation, among other variables) +#' is easily done with the result of this function, without having to consider +#' the specifics of the calendar of the data set. +#' +#' If the factor `f` is for an era (e.g. spanning multiple years and the +#' levels do not indicate the specific year), then the result will indicate the +#' number of time units of the period in a regular single year. In other words, +#' for an era of 2041-2060 and a monthly factor on a standard calendar with a +#' `days` unit, the result will be `c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)`. +#' Leap days are thus only considered for the `366_day` and `all_leap` calendars. +#' +#' Note that this function gives the number of time units in each level of the +#' factor - the actual number of data points in the `cf` instance per factor +#' level may be different. Use [CFfactor_coverage()] to determine the actual +#' number of data points or the coverage of data points relative to the factor +#' level. +#' +#' @param t An instance of `CFTime`. +#' @param f A factor or a list of factors derived from the +#' parameter `t`. The factor or list thereof should generally be generated by +#' the function [CFfactor()]. +#' +#' @returns If `f` is a factor, a numeric vector with a length equal to the +#' number of levels in the factor, indicating the number of time units in each +#' level of the factor. If `f` is a list of factors, a list with each element +#' a numeric vector as above. +#' @export +#' +#' @examples +#' t <- CFtime("days since 2001-01-01", "365_day", 0:364) +#' f <- CFfactor(t, "dekad") +#' CFfactor_units(t, f) +CFfactor_units <- function(t, f) { + if (!inherits(t, "CFTime")) stop("First argument to `CFfactor_units()` must be an instance of the `CFTime` class", call. = FALSE) + t$factor_units(f) +} + +#' Coverage of time elements for each factor level +#' +#' This function calculates the number of time elements, or the relative +#' coverage, in each level of a factor generated by [CFfactor()]. +#' +#' @param t An instance of [CFTime]. +#' @param f factor or list. A factor or a list of factors derived from the +#' parameter `t`. The factor or list thereof should generally be generated by +#' the function [CFfactor()]. +#' @param coverage "absolute" or "relative". +#' @returns If `f` is a factor, a numeric vector with a length equal to the +#' number of levels in the factor, indicating the number of units from the +#' time series in `t` contained in each level of the factor when +#' `coverage = "absolute"` or the proportion of units present relative to the +#' maximum number when `coverage = "relative"`. If `f` is a list of factors, a +#' list with each element a numeric vector as above. +#' @export +#' +#' @examples +#' t <- CFtime("days since 2001-01-01", "365_day", 0:364) +#' f <- CFfactor(t, "dekad") +#' CFfactor_coverage(t, f, "absolute") +CFfactor_coverage <- function(t, f, coverage = "absolute") { + if (!inherits(t, "CFTime")) stop("First argument to `CFfactor_coverage()` must be an instance of the `CFTime` class", call. = FALSE) + t$factor_coverage(f, coverage) +} + +# ============================================================================== +# Regular functions + +#' Create a vector that represents CF timestamps +#' +#' This function generates a vector of character strings or `POSIXct`s that +#' represent the date and time in a selectable combination for each offset. +#' +#' The character strings use the format `YYYY-MM-DDThh:mm:ss±hhmm`, depending on +#' the `format` specifier. The date in the string is not necessarily compatible +#' with `POSIXt` - in the `360_day` calendar `2017-02-30` is valid and +#' `2017-03-31` is not. +#' +#' For the "proleptic_gregorian" calendar the output can also be generated as a +#' vector of `POSIXct` values by specifying `asPOSIX = TRUE`. The same is +#' possible for the "standard" and "gregorian" calendars but only if all +#' timestamps fall on or after 1582-10-15. +#' +#' @param t The `CFTime` instance that contains the offsets to use. +#' @param format character. A character string with either of the values "date" +#' or "timestamp". If the argument is not specified, the format used is +#' "timestamp" if there is time information, "date" otherwise. +#' @param asPOSIX logical. If `TRUE`, for "standard", "gregorian" and +#' "proleptic_gregorian" calendars the output is a vector of `POSIXct` - for +#' other calendars an error will be thrown. Default value is `FALSE`. +#' @seealso The [CFTime] `format()` method gives greater flexibility through +#' the use of `strptime`-like format specifiers. +#' @returns A character vector where each element represents a moment in time +#' according to the `format` specifier. +#' @export +#' @examples +#' t <- CFtime("hours since 2020-01-01", "standard", seq(0, 24, by = 0.25)) +#' as_timestamp(t, "timestamp") +#' +#' t2 <- CFtime("days since 2002-01-21", "standard", 0:20) +#' tail(as_timestamp(t2, asPOSIX = TRUE)) +#' +#' tail(as_timestamp(t2)) +#' +#' tail(as_timestamp(t2 + 1.5)) +as_timestamp <- function(t, format = NULL, asPOSIX = FALSE) { + if (!(inherits(t, "CFTime"))) + stop("First argument to `as_timestamp()` must be an instance of the `CFTime` class", call. = FALSE) + t$as_timestamp(format, asPOSIX) +} + +#' Return the number of days in a month given a certain CF calendar +#' +#' Given a vector of dates as strings in ISO 8601 or UDUNITS format and a +#' [CFTime] object, this function will return a vector of the same length as the +#' dates, indicating the number of days in the month according to the calendar +#' specification. If no vector of days is supplied, the function will return an +#' integer vector of length 12 with the number of days for each month of the +#' calendar (disregarding the leap day for `standard` and `julian` calendars). +#' +#' @param t The `CFtime` instance to use. +#' @param x character. An optional vector of dates as strings with format +#' `YYYY-MM-DD`. Any time part will be silently ingested. +#' +#' @returns A vector indicating the number of days in each month for the vector +#' of dates supplied as argument `x. Invalidly specified dates will result in +#' an `NA` value. If no dates are supplied, the number of days per month for +#' the calendar as a vector of length 12. +#' +#' @export +#' @seealso When working with factors generated by [CFfactor()], it is usually +#' better to use [CFfactor_units()] as that will consider leap days for +#' non-era factors. [CFfactor_units()] can also work with other time periods +#' and calendar units, such as "hours per month", or "days per season". +#' @examples +#' dates <- c("2021-11-27", "2021-12-10", "2022-01-14", "2022-02-18") +#' t <- CFtime("days since 1850-01-01", "standard") +#' month_days(t, dates) +#' +#' t <- CFtime("days since 1850-01-01", "360_day") +#' month_days(t, dates) +#' +#' t <- CFtime("days since 1850-01-01", "all_leap") +#' month_days(t, dates) +#' +#' month_days(t) +month_days <- function(t, x = NULL) { + stopifnot(inherits(t, "CFTime")) + + if (is.null(x)) + return(t$cal$month_days()) + else { + if (!(is.character(x))) stop("Argument `x` must be a character vector of dates in 'YYYY-MM-DD' format") + + ymd <- t$cal$parse(x) + if (anyNA(ymd$year)) warning("Some dates could not be parsed. Result contains `NA` values.", call. = FALSE) + return(t$cal$month_days(ymd)) + } +} + +#' Parse series of timestamps in CF format to date-time elements +#' +#' This function will parse a vector of timestamps in ISO8601 or UDUNITS format +#' into a data frame with columns for the elements of the timestamp: year, +#' month, day, hour, minute, second, time zone. Those timestamps that could not +#' be parsed or which represent an invalid date in the indicated `CFtime` +#' instance will have `NA` values for the elements of the offending timestamp +#' (which will generate a warning). +#' +#' The supported formats are the *broken timestamp* format from the UDUNITS +#' library and ISO8601 *extended*, both with minor changes, as suggested by the +#' CF Metadata Conventions. In general, the format is `YYYY-MM-DD hh:mm:ss.sss +#' hh:mm`. The year can be from 1 to 4 digits and is interpreted literally, so +#' `79-10-24` is the day Mount Vesuvius erupted and destroyed Pompeii, not +#' `1979-10-24`. The year and month are mandatory, all other fields are +#' optional. There are defaults for all missing values, following the UDUNITS +#' and CF Metadata Conventions. Leading zeros can be omitted in the UDUNITS +#' format, but not in the ISO8601 format. The optional fractional part can have +#' as many digits as the precision calls for and will be applied to the smallest +#' specified time unit. In the result of this function, if the fraction is +#' associated with the minute or the hour, it is converted into a regular +#' `hh:mm:ss.sss` format, i.e. any fraction in the result is always associated +#' with the second, rounded down to milli-second accuracy. The separator between +#' the date and the time can be a single whitespace character or a `T`. +#' +#' The time zone is optional and should have at least the hour or `Z` if +#' present, the minute is optional. The time zone hour can have an optional +#' sign. In the UDUNITS format the separator between the time and the time zone +#' must be a single whitespace character, in ISO8601 there is no separation +#' between the time and the timezone. Time zone names are not supported (as +#' neither UDUNITS nor ISO8601 support them) and will cause parsing to fail when +#' supplied, with one exception: the designator "UTC" is silently dropped (i.e. +#' interpreted as "00:00"). +#' +#' Currently only the extended formats (with separators between the elements) +#' are supported. The vector of timestamps may have any combination of ISO8601 +#' and UDUNITS formats. +#' +#' @param t An instance of `CFTime` to use when parsing the date. +#' @param x Vector of character strings representing timestamps in +#' ISO8601 extended or UDUNITS broken format. +#' @returns A `data.frame` with constituent elements of the parsed timestamps in +#' numeric format. The columns are year, month, day, hour, minute, second +#' (with an optional fraction), time zone (character string), and the +#' corresponding offset value from the origin. Invalid input data will appear +#' as `NA` - if this is the case, a warning message will be displayed - other +#' missing information on input will use default values. +#' @export +#' @examples +#' t <- CFtime("days since 0001-01-01", "proleptic_gregorian") +#' +#' # This will have `NA`s on output and generate a warning +#' timestamps <- c("2012-01-01T12:21:34Z", "12-1-23", "today", +#' "2022-08-16T11:07:34.45-10", "2022-08-16 10.5+04") +#' parse_timestamps(t, timestamps) +parse_timestamps <- function(t, x) { + stopifnot(is.character(x), inherits(t, "CFTime")) + if (t$cal$unit > 4) stop("Parsing of timestamps on a 'month' or 'year' time unit is not supported.", call. = FALSE) + + out <- t$cal$parse(x) + if (anyNA(out$year)) + warning("Some dates could not be parsed. Result contains `NA` values.") + if (length(unique(out$tz)) > 1) + warning("Timestamps have multiple time zones. Some or all may be different from the calendar time zone.") + else if (out$tz[1] != t$cal$timezone) + warning("Timestamps have time zone that is different from the calendar.") + out +} + diff --git a/R/deprecated.R b/R/deprecated.R index 3900111..04d8600 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -11,26 +11,27 @@ #' | ------------------- | -------------------- | #' | CFcomplete() | [is_complete()] | #' | CFmonth_days() | [month_days()] | +#' | CFparse() | [parse_timestamps()] | #' | CFrange() | [range()] | #' | CFsubset() | [slab()] | #' | CFtimestamp() | [as_timestamp()] | #' -#' @param cf,x,format,asPOSIX See replacement functions. +#' @param t,x,format,asPOSIX,extremes See replacement functions. #' #' @returns See replacement functions. #' @rdname deprecated_functions #' @export -CFtimestamp <- function(cf, format = NULL, asPOSIX = FALSE) { +CFtimestamp <- function(t, format = NULL, asPOSIX = FALSE) { warning("Function `CFtimestamp()` is deprecated. Use `as_timestamp()` instead.") - as_timestamp(cf, format, asPOSIX) + as_timestamp(t, format, asPOSIX) } #' @rdname deprecated_functions #' @export -CFmonth_days <- function(cf, x = NULL) { +CFmonth_days <- function(t, x = NULL) { warning("Function `CFmonth_days()` is deprecated. Use `month_days()` instead.") - month_days(cf, x) + month_days(t, x) } #' @rdname deprecated_functions @@ -40,7 +41,16 @@ CFcomplete <- function(x) { is_complete(x) } +#' @rdname deprecated_functions +#' @export CFsubset <- function(x, extremes) { warning("Function `CFsubset()` is deprecated. Use `slab()` instead.") slab(x, extremes) } + +#' @rdname deprecated_functions +#' @export +CFparse <- function(t, x) { + warning("Function `CFparse()` is deprecated. Use `parse_timestamps()` instead.") + parse_timestamps(t, x) +} diff --git a/R/helpers.R b/R/helpers.R new file mode 100644 index 0000000..e2f8eb0 --- /dev/null +++ b/R/helpers.R @@ -0,0 +1,175 @@ +# Internal functions +# +# The functions in this source file are for internal use only. + +# ============================================================================== +# Offsets and timestamp formatting + +#' Validate offsets passed into a CFTime instance +#' +#' Tests the `offsets` values. Throws an error if the argument contains `NA` values. +#' +#' @param offsets The offsets to test +#' +#' @returns logical. `TRUE` if the offsets are valid, throws an error otherwise. +#' @noRd +.validOffsets <- function(offsets) { + if (any(is.na(offsets))) stop("Offsets cannot contain `NA` values.", call. = FALSE) + TRUE +} + +#' Formatting of time strings from time elements +#' +#' This is an internal function that should not generally be used outside of +#' the CFtime package. +#' +#' @param t A `data.frame` representing timestamps. +#' +#' @returns A vector of character strings with a properly formatted time. If any +#' timestamp has a fractional second part, then all time strings will report +#' seconds at milli-second precision. +#' @noRd +.format_time <- function(t) { + fsec <- t$second %% 1L + if (any(fsec > 0L)) { + paste0(sprintf("%02d:%02d:", t$hour, t$minute), ifelse(t$second < 10, "0", ""), sprintf("%.3f", t$second)) + } else { + sprintf("%02d:%02d:%02d", t$hour, t$minute, t$second) + } +} + +#' Do the time elements have time-of-day information? +#' +#' If any time information > 0, then `TRUE` otherwise `FALSE`. +#' +#' This is an internal function that should not generally be used outside of +#' the CFtime package. +#' +#' @param t A `data.frame` representing timestamps. +#' +#' @returns `TRUE` if any timestamp has time-of-day information, `FALSE` otherwise. +#' @noRd +.has_time <- function(t) { + any(t$hour > 0) || any(t$minute > 0) || any(t$second > 0) +} + +#' Do formatting of timestamps with format specifiers +#' +#' @param ts `data.frame` of decomposed offsets. +#' @param tz Time zone character string. +#' @param format A character string with the format specifiers, or +#' "date" or "timestamp". +#' @returns Character vector of formatted timestamps. +#' @noRd +.format_format <- function(ts, tz, format) { + if (format == "") format <- "timestamp" + if (format == "timestamp" && sum(ts$hour, ts$minute, ts$second) == 0) + format <- "date" + + if (format == "date") return(sprintf("%04d-%02d-%02d", ts$year, ts$month, ts$day)) + else if (format == "timestamp") return(sprintf("%04d-%02d-%02d %s", ts$year, ts$month, ts$day, .format_time(ts))) + + # Expand any composite specifiers + format <- stringr::str_replace_all(format, c("%F" = "%Y-%m-%d", "%R" = "%H:%M", "%T" = "%H:%M:%S")) + + # Splice in timestamp values for specifiers + # nocov start + if (grepl("%b|%h", format[1])) { + mon <- strftime(ISOdatetime(2024, 1:12, 1, 0, 0, 0), "%b") + format <- stringr::str_replace_all(format, "%b|%h", mon[ts$month]) + } + if (grepl("%B", format[1])) { + mon <- strftime(ISOdatetime(2024, 1:12, 1, 0, 0, 0), "%B") + format <- stringr::str_replace_all(format, "%B", mon[ts$month]) + } + # nocov end + format <- stringr::str_replace_all(format, "%[O]?d", sprintf("%02d", ts$day)) + format <- stringr::str_replace_all(format, "%e", sprintf("%2d", ts$day)) + format <- stringr::str_replace_all(format, "%[O]?H", sprintf("%02d", ts$hour)) + format <- stringr::str_replace_all(format, "%[O]?I", sprintf("%02d", ts$hour %% 12)) + format <- stringr::str_replace_all(format, "%[O]?m", sprintf("%02d", ts$month)) + format <- stringr::str_replace_all(format, "%[O]?M", sprintf("%02d", ts$minute)) + format <- stringr::str_replace_all(format, "%p", ifelse(ts$hour < 12, "AM", "PM")) + format <- stringr::str_replace_all(format, "%S", sprintf("%02d", as.integer(ts$second))) + format <- stringr::str_replace_all(format, "%[E]?Y", sprintf("%04d", ts$year)) + format <- stringr::str_replace_all(format, "%z", tz) + format <- stringr::str_replace_all(format, "%%", "%") + format +} + +# ============================================================================== +# Other internal functions + +#' Calculate time units in factors +#' +#' @param f factor. Factor as generated by `CFfactor()`. +#' @param cal `CFCalendar` instance of the `CFTime` instance. +#' @param upd numeric. Number of units per day, from the `CFt` environment. +#' @returns A vector as long as the number of levels in the factor. +#' @noRd +.factor_units <- function(f, cal, upd) { + period <- attr(f, "period") + cal_class <- class(cal)[1L] + + res <- if (period == "day") + rep(1L, nlevels(f)) + else if (cal_class == "CFCalendar360") { + rep(c(360L, 90L, 90L, 30L, 10L, 1L)[which(CFt$factor_periods == period)], nlevels(f)) + } else { + if (attr(f, "era") > 0L) { + if (cal_class == "CFCalendar366") { + switch(period, + "year" = rep(366L, nlevels(f)), + "season" = c(91L, 92L, 92L, 91L)[as.integer(substr(levels(f), 2, 2))], + "quarter" = c(91L, 91L, 92L, 92L)[as.integer(levels(f))], + "month" = c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[as.integer(levels(f))], + "dekad" = { + dk <- as.integer(substr(levels(f), 2L, 3L)) + ifelse(dk %% 3L > 0L | dk %in% c(12L, 18L, 27L, 33L), 10L, + ifelse(dk %in% c(3L, 9L, 15L, 21L, 24L, 30L, 36L), 11L, 9L)) + } + ) + } else { + switch(period, + "year" = rep(365L, nlevels(f)), + "season" = c(90L, 92L, 92L, 91L)[as.integer(substr(levels(f), 2, 2))], + "quarter" = c(90L, 91L, 92L, 92L)[as.integer(substr(levels(f), 2, 2))], + "month" = c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[as.integer(levels(f))], + "dekad" = { + dk <- as.integer(substr(levels(f), 2L, 3L)) + ifelse(dk %% 3L > 0L | dk %in% c(12L, 18L, 27L, 33L), 10L, + ifelse(dk %in% c(3L, 9L, 15L, 21L, 24L, 30L, 36L), 11L, 8L)) + } + ) + } + } else { # not an era factor + switch(period, + "year" = ifelse(cal$leap_year(as.integer(levels(f))), 366L, 365L), + "season" = { + year <- as.integer(substr(levels(f), 1L, 4L)) + season <- as.integer(substr(levels(f), 6L, 6L)) + ifelse(cal$leap_year(year), c(91L, 92L, 92L, 91L)[season], c(90L, 92L, 92L, 91L)[season]) + }, + "quarter" = { + year <- as.integer(substr(levels(f), 1L, 4L)) + qtr <- as.integer(substr(levels(f), 6L, 6L)) + ifelse(cal$leap_year(year), c(91L, 91L, 92L, 92L)[qtr], c(90L, 91L, 92L, 92L)[qtr]) + }, + "month" = { + year <- as.integer(substr(levels(f), 1L, 4L)) + month <- as.integer(substr(levels(f), 6L, 7L)) + ifelse(cal$leap_year(year), c(31L, 29L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[month], + c(31L, 28L, 31L, 30L, 31L, 30L, 31L, 31L, 30L, 31L, 30L, 31L)[month]) + }, + "dekad" = { + year <- as.integer(substr(levels(f), 1L, 4L)) + dk <- as.integer(substr(levels(f), 6L, 7L)) + ifelse(dk %% 3L > 0L | dk %in% c(12L, 18L, 27L, 33L), 10L, + ifelse(dk %in% c(3L, 9L, 15L, 21L, 24L, 30L, 36L), 11L, + ifelse(cal$leap_year(year), 9L, 8L))) + } + ) + } + } + res * upd +} diff --git a/R/zzz.R b/R/zzz.R index c69a0ff..c28da6b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -3,8 +3,6 @@ CFt <- new.env(parent = emptyenv()) .onLoad <- function(libname, pkgname) { - assign("calendars", data.frame(name = c("standard", "gregorian", "proleptic_gregorian", "julian", "360_day", "365_day", "366_day", "noleap", "all_leap"), - id = c(1L, 1L, 1L, 2L, 3L, 4L, 5L, 4L, 5L)), envir = CFt) assign("CFunits", data.frame(unit = c("years", "year", "yr", "months", "month", "mon", "days", "day", "d", "hours", "hour", "hr", "h", "minutes", "minute", "min", "seconds", "second", "sec", "s"), id = c(6L, 6L, 6L, 5L, 5L, 5L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L)), envir = CFt) assign("units", data.frame(name = c("seconds", "minutes", "hours", "days", "months", "years"), diff --git a/README.Rmd b/README.Rmd index 280b9de..bd38c4c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -161,5 +161,6 @@ This package has been tested with the following data sets: - CMIP5 - CORDEX - CMIP6 +- ROMS The package also operates on geographical and/or temporal subsets of data sets so long as the subsetted data complies with the CF Metadata Conventions. This includes subsetting in the [Climate Data Store](https://cds.climate.copernicus.eu/#!/home). Subsetted data from Climate4Impact is not automatically supported because the dimension names are not compliant with the CF Metadata Conventions, use the corresponding dimension names instead. diff --git a/README.md b/README.md index 85b4eec..5059b43 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,7 @@ This package has been tested with the following data sets: - CMIP5 - CORDEX - CMIP6 +- ROMS The package also operates on geographical and/or temporal subsets of data sets so long as the subsetted data complies with the CF Metadata diff --git a/man/CFCalendar.Rd b/man/CFCalendar.Rd new file mode 100644 index 0000000..e7320bd --- /dev/null +++ b/man/CFCalendar.Rd @@ -0,0 +1,282 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CFCalendar.R +\docType{class} +\name{CFCalendar} +\alias{CFCalendar} +\title{Basic CF calendar} +\description{ +This class represents a basic CF calendar. It should not be +instantiated directly; instead, use one of the descendant classes. + +This internal class stores the information to represent date and time +values using the CF conventions. An instance is created by the exported +\link{CFTime} class, which also exposes the relevant properties of this class. + +The following calendars are supported: + +\itemize{ +\item \code{\link[=CFCalendarStandard]{gregorian\\standard}}, the international standard calendar for civil use. +\item \code{\link[=CFCalendarProleptic]{proleptic_gregorian}}, the standard calendar but extending before 1582-10-15 +when the Gregorian calendar was adopted. +\item \code{\link[=CFCalendarJulian]{julian}}, every fourth year is a leap year (so including the years 1700, 1800, 1900, 2100, etc). +\item \code{\link[=CFCalendar365]{noleap\\365_day}}, all years have 365 days. +\item \code{\link[=CFCalendar366]{all_leap\\366_day}}, all years have 366 days. +\item \code{\link[=CFCalendar360]{360_day}}, all years have 360 days, divided over 12 months of 30 days. +} +} +\references{ +https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#calendar +} +\section{Public fields}{ +\if{html}{\out{