Skip to content

Commit

Permalink
fix: support holidays on non-UTC time zones
Browse files Browse the repository at this point in the history
Prior to this commit, when determining whether a given holiday's
features should be 0 or 1 for a given timestamp, we checked whether
any _day_ covered by the holiday's lower/upper windows contained
the timestamp, and set the value to 1 if so. However, when rounding
the holiday's timestamps down to the start of the day we assumed that
the holiday started and ended at midnight UTC, which won't be the case
for holidays observed in any time zone other than UTC.

This commit does three things:

1. adds the 'Holiday::with_utc_offset()' method which allows a holiday to
   use non-UTC-aligned days when its timestamps are being floored
2. rather than adding a separate feature each time one of a holiday's
   lower/upper windows is found to contain a timestamp, reuses the same
   feature for each offset, which is what the Python Prophet implementation
   does. Really this part should be moved to a separate bugfix PR...
3. switches the lower and upper windows to be u32 instead of i32, to
   reflect the fact that they should never really be negative (it is
   quite confusing that the original Prophet expects lower windows to
   always be negative and upper windows always positive).
  • Loading branch information
sd2k committed Nov 21, 2024
1 parent effd7ee commit fbd0d11
Show file tree
Hide file tree
Showing 3 changed files with 277 additions and 32 deletions.
63 changes: 57 additions & 6 deletions crates/augurs-prophet/src/features.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
//! Features used by Prophet, such as seasonality, regressors and holidays.
use std::num::NonZeroU32;

use crate::{positive_float::PositiveFloat, Error, TimestampSeconds};
use crate::{
positive_float::PositiveFloat, prophet::prep::ONE_DAY_IN_SECONDS_INT, Error, TimestampSeconds,
};

/// The mode of a seasonality, regressor, or holiday.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
Expand All @@ -17,9 +19,10 @@ pub enum FeatureMode {
#[derive(Debug, Clone)]
pub struct Holiday {
/// Timestamps associated with the holiday. The window vectors, when set,
/// must have the same length as this vector.
pub(crate) ds: Vec<TimestampSeconds>,
// NOTE(review): the two `i32` window fields below are the pre-change side of
// this diff view; the `u32` fields that follow are their replacements.
pub(crate) lower_window: Option<Vec<i32>>,
pub(crate) upper_window: Option<Vec<i32>>,
/// Number of days before the holiday on which it is also observed, if set.
pub(crate) lower_window: Option<Vec<u32>>,
/// Number of days after the holiday on which it is also observed, if set.
pub(crate) upper_window: Option<Vec<u32>>,
/// Optional prior scale for the holiday.
pub(crate) prior_scale: Option<PositiveFloat>,
/// Offset from UTC, in seconds, applied when flooring timestamps to day
/// boundaries. Initialised to 0 by the constructor.
pub(crate) utc_offset: TimestampSeconds,
}

impl Holiday {
Expand All @@ -30,16 +33,17 @@ impl Holiday {
lower_window: None,
upper_window: None,
prior_scale: None,
utc_offset: 0,
}
}

/// Set the lower window for the holiday.
///
/// The lower window is the number of days before the holiday
/// that it is observed. For example, if the holiday is on
/// 2023-01-01 and the lower window is -1, then the holiday will
/// 2023-01-01 and the lower window is 1, then the holiday will
/// _also_ be observed on 2022-12-31.
pub fn with_lower_window(mut self, lower_window: Vec<i32>) -> Result<Self, Error> {
pub fn with_lower_window(mut self, lower_window: Vec<u32>) -> Result<Self, Error> {
if self.ds.len() != lower_window.len() {
return Err(Error::MismatchedLengths {
a_name: "ds".to_string(),
Expand All @@ -58,7 +62,7 @@ impl Holiday {
/// that it is observed. For example, if the holiday is on
/// 2023-01-01 and the upper window is 1, then the holiday will
/// _also_ be observed on 2023-01-02.
pub fn with_upper_window(mut self, upper_window: Vec<i32>) -> Result<Self, Error> {
pub fn with_upper_window(mut self, upper_window: Vec<u32>) -> Result<Self, Error> {
if self.ds.len() != upper_window.len() {
return Err(Error::MismatchedLengths {
a_name: "ds".to_string(),
Expand All @@ -76,6 +80,25 @@ impl Holiday {
self.prior_scale = Some(prior_scale);
self
}

/// Set the UTC offset for the holiday, in seconds.
///
/// The UTC offset is used when deciding whether a timestamp is
/// on the holiday.
///
/// Defaults to 0.
pub fn with_utc_offset(mut self, utc_offset: TimestampSeconds) -> Self {
self.utc_offset = utc_offset;
self
}

/// Return the Unix timestamp of the start of the day containing `ds`,
/// where day boundaries are shifted by the holiday's UTC offset.
///
/// `ds + utc_offset` is treated as the holiday's local time. The number of
/// seconds elapsed since the most recent local midnight is then subtracted
/// from `ds`, so the returned value is still an un-shifted Unix timestamp
/// and is never greater than `ds`.
///
/// Timestamps for which `ds + utc_offset` is negative (instants before the
/// Unix epoch in local time) are floored correctly rather than being
/// rounded up to the following day boundary.
pub(crate) fn floor_day(&self, ds: TimestampSeconds) -> TimestampSeconds {
    // `%` takes the sign of the dividend, which would yield a negative
    // remainder (and therefore round *up*) for negative local times.
    // `rem_euclid` always returns a non-negative remainder.
    let seconds_into_day = (ds + self.utc_offset).rem_euclid(ONE_DAY_IN_SECONDS_INT);
    ds - seconds_into_day
}
}

/// Whether or not to standardize a regressor.
Expand Down Expand Up @@ -232,3 +255,31 @@ impl Seasonality {
self
}
}

#[cfg(test)]
mod test {
    use crate::features::Holiday;

    /// With the default offset of zero, days begin at midnight UTC.
    #[test]
    fn holiday_floor_day_no_offset() {
        // 2024-11-21T00:00:00Z and a later instant on the same UTC day.
        let midnight_utc = 1732147200;
        let later_same_day = 1732189701;

        let holiday = Holiday::new(vec![]);
        assert_eq!(holiday.floor_day(midnight_utc), midnight_utc);
        assert_eq!(holiday.floor_day(later_same_day), midnight_utc);
    }

    /// With a +4h offset, the day begins at 20:00 UTC on the previous day.
    #[test]
    fn holiday_floor_day_positive_offset() {
        let four_hours = 4 * 60 * 60;
        // 2024-11-20T20:00:00Z.
        let day_start = 1732132800;

        let holiday = Holiday::new(vec![]).with_utc_offset(four_hours);
        assert_eq!(holiday.floor_day(day_start), day_start);
        assert_eq!(holiday.floor_day(day_start + 1), day_start);
    }

    /// With a -3h offset, the day begins at 03:00 UTC.
    #[test]
    fn holiday_floor_day_negative_offset() {
        let minus_three_hours = -3 * 60 * 60;
        // 2024-11-21T03:00:00Z and 2024-11-21T05:00:00Z.
        let day_start = 1732158000;
        let two_hours_in = 1732165200;

        let holiday = Holiday::new(vec![]).with_utc_offset(minus_three_hours);
        assert_eq!(holiday.floor_day(day_start), day_start);
        assert_eq!(holiday.floor_day(two_hours_in), day_start);
    }
}
Loading

0 comments on commit fbd0d11

Please sign in to comment.