Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: support holidays on non-UTC time zones #176

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion crates/augurs-prophet/src/features.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
//! Features used by Prophet, such as seasonality, regressors and holidays.
use std::num::NonZeroU32;

use crate::{positive_float::PositiveFloat, Error, TimestampSeconds};
use crate::{
positive_float::PositiveFloat, prophet::prep::ONE_DAY_IN_SECONDS_INT, Error, TimestampSeconds,
};

/// The mode of a seasonality, regressor, or holiday.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
Expand All @@ -20,6 +22,7 @@ pub struct Holiday {
pub(crate) lower_window: Option<Vec<u32>>,
pub(crate) upper_window: Option<Vec<u32>>,
pub(crate) prior_scale: Option<PositiveFloat>,
pub(crate) utc_offset: TimestampSeconds,
}

impl Holiday {
Expand All @@ -30,6 +33,7 @@ impl Holiday {
lower_window: None,
upper_window: None,
prior_scale: None,
utc_offset: 0,
}
}

Expand Down Expand Up @@ -76,6 +80,25 @@ impl Holiday {
self.prior_scale = Some(prior_scale);
self
}

/// Set the UTC offset for the holiday, in seconds.
///
/// The UTC offset is used when deciding whether a timestamp is
/// on the holiday.
///
/// Defaults to 0.
pub fn with_utc_offset(mut self, utc_offset: TimestampSeconds) -> Self {
self.utc_offset = utc_offset;
self
}

/// Return the Unix timestamp of the start of the day containing `ds`, where
/// day boundaries are taken in the holiday's UTC offset.
///
/// `ds` is shifted by the UTC offset before the day boundary is located, so
/// the result is the UTC timestamp of midnight in the holiday's offset rather
/// than of UTC midnight (the two coincide when the offset is 0).
///
/// The result is always rounded down, including for timestamps before the
/// Unix epoch.
pub(crate) fn floor_day(&self, ds: TimestampSeconds) -> TimestampSeconds {
    // `rem_euclid` keeps the remainder in `0..ONE_DAY_IN_SECONDS_INT` even when
    // `ds + utc_offset` is negative. Plain `%` truncates towards zero, which
    // yields a negative remainder there and would round *up* to the following
    // midnight instead of down.
    let seconds_into_day = (ds + self.utc_offset).rem_euclid(ONE_DAY_IN_SECONDS_INT);
    ds - seconds_into_day
}
}

/// Whether or not to standardize a regressor.
Expand Down Expand Up @@ -232,3 +255,31 @@ impl Seasonality {
self
}
}

#[cfg(test)]
mod test {
    use crate::features::Holiday;

    // Each test lists `(input timestamp, expected floored timestamp)` pairs.

    #[test]
    fn holiday_floor_day_no_offset() {
        let holiday = Holiday::new(vec![]);
        for (ds, expected) in [(1732147200, 1732147200), (1732189701, 1732147200)] {
            assert_eq!(holiday.floor_day(ds), expected);
        }
    }

    #[test]
    fn holiday_floor_day_positive_offset() {
        // Four hours ahead of UTC.
        let holiday = Holiday::new(vec![]).with_utc_offset(60 * 60 * 4);
        for (ds, expected) in [(1732132800, 1732132800), (1732132801, 1732132800)] {
            assert_eq!(holiday.floor_day(ds), expected);
        }
    }

    #[test]
    fn holiday_floor_day_negative_offset() {
        // Three hours behind UTC.
        let holiday = Holiday::new(vec![]).with_utc_offset(-60 * 60 * 3);
        for (ds, expected) in [(1732158000, 1732158000), (1732165200, 1732158000)] {
            assert_eq!(holiday.floor_day(ds), expected);
        }
    }
}
179 changes: 169 additions & 10 deletions crates/augurs-prophet/src/prophet/prep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{
/// Number of seconds in a year of 365.25 days.
const ONE_YEAR_IN_SECONDS: f64 = 365.25 * 24.0 * 60.0 * 60.0;
/// Number of seconds in a 7-day week.
const ONE_WEEK_IN_SECONDS: f64 = 7.0 * 24.0 * 60.0 * 60.0;
/// Number of seconds in a 24-hour day, as a float.
const ONE_DAY_IN_SECONDS: f64 = 24.0 * 60.0 * 60.0;
// Number of seconds in a 24-hour day, as an integer.
const ONE_DAY_IN_SECONDS_INT: i64 = 24 * 60 * 60;
/// Number of seconds in a 24-hour day, as an integer.
pub(crate) const ONE_DAY_IN_SECONDS_INT: i64 = 24 * 60 * 60;

#[derive(Debug, Clone, Default)]
pub(super) struct Scales {
Expand Down Expand Up @@ -682,10 +682,9 @@ impl<O> Prophet<O> {
})
.unwrap_or_else(|| Box::new(std::iter::repeat(0)));

for (dt, lower, upper) in izip!(holiday.ds, lower, upper) {
for (dt, lower, upper) in izip!(&holiday.ds, lower, upper) {
// Round down the original timestamps to the nearest day.
let remainder = dt % ONE_DAY_IN_SECONDS_INT;
let dt_date = dt - remainder;
let dt_date = holiday.floor_day(*dt);

// Check each of the possible offsets allowed by the lower/upper windows.
// We know that the lower window is always positive since it was originally
Expand All @@ -702,11 +701,8 @@ impl<O> Prophet<O> {
.or_insert_with(|| vec![0.0; ds.len()]);

// Get the indices of the ds column that are 'on holiday'.
// Set the value of the holiday column 1.0 for those dates.
for loc in ds
.iter()
.positions(|x| (x - (x % ONE_DAY_IN_SECONDS_INT)) == occurrence)
{
// Set the value of the holiday column to 1.0 for those dates.
for loc in ds.iter().positions(|&x| holiday.floor_day(x) == occurrence) {
col[loc] = 1.0;
}
}
Expand Down Expand Up @@ -1094,9 +1090,19 @@ mod test {

use super::*;
use augurs_testing::assert_approx_eq;
use chrono::NaiveDate;
use chrono::{FixedOffset, NaiveDate, TimeZone, Utc};
use pretty_assertions::assert_eq;

/// Concatenate one or more slice-like expressions into a single `Vec`,
/// cloning each element and preserving argument order.
///
/// Every argument must offer an `iter()` yielding references to cloneable
/// items. A trailing comma is accepted.
macro_rules! concat_all {
    ($($part:expr),+ $(,)?) => {{
        let mut joined = Vec::new();
        $(
            for item in $part.iter().cloned() {
                joined.push(item);
            }
        )+
        joined
    }};
}

#[test]
fn setup_dataframe() {
let (data, _) = train_test_split(daily_univariate_ts(), 0.5);
Expand Down Expand Up @@ -1213,6 +1219,159 @@ mod test {
);
}

// Checks the holiday feature columns produced for hourly data when holidays
// are defined in UTC, in UTC-3, and in UTC-3 with lower/upper windows.
#[test]
fn make_holiday_features() {
// Create hourly data from 2024-01-01 00:00 UTC up to, but not including,
// 2024-01-07 00:00 UTC (6 days, i.e. 144 points).
let start = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
let end = Utc.with_ymd_and_hms(2024, 1, 7, 0, 0, 0).unwrap();
let ds = std::iter::successors(Some(start), |d| {
d.checked_add_signed(chrono::Duration::hours(1))
})
.take_while(|d| *d < end)
.map(|d| d.timestamp())
.collect_vec();
// Create three holidays, each falling on 2024-01-02 and 2024-01-04:
// one in UTC, one in UTC-3, and one in UTC-3 with lower and upper
// windows of one day.
// A holiday date may be listed more than once (here 2024-01-02 appears
// at both 00:00 and 12:00); this shouldn't affect the results.
let non_utc_tz = FixedOffset::west_opt(3600 * 3).unwrap();
let holidays: HashMap<String, Holiday> = [
(
"UTC holiday".to_string(),
Holiday::new(vec![
Utc.with_ymd_and_hms(2024, 1, 2, 0, 0, 0)
.unwrap()
.timestamp(),
Utc.with_ymd_and_hms(2024, 1, 2, 12, 0, 0)
.unwrap()
.timestamp(),
Utc.with_ymd_and_hms(2024, 1, 4, 0, 0, 0)
.unwrap()
.timestamp(),
]),
),
(
"Non-UTC holiday".to_string(),
Holiday::new(vec![
non_utc_tz
.with_ymd_and_hms(2024, 1, 2, 0, 0, 0)
.unwrap()
.timestamp(),
non_utc_tz
.with_ymd_and_hms(2024, 1, 2, 12, 0, 0)
.unwrap()
.timestamp(),
non_utc_tz
.with_ymd_and_hms(2024, 1, 4, 0, 0, 0)
.unwrap()
.timestamp(),
])
.with_utc_offset(-3 * 3600),
),
(
"Non-UTC holiday with windows".to_string(),
Holiday::new(vec![
non_utc_tz
.with_ymd_and_hms(2024, 1, 2, 0, 0, 0)
.unwrap()
.timestamp(),
non_utc_tz
.with_ymd_and_hms(2024, 1, 2, 12, 0, 0)
.unwrap()
.timestamp(),
non_utc_tz
.with_ymd_and_hms(2024, 1, 4, 0, 0, 0)
.unwrap()
.timestamp(),
])
.with_lower_window(vec![1; 3])
.unwrap()
.with_upper_window(vec![1; 3])
.unwrap()
.with_utc_offset(-3 * 3600),
),
]
.into();
let opts = ProphetOptions {
holidays: holidays.clone(),
..Default::default()
};
let prophet = Prophet::new(opts, MockOptimizer::new());
let mut features_frame = FeaturesFrame::new();
let mut prior_scales = Vec::new();
let mut modes = Modes::default();

let holiday_names = prophet.make_holiday_features(
&ds,
holidays,
&mut features_frame,
&mut prior_scales,
&mut modes,
);
assert_eq!(
holiday_names,
HashSet::from([
"UTC holiday".to_string(),
"Non-UTC holiday".to_string(),
"Non-UTC holiday with windows".to_string()
])
);

// Expect 5 feature columns: presumably one for each of the two holidays
// without windows, plus three (offsets -1, 0 and +1) for the holiday
// with windows. TODO confirm.
assert_eq!(features_frame.names.len(), 5);
let utc_idx = features_frame
.names
.iter()
.position(|x| matches!(x, FeatureName::Holiday { name, .. } if name == "UTC holiday"))
.unwrap();
assert_eq!(
features_frame.data[utc_idx],
concat_all!(
&[0.0; 24], // 2024-01-01 - off holiday
&[1.0; 24], // 2024-01-02 - on holiday
&[0.0; 24], // 2024-01-03 - off holiday
&[1.0; 24], // 2024-01-04 - on holiday
&[0.0; 48], // 2024-01-05 and 2024-01-06 - off holiday
),
);
let non_utc_idx = features_frame
.names
.iter()
.position(
|x| matches!(x, FeatureName::Holiday { name, .. } if name == "Non-UTC holiday"),
)
.unwrap();
assert_eq!(
features_frame.data[non_utc_idx],
concat_all!(
&[0.0; 24], // 2024-01-01 - off holiday
&[0.0; 3], // first 3 hours of 2024-01-02 in UTC are off holiday
&[1.0; 24], // rest of 2024-01-02 in UTC, and first 3 hours of the next day, are on holiday
&[0.0; 24], // 03:00 UTC on 2024-01-03 until 03:00 UTC on 2024-01-04 - off holiday
&[1.0; 24], // 03:00 UTC on 2024-01-04 until 03:00 UTC on 2024-01-05 - on holiday
&[0.0; 21 + 24], // rest of 2024-01-05 and all of 2024-01-06 - off holiday
),
);

// The `-1` offset column of the windowed holiday should be on holiday
// one day before each holiday date.
let non_utc_lower_window_idx = features_frame
.names
.iter()
.position(
|x| matches!(x, FeatureName::Holiday { name, _offset: -1 } if name == "Non-UTC holiday with windows"),
)
.unwrap();
sd2k marked this conversation as resolved.
Show resolved Hide resolved
assert_eq!(
features_frame.data[non_utc_lower_window_idx],
concat_all!(
&[0.0; 3], // first 3 hours of 2024-01-01 in UTC - off holiday
&[1.0; 24], // rest of 2024-01-01 and start of 2024-01-02 are on holiday
&[0.0; 24], // 03:00 UTC on 2024-01-02 until 03:00 UTC on 2024-01-03 - off holiday
&[1.0; 24], // 03:00 UTC on 2024-01-03 until 03:00 UTC on 2024-01-04 - on holiday
&[0.0; 21 + 48], // rest of 2024-01-04 through 2024-01-06 - off holiday
),
);
}

#[test]
fn regressor_column_matrix() {
let holiday_dates = ["2012-10-09", "2013-10-09"]
Expand Down
11 changes: 11 additions & 0 deletions js/augurs-prophet-js/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1247,6 +1247,14 @@ pub struct Holiday {
/// The prior scale for the holiday.
#[tsify(optional)]
pub prior_scale: Option<f64>,

/// The UTC offset for the holiday, in seconds.
///
/// The UTC offset is used when deciding whether a timestamp is
/// on the holiday.
#[tsify(optional)]
#[tsify(type = "TimestampSeconds | undefined")]
pub utc_offset_seconds: Option<TimestampSeconds>,
}

impl TryFrom<Holiday> for augurs_prophet::Holiday {
Expand All @@ -1263,6 +1271,9 @@ impl TryFrom<Holiday> for augurs_prophet::Holiday {
if let Some(prior_scale) = value.prior_scale {
holiday = holiday.with_prior_scale(prior_scale.try_into()?);
}
if let Some(utc_offset_seconds) = value.utc_offset_seconds {
holiday = holiday.with_utc_offset(utc_offset_seconds);
}
Ok(holiday)
}
}
Expand Down
Loading