Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: support holidays on non-UTC time zones #176

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 137 additions & 1 deletion crates/augurs-prophet/src/features.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
//! Features used by Prophet, such as seasonality, regressors and holidays.
use std::num::NonZeroU32;

use crate::{positive_float::PositiveFloat, Error, TimestampSeconds};
use crate::{
positive_float::PositiveFloat, prophet::prep::ONE_DAY_IN_SECONDS_INT, Error, TimestampSeconds,
};

/// The mode of a seasonality, regressor, or holiday.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
Expand All @@ -20,6 +22,7 @@ pub struct Holiday {
pub(crate) lower_window: Option<Vec<u32>>,
pub(crate) upper_window: Option<Vec<u32>>,
pub(crate) prior_scale: Option<PositiveFloat>,
pub(crate) utc_offset: TimestampSeconds,
}

impl Holiday {
Expand All @@ -30,6 +33,7 @@ impl Holiday {
lower_window: None,
upper_window: None,
prior_scale: None,
utc_offset: 0,
}
}

Expand Down Expand Up @@ -76,6 +80,29 @@ impl Holiday {
self.prior_scale = Some(prior_scale);
self
}

/// Set the UTC offset for the holiday, in seconds.
///
/// Timestamps of a holiday's occurrences are rounded down to the nearest day,
/// but since we're using Unix timestamps rather than timezone-aware dates,
/// holidays default to assuming the 'day' was for 24h from midnight UTC.
///
/// If instead the holiday should be from midnight in a different timezone,
/// use this method to set the offset from UTC of the desired timezone.
///
/// Defaults to 0.
pub fn with_utc_offset(mut self, utc_offset: TimestampSeconds) -> Self {
self.utc_offset = utc_offset;
self
}

/// Return the Unix timestamp of the start of the day containing `ds`,
/// where days begin at midnight in the holiday's timezone (UTC shifted by
/// the holiday's `utc_offset`).
///
/// The result is always less than or equal to `ds`, including for
/// timestamps before the Unix epoch.
pub(crate) fn floor_day(&self, ds: TimestampSeconds) -> TimestampSeconds {
    // Seconds elapsed since the most recent local midnight.
    //
    // `rem_euclid` keeps this in `0..ONE_DAY_IN_SECONDS_INT` even when
    // `ds + utc_offset` is negative; the `%` operator would yield a negative
    // remainder there, which would round *up* to the following midnight.
    let since_local_midnight = (ds + self.utc_offset).rem_euclid(ONE_DAY_IN_SECONDS_INT);
    ds - since_local_midnight
}
}

/// Whether or not to standardize a regressor.
Expand Down Expand Up @@ -232,3 +259,112 @@ impl Seasonality {
self
}
}

#[cfg(test)]
mod test {
use chrono::{FixedOffset, TimeZone, Utc};

use crate::features::Holiday;

#[test]
fn holiday_floor_day_no_offset() {
    // With the default offset of zero, days start at midnight UTC.
    let midnight = Utc
        .with_ymd_and_hms(2024, 11, 21, 0, 0, 0)
        .unwrap()
        .timestamp();
    let mid_afternoon = Utc
        .with_ymd_and_hms(2024, 11, 21, 15, 3, 12)
        .unwrap()
        .timestamp();

    let holiday = Holiday::new(vec![]);

    // A timestamp already at midnight is returned unchanged.
    assert_eq!(holiday.floor_day(midnight), midnight);
    // Any later time on the same day rounds down to that midnight.
    assert_eq!(holiday.floor_day(mid_afternoon), midnight);
}

#[test]
fn holiday_floor_day_positive_offset() {
    // Four hours ahead of UTC.
    let tz = FixedOffset::east_opt(60 * 60 * 4).unwrap();
    let local_midnight = tz
        .with_ymd_and_hms(2024, 11, 21, 0, 0, 0)
        .unwrap()
        .timestamp();
    let local_afternoon = tz
        .with_ymd_and_hms(2024, 11, 21, 15, 3, 12)
        .unwrap()
        .timestamp();

    let holiday = Holiday::new(vec![]).with_utc_offset(i64::from(tz.local_minus_utc()));

    // Local midnight is already aligned to the start of the local day.
    assert_eq!(holiday.floor_day(local_midnight), local_midnight);
    // Later local times on the same day round down to local midnight.
    assert_eq!(holiday.floor_day(local_afternoon), local_midnight);
}

#[test]
fn holiday_floor_day_negative_offset() {
    // Three hours behind UTC.
    let tz = FixedOffset::west_opt(60 * 60 * 3).unwrap();
    let local_midnight = tz
        .with_ymd_and_hms(2024, 11, 21, 0, 0, 0)
        .unwrap()
        .timestamp();
    let local_afternoon = tz
        .with_ymd_and_hms(2024, 11, 21, 15, 3, 12)
        .unwrap()
        .timestamp();

    let holiday = Holiday::new(vec![]).with_utc_offset(i64::from(tz.local_minus_utc()));

    // Local midnight is already aligned to the start of the local day.
    assert_eq!(holiday.floor_day(local_midnight), local_midnight);
    // Later local times on the same day round down to local midnight.
    assert_eq!(holiday.floor_day(local_afternoon), local_midnight);
}

#[test]
fn holiday_floor_day_edge_cases() {
    // Test maximum valid offset (UTC+14).
    let max_offset = 14 * 60 * 60;
    let offset = FixedOffset::east_opt(max_offset).unwrap();
    let expected = offset
        .with_ymd_and_hms(2024, 11, 21, 0, 0, 0)
        .unwrap()
        .timestamp();
    let holiday_max = Holiday::new(vec![]).with_utc_offset(offset.local_minus_utc() as i64);
    let local_noon = offset
        .with_ymd_and_hms(2024, 11, 21, 12, 0, 0)
        .unwrap()
        .timestamp();
    assert_eq!(holiday_max.floor_day(local_noon), expected);

    // Test near day boundary: with a one-minute offset, the last second of
    // the local day must still round down to that day's local midnight.
    let offset = FixedOffset::east_opt(60).unwrap();
    let expected = offset
        .with_ymd_and_hms(2024, 11, 21, 0, 0, 0)
        .unwrap()
        .timestamp();
    let holiday_near = Holiday::new(vec![]).with_utc_offset(offset.local_minus_utc() as i64);
    let last_second_of_day = offset
        .with_ymd_and_hms(2024, 11, 21, 23, 59, 59)
        .unwrap()
        .timestamp();
    // Feed the boundary timestamp directly to `holiday_near`; routing it
    // through another holiday's `floor_day` first would move it away from
    // the boundary this case is meant to exercise.
    assert_eq!(holiday_near.floor_day(last_second_of_day), expected);
}
}
179 changes: 169 additions & 10 deletions crates/augurs-prophet/src/prophet/prep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{
/// Number of seconds in a year of 365.25 days.
const ONE_YEAR_IN_SECONDS: f64 = 365.25 * 24.0 * 60.0 * 60.0;
/// Number of seconds in a seven-day week.
const ONE_WEEK_IN_SECONDS: f64 = 7.0 * 24.0 * 60.0 * 60.0;
/// Number of seconds in a 24-hour day, as a float.
const ONE_DAY_IN_SECONDS: f64 = 24.0 * 60.0 * 60.0;
const ONE_DAY_IN_SECONDS_INT: i64 = 24 * 60 * 60;
pub(crate) const ONE_DAY_IN_SECONDS_INT: i64 = 24 * 60 * 60;

#[derive(Debug, Clone, Default)]
pub(super) struct Scales {
Expand Down Expand Up @@ -682,10 +682,9 @@ impl<O> Prophet<O> {
})
.unwrap_or_else(|| Box::new(std::iter::repeat(0)));

for (dt, lower, upper) in izip!(holiday.ds, lower, upper) {
for (dt, lower, upper) in izip!(&holiday.ds, lower, upper) {
// Round down the original timestamps to the nearest day.
let remainder = dt % ONE_DAY_IN_SECONDS_INT;
let dt_date = dt - remainder;
let dt_date = holiday.floor_day(*dt);

// Check each of the possible offsets allowed by the lower/upper windows.
// We know that the lower window is always positive since it was originally
Expand All @@ -702,11 +701,8 @@ impl<O> Prophet<O> {
.or_insert_with(|| vec![0.0; ds.len()]);

// Get the indices of the ds column that are 'on holiday'.
// Set the value of the holiday column 1.0 for those dates.
for loc in ds
.iter()
.positions(|x| (x - (x % ONE_DAY_IN_SECONDS_INT)) == occurrence)
{
// Set the value of the holiday column to 1.0 for those dates.
for loc in ds.iter().positions(|&x| holiday.floor_day(x) == occurrence) {
col[loc] = 1.0;
}
}
Expand Down Expand Up @@ -1094,9 +1090,19 @@ mod test {

use super::*;
use augurs_testing::assert_approx_eq;
use chrono::NaiveDate;
use chrono::{FixedOffset, NaiveDate, TimeZone, Utc};
use pretty_assertions::assert_eq;

// Concatenate one or more iterable collections (anything exposing `.iter()`
// over cloneable items) into a single `Vec`, preserving argument order.
// A trailing comma after the last argument is accepted.
macro_rules! concat_all {
    ($($part:expr),+ $(,)?) => {{
        ::std::iter::empty()
            $(.chain($part.iter().cloned()))+
            .collect::<Vec<_>>()
    }};
}

#[test]
fn setup_dataframe() {
let (data, _) = train_test_split(daily_univariate_ts(), 0.5);
Expand Down Expand Up @@ -1213,6 +1219,159 @@ mod test {
);
}

#[test]
fn make_holiday_features() {
    /// Timestamps of the holiday occurrences used by every holiday in this
    /// test: 2024-01-02 00:00, 2024-01-02 12:00 and 2024-01-04 00:00, all
    /// interpreted as wall-clock times in `tz`.
    fn occurrences<Tz: TimeZone>(tz: &Tz) -> Vec<i64> {
        [(2, 0), (2, 12), (4, 0)]
            .iter()
            .map(|&(day, hour)| {
                tz.with_ymd_and_hms(2024, 1, day, hour, 0, 0)
                    .unwrap()
                    .timestamp()
            })
            .collect()
    }

    // Create some hourly data between 2024-01-01 and 2024-01-07.
    let start = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
    let end = Utc.with_ymd_and_hms(2024, 1, 7, 0, 0, 0).unwrap();
    let ds = std::iter::successors(Some(start), |d| {
        d.checked_add_signed(chrono::Duration::hours(1))
    })
    .take_while(|d| *d < end)
    .map(|d| d.timestamp())
    .collect_vec();

    // Create three holidays, all on 2024-01-02 and 2024-01-04:
    // one in UTC, one in UTC-3, and one in UTC-3 with a lower and upper
    // window of one day each.
    // A day may appear more than once in the occurrences since the data is
    // hourly, and this shouldn't affect the results.
    let non_utc_tz = FixedOffset::west_opt(3600 * 3).unwrap();
    let non_utc_offset = i64::from(non_utc_tz.local_minus_utc());
    let holidays: HashMap<String, Holiday> = [
        ("UTC holiday".to_string(), Holiday::new(occurrences(&Utc))),
        (
            "Non-UTC holiday".to_string(),
            Holiday::new(occurrences(&non_utc_tz)).with_utc_offset(non_utc_offset),
        ),
        (
            "Non-UTC holiday with windows".to_string(),
            Holiday::new(occurrences(&non_utc_tz))
                .with_lower_window(vec![1; 3])
                .unwrap()
                .with_upper_window(vec![1; 3])
                .unwrap()
                .with_utc_offset(non_utc_offset),
        ),
    ]
    .into();
    let opts = ProphetOptions {
        holidays: holidays.clone(),
        ..Default::default()
    };
    let prophet = Prophet::new(opts, MockOptimizer::new());
    let mut features_frame = FeaturesFrame::new();
    let mut prior_scales = Vec::new();
    let mut modes = Modes::default();

    let holiday_names = prophet.make_holiday_features(
        &ds,
        holidays,
        &mut features_frame,
        &mut prior_scales,
        &mut modes,
    );
    assert_eq!(
        holiday_names,
        HashSet::from([
            "UTC holiday".to_string(),
            "Non-UTC holiday".to_string(),
            "Non-UTC holiday with windows".to_string()
        ])
    );

    // One column each for the two window-less holidays, plus three columns
    // (offsets -1, 0 and +1) for the holiday with windows.
    assert_eq!(features_frame.names.len(), 5);
    let utc_idx = features_frame
        .names
        .iter()
        .position(|x| matches!(x, FeatureName::Holiday { name, .. } if name == "UTC holiday"))
        .unwrap();
    assert_eq!(
        features_frame.data[utc_idx],
        concat_all!(
            &[0.0; 24], // 2024-01-01 - off holiday
            &[1.0; 24], // 2024-01-02 - on holiday
            &[0.0; 24], // 2024-01-03 - off holiday
            &[1.0; 24], // 2024-01-04 - on holiday
            &[0.0; 48], // 2024-01-05 and 2024-01-06 - off holiday
        ),
    );
    let non_utc_idx = features_frame
        .names
        .iter()
        .position(
            |x| matches!(x, FeatureName::Holiday { name, .. } if name == "Non-UTC holiday"),
        )
        .unwrap();
    assert_eq!(
        features_frame.data[non_utc_idx],
        concat_all!(
            &[0.0; 24], // 2024-01-01 - off holiday
            &[0.0; 3],  // first 3 hours of 2024-01-02 in UTC are off holiday
            &[1.0; 24], // rest of 2024-01-02 in UTC, and first 3 hours of the next day, are on holiday
            &[0.0; 24], // continue the cycle...
            &[1.0; 24],
            &[0.0; 21 + 24],
        ),
    );

    let non_utc_lower_window_idx = features_frame
        .names
        .iter()
        .position(
            |x| matches!(x, FeatureName::Holiday { name, _offset: -1 } if name == "Non-UTC holiday with windows"),
        )
        .unwrap();
    assert_eq!(
        features_frame.data[non_utc_lower_window_idx],
        concat_all!(
            &[0.0; 3],  // first 3 hours of 2024-01-01 in UTC - off holiday
            &[1.0; 24], // rest of 2024-01-01 and start of 2024-01-02 are on holiday
            &[0.0; 24], // continue the cycle
            &[1.0; 24],
            &[0.0; 21 + 48],
        ),
    );
}

#[test]
fn regressor_column_matrix() {
let holiday_dates = ["2012-10-09", "2013-10-09"]
Expand Down
Loading
Loading