diff --git a/Cargo.toml b/Cargo.toml index bf3eac2e..d2fb9a62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ augurs-testing = { path = "crates/augurs-testing" } chrono = "0.4.38" distrs = "0.2.1" itertools = "0.13.0" +num-traits = "0.2.19" roots = "0.0.8" serde = { version = "1.0.166", features = ["derive"] } thiserror = "1.0.40" diff --git a/crates/augurs-prophet/Cargo.toml b/crates/augurs-prophet/Cargo.toml index 19ebe4b3..563100a5 100644 --- a/crates/augurs-prophet/Cargo.toml +++ b/crates/augurs-prophet/Cargo.toml @@ -10,6 +10,7 @@ keywords.workspace = true [dependencies] itertools.workspace = true +num-traits.workspace = true thiserror.workspace = true tracing.workspace = true diff --git a/crates/augurs-prophet/src/data.rs b/crates/augurs-prophet/src/data.rs index 35c6ec5b..11d64736 100644 --- a/crates/augurs-prophet/src/data.rs +++ b/crates/augurs-prophet/src/data.rs @@ -77,6 +77,30 @@ impl TrainingData { } self } + + #[cfg(test)] + pub(crate) fn tail(mut self, n: usize) -> Self { + self.ds = self.ds.split_off(n); + self.y = self.y.split_off(n); + if let Some(cap) = self.cap.as_mut() { + *cap = cap.split_off(n); + } + if let Some(floor) = self.floor.as_mut() { + *floor = floor.split_off(n); + } + for (_, v) in self.x.iter_mut() { + *v = v.split_off(n); + } + for (_, v) in self.seasonality_conditions.iter_mut() { + *v = v.split_off(n); + } + self + } + + #[cfg(test)] + pub(crate) fn len(&self) -> usize { + self.ds.len() + } } /// The data needed to predict with a Prophet model. diff --git a/crates/augurs-prophet/src/lib.rs b/crates/augurs-prophet/src/lib.rs index 89a0f353..914811a9 100644 --- a/crates/augurs-prophet/src/lib.rs +++ b/crates/augurs-prophet/src/lib.rs @@ -15,6 +15,7 @@ mod positive_float; mod prophet; #[cfg(test)] mod testdata; +mod util; /// A timestamp represented as seconds since the epoch. pub type TimestampSeconds = u64; @@ -32,3 +33,4 @@ pub use prophet::{ }, Prophet, }; +use util::FloatIterExt; diff --git a/crates/augurs-prophet/src/optimizer.rs b/crates/augurs-prophet/src/optimizer.rs index 2b03e736..88d7dfdc 100644 --- a/crates/augurs-prophet/src/optimizer.rs +++ b/crates/augurs-prophet/src/optimizer.rs @@ -95,7 +95,7 @@ pub enum Algorithm { } /// Arguments for optimization. -#[derive(Debug, Clone, Copy)] +#[derive(Default, Debug, Clone, Copy)] pub struct OptimizeOpts { /// Algorithm to use. pub algorithm: Option, diff --git a/crates/augurs-prophet/src/prophet.rs b/crates/augurs-prophet/src/prophet.rs index e710e300..034ecb83 100644 --- a/crates/augurs-prophet/src/prophet.rs +++ b/crates/augurs-prophet/src/prophet.rs @@ -11,8 +11,8 @@ use tracing::instrument; use crate::{ optimizer::{Data, InitialParams, OptimizeOpts, OptimizedParams, Optimizer}, - Error, FeatureMode, Holiday, PositiveFloat, PredictionData, Regressor, Seasonality, - Standardize, TimestampSeconds, TrainingData, + Error, FeatureMode, FloatIterExt, Holiday, PositiveFloat, PredictionData, Regressor, + Seasonality, Standardize, TimestampSeconds, TrainingData, }; const NO_REGRESSORS_PLACEHOLDER: &str = "__no_regressors_zeros__"; @@ -207,6 +207,9 @@ pub struct Prophet { /// The optimizer to use. optimizer: &'static dyn Optimizer, + /// The processed data used for fitting. + processed: Option, + /// The initial parameters passed to optimization. init: Option, @@ -237,6 +240,7 @@ impl Prophet { train_component_columns: None, train_holiday_names: None, optimizer, + processed: None, init: None, optimized: None, } @@ -482,15 +486,8 @@ impl Prophet { scales.y_scale = y_scale; } (Scaling::MinMax, Some(cap)) => { - scales.y_min = *floor - .iter() - .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)) - .ok_or(Error::Scaling)?; - scales.y_scale = cap - .iter() - .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)) - .ok_or(Error::Scaling)? - - scales.y_min; + scales.y_min = floor.iter().copied().nanmin(true); + scales.y_scale = cap.iter().copied().nanmax(true) - scales.y_min; } _ => { return Err(Error::MissingCap); @@ -500,22 +497,11 @@ impl Prophet { Some(_) | None => match self.opts.scaling { Scaling::AbsMax => { scales.y_min = 0.0; - scales.y_scale = y - .iter() - .map(|y| y.abs()) - .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)) - .ok_or(Error::Scaling)?; + scales.y_scale = y.iter().map(|y| y.abs()).nanmax(true); } Scaling::MinMax => { - scales.y_min = *y - .iter() - .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)) - .ok_or(Error::Scaling)?; - scales.y_scale = *y - .iter() - .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)) - .ok_or(Error::Scaling)? - - scales.y_min; + scales.y_min = y.iter().copied().nanmin(true); + scales.y_scale = y.iter().copied().nanmax(true) - scales.y_min; } }, }; @@ -730,7 +716,15 @@ impl Prophet { Ok(all_holidays) } - fn make_holiday_features(&self, modes: &mut Modes) { + /// Construct a frame of features representing holidays. + fn make_holiday_features( + &self, + ds: &[TimestampSeconds], + holidays: HashMap, + features: &mut FeaturesFrame, + prior_scales: &mut Vec, + modes: &mut Modes, + ) { todo!() } @@ -763,16 +757,16 @@ impl Prophet { } // TODO: Add holiday features. - // let holidays = self.construct_holidays(&history.ds)?; - // if holidays.len() > 0 { - // self.make_holiday_features( - // &history.ds, - // &holidays, - // &mut features, - // &mut prior_scales, - // &mut modes, - // ); - // } + let holidays = self.construct_holidays(&history.ds)?; + if !holidays.is_empty() { + self.make_holiday_features( + &history.ds, + holidays, + &mut features, + &mut prior_scales, + &mut modes, + ); + } // Add regressors. for (name, regressor) in &self.extra_regressors { @@ -952,6 +946,7 @@ impl Prophet { .optimize(init.clone(), preprocessed.data.clone(), opts) .map_err(|e| Error::OptimizationFailed(e.to_string()))?, ); + self.processed = Some(preprocessed); self.init = Some(init); Ok(()) } @@ -967,6 +962,7 @@ impl Prophet { } /// Historical data after preprocessing. +#[derive(Debug)] struct ProcessedData { ds: Vec, t: Vec, @@ -980,6 +976,7 @@ struct ProcessedData { } /// Processed data used for fitting. +#[derive(Debug)] struct Preprocessed { data: Data, history: ProcessedData, @@ -1064,7 +1061,6 @@ impl Preprocessed { } = &self.history; let cap_scaled = cap_scaled.as_ref().ok_or(Error::MissingCap)?; - let t_diff = t[i1] - t[i0]; // Force valid values, in case y > cap or y < 0 @@ -1074,7 +1070,6 @@ impl Preprocessed { let mut r0 = c0 / y0; let r1 = c1 / y1; - if (r0 - r1).abs() <= 0.01 { r0 *= 1.05; } @@ -1091,23 +1086,16 @@ impl Preprocessed { } #[cfg(test)] -mod tests { - use crate::{ - optimizer::dummy_optimizer::DummyOptimizer, testdata::daily_univariate_ts, Standardize, - }; +mod test_trend_component { + use crate::{optimizer::dummy_optimizer::DummyOptimizer, testdata::daily_univariate_ts}; use super::*; use augurs_testing::assert_approx_eq; - use pretty_assertions::assert_eq; #[test] - fn test_growth_init() { + fn growth_init() { let mut data = daily_univariate_ts().head(468); - let max = *data - .y - .iter() - .max_by(|a, b| a.partial_cmp(b).unwrap()) - .unwrap(); + let max = data.y.iter().copied().nanmax(true); data = data.with_cap(vec![max; 468]); let mut opts = ProphetOptions::default(); @@ -1131,13 +1119,9 @@ mod tests { } #[test] - fn test_growth_init_minmax() { + fn growth_init_minmax() { let mut data = daily_univariate_ts().head(468); - let max = *data - .y - .iter() - .max_by(|a, b| a.partial_cmp(b).unwrap()) - .unwrap(); + let max = data.y.iter().copied().nanmax(true); data = data.with_cap(vec![max; 468]); let mut opts = ProphetOptions { @@ -1162,6 +1146,134 @@ mod tests { assert_approx_eq!(init.k, 0.0); assert_approx_eq!(init.m, 0.32792770); } +} + +#[cfg(test)] +mod test_data_prep { + use crate::{ + optimizer::dummy_optimizer::DummyOptimizer, testdata::daily_univariate_ts, + util::FloatIterExt, Standardize, + }; + + use super::*; + use augurs_testing::assert_approx_eq; + use pretty_assertions::assert_eq; + + fn train_test_split(data: TrainingData, ratio: f64) -> (TrainingData, TrainingData) { + let n = data.len(); + let split = (n as f64 * ratio).round() as usize; + (data.clone().head(split), data.tail(n - split)) + } + + #[test] + fn setup_dataframe() { + let (data, _) = train_test_split(daily_univariate_ts(), 0.5); + let mut prophet = Prophet::new(ProphetOptions::default(), &DummyOptimizer); + let (history, _) = prophet.setup_dataframe(data, None).unwrap(); + + assert_approx_eq!(history.t.iter().copied().nanmin(true), 0.0); + assert_approx_eq!(history.t.iter().copied().nanmax(true), 1.0); + assert_approx_eq!(history.y_scaled.iter().copied().nanmax(true), 1.0); + } + + #[test] + fn logistic_floor() { + let (mut data, _) = train_test_split(daily_univariate_ts(), 0.5); + let n = data.len(); + data = data.with_floor(vec![10.0; n]); + data = data.with_cap(vec![80.0; n]); + let opts = ProphetOptions { + growth: GrowthType::Logistic, + ..ProphetOptions::default() + }; + let mut prophet = Prophet::new(opts.clone(), &DummyOptimizer); + prophet.fit(data.clone(), Default::default()).unwrap(); + assert!(prophet.scales.unwrap().logistic_floor); + assert_approx_eq!(prophet.processed.unwrap().history.y_scaled[0], 1.0); + + data.y.iter_mut().for_each(|y| *y += 10.0); + for f in data.floor.as_mut().unwrap() { + *f += 10.0; + } + for c in data.cap.as_mut().unwrap() { + *c += 10.0; + } + let mut prophet = Prophet::new(opts.clone(), &DummyOptimizer); + prophet.fit(data, Default::default()).unwrap(); + assert_eq!(prophet.processed.unwrap().history.y_scaled[0], 1.0); + } + + #[test] + fn logistic_floor_minmax() { + let (mut data, _) = train_test_split(daily_univariate_ts(), 0.5); + let n = data.len(); + data = data.with_floor(vec![10.0; n]); + data = data.with_cap(vec![80.0; n]); + let opts = ProphetOptions { + growth: GrowthType::Logistic, + scaling: Scaling::MinMax, + ..ProphetOptions::default() + }; + let mut prophet = Prophet::new(opts.clone(), &DummyOptimizer); + prophet.fit(data.clone(), Default::default()).unwrap(); + assert!(prophet.scales.unwrap().logistic_floor); + assert!( + prophet + .processed + .as_ref() + .unwrap() + .history + .y_scaled + .iter() + .copied() + .nanmin(true) + > 0.0 + ); + assert!( + prophet + .processed + .unwrap() + .history + .y_scaled + .iter() + .copied() + .nanmax(true) + < 1.0 + ); + + data.y.iter_mut().for_each(|y| *y += 10.0); + for f in data.floor.as_mut().unwrap() { + *f += 10.0; + } + for c in data.cap.as_mut().unwrap() { + *c += 10.0; + } + let mut prophet = Prophet::new(opts.clone(), &DummyOptimizer); + prophet.fit(data, Default::default()).unwrap(); + assert!( + prophet + .processed + .as_ref() + .unwrap() + .history + .y_scaled + .iter() + .copied() + .nanmin(true) + > 0.0 + ); + assert!( + prophet + .processed + .unwrap() + .history + .y_scaled + .iter() + .copied() + .nanmax(true) + < 1.0 + ); + } #[test] fn regressor_column_matrix() { @@ -1259,7 +1371,7 @@ mod tests { } #[test] - fn test_add_group_component() { + fn add_group_component() { let mut components = vec![ (0, "weekly".to_string()), (1, "weekly".to_string()), diff --git a/crates/augurs-prophet/src/util.rs b/crates/augurs-prophet/src/util.rs new file mode 100644 index 00000000..00236bae --- /dev/null +++ b/crates/augurs-prophet/src/util.rs @@ -0,0 +1,78 @@ +use num_traits::Float; + +pub(crate) trait FloatIterExt: Iterator { + /// Returns the minimum of all elements in the iterator, handling NaN values. + /// + /// If `ignore_nans` is true, NaN values will be ignored and + /// not included in the minimum. + /// Otherwise, the minimum will be NaN if any element is NaN. + fn nanmin(self, ignore_nans: bool) -> T + where + Self: Sized, + { + self.reduce(|acc, x| { + if ignore_nans && x.is_nan() { + acc + } else if x.is_nan() || acc.is_nan() { + T::nan() + } else { + acc.min(x) + } + }) + .unwrap_or_else(T::nan) + } + + /// Returns the maximum of all elements in the iterator, handling NaN values. + /// + /// If `ignore_nans` is true, NaN values will be ignored and + /// not included in the maximum. + /// Otherwise, the maximum will be NaN if any element is NaN. + fn nanmax(self, ignore_nans: bool) -> T + where + Self: Sized, + { + self.reduce(|acc, x| { + if ignore_nans && x.is_nan() { + acc + } else if x.is_nan() || acc.is_nan() { + T::nan() + } else { + acc.max(x) + } + }) + .unwrap_or_else(T::nan) + } +} + +impl> FloatIterExt for I {} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn empty() { + let x: &[f64] = &[]; + assert!(x.iter().copied().nanmin(true).is_nan()); + assert!(x.iter().copied().nanmax(true).is_nan()); + } + + #[test] + fn no_nans() { + let x: &[f64] = &[-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0]; + assert_eq!(x.iter().copied().nanmin(true), -3.0); + assert_eq!(x.iter().copied().nanmax(true), 3.0); + assert_eq!(x.iter().copied().nanmin(false), -3.0); + assert_eq!(x.iter().copied().nanmax(false), 3.0); + } + + #[test] + fn nans() { + let x: &[f64] = &[-3.0, -2.0, -1.0, f64::NAN, 1.0, 2.0, 3.0]; + assert_eq!(x.iter().copied().nanmin(true), -3.0); + assert_eq!(x.iter().copied().nanmax(true), 3.0); + + assert!(x.iter().copied().nanmin(false).is_nan()); + assert!(x.iter().copied().nanmax(false).is_nan()); + } +} diff --git a/crates/augurs-seasons/Cargo.toml b/crates/augurs-seasons/Cargo.toml index 2c2019ce..bfb90956 100644 --- a/crates/augurs-seasons/Cargo.toml +++ b/crates/augurs-seasons/Cargo.toml @@ -14,7 +14,7 @@ bench = false [dependencies] itertools.workspace = true -num-traits = "0.2.18" +num-traits.workspace = true thiserror.workspace = true tracing.workspace = true welch-sde = "0.1.0"