diff --git a/Cargo.toml b/Cargo.toml
index 54566d3..3f7b11b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "forust-ml"
-version = "0.2.20"
+version = "0.2.21"
 edition = "2021"
 authors = ["James Inlow "]
 homepage = "https://github.com/jinlow/forust"
@@ -24,7 +24,7 @@ log = "0.4.19"
 
 [dev-dependencies]
 criterion = "0.5"
-polars = "0.29"
+polars = "0.32.1"
 reqwest = { version = "0.11", features = ["blocking"] }
 
 [[bench]]
diff --git a/README.md b/README.md
index 6c2d9b4..9eddef8 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ pip install forust
 To use in a rust project add the following to your Cargo.toml file.
 
 ```toml
-forust-ml = "0.2.20"
+forust-ml = "0.2.21"
 ```
 
 ## Usage
diff --git a/py-forust/Cargo.toml b/py-forust/Cargo.toml
index 1cdf65b..658319d 100644
--- a/py-forust/Cargo.toml
+++ b/py-forust/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "py-forust"
-version = "0.2.20"
+version = "0.2.21"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -10,7 +10,7 @@ crate-type = ["cdylib"]
 
 [dependencies]
 pyo3 = { version = "0.19.0", features = ["extension-module"] }
-forust-ml = { version = "0.2.20", path = "../" }
+forust-ml = { version = "0.2.21", path = "../" }
 numpy = "0.19.0"
 ndarray = "0.15.1"
 serde_plain = { version = "1.0" }
diff --git a/py-forust/forust/__init__.py b/py-forust/forust/__init__.py
index 0e0a701..c10b84c 100644
--- a/py-forust/forust/__init__.py
+++ b/py-forust/forust/__init__.py
@@ -44,6 +44,7 @@ class BoosterType(Protocol):
     monotone_constraints: dict[int, int]
     prediction_iteration: None | int
     best_iteration: None | int
+    base_score: float
     terminate_missing_features: set[int]
 
     def fit(
@@ -190,6 +191,7 @@ def __init__(
         terminate_missing_features: Iterable[Any] | None = None,
         missing_node_treatment: str = "AssignToParent",
         log_iterations: int = 0,
+        force_children_to_bound_parent: bool = False,
     ):
         """Gradient Booster Class, used to generate gradient boosted decision tree ensembles.
 
@@ -270,6 +272,7 @@ def __init__(
                 - "AverageLeafWeight": After training each tree, starting from the bottom of the tree, assign the missing node weight to the weighted average of the left and right child nodes. Next assign the parent to the weighted average of the children nodes. This is performed recursively up through the entire tree. This is performed as a post processing step on each tree after it is built, and prior to updating the predictions for which to train the next tree.
                 - "AverageNodeWeight": Set the missing node to be equal to the weighted average weight of the left and the right nodes.
             log_iterations (int, optional): Setting to a value (N) other than zero will result in information being logged every N iterations; this info can be interacted with directly with the python [`logging`](https://docs.python.org/3/howto/logging.html) module. For an example of how to utilize the logging information see the example [here](/#logging-output).
+            force_children_to_bound_parent (bool, optional): Setting this parameter to `True` will restrict children nodes so that they always contain the parent node inside of their range. Without this, it is possible that both the left and the right child nodes could be greater than, or less than, the parent node. Defaults to `False`.
 
         Raises:
             TypeError: Raised if an invalid dtype is passed.
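A note on the new `force_children_to_bound_parent` option documented above: the invariant it enforces can be sketched in a few lines. This is illustrative only (`parent_is_bounded` is a made-up helper, not part of forust):

```rust
// Illustrative only: the invariant force_children_to_bound_parent enforces.
// With the flag on, child weights are adjusted so the parent weight lies
// within [min(left, right), max(left, right)].
fn parent_is_bounded(parent: f32, left: f32, right: f32) -> bool {
    left.min(right) <= parent && parent <= left.max(right)
}

fn main() {
    // The parent sits between its children: allowed either way.
    assert!(parent_is_bounded(0.5, 0.2, 0.9));
    // Without the flag, both children can end up on the same side of the parent.
    assert!(!parent_is_bounded(0.5, 0.6, 0.9));
}
```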
@@ -347,6 +350,7 @@ def __init__(
             terminate_missing_features=set(),
             missing_node_treatment=missing_node_treatment,
             log_iterations=log_iterations,
+            force_children_to_bound_parent=force_children_to_bound_parent,
         )
         monotone_constraints_ = (
             {} if monotone_constraints is None else monotone_constraints
@@ -360,7 +364,8 @@ def __init__(
         self.l2 = l2
         self.gamma = gamma
         self.min_leaf_weight = min_leaf_weight
-        self.base_score = base_score
+        # Use the booster getter, as it's more dynamic
+        # self.base_score = base_score
         self.nbins = nbins
         self.parallel = parallel
         self.allow_missing_splits = allow_missing_splits
@@ -377,6 +382,8 @@ def __init__(
         self.initialize_base_score = initialize_base_score
         self.terminate_missing_features = terminate_missing_features_
         self.missing_node_treatment = missing_node_treatment
+        self.log_iterations = log_iterations
+        self.force_children_to_bound_parent = force_children_to_bound_parent
 
     def fit(
         self,
@@ -551,7 +558,7 @@ def set_prediction_iteration(self, iteration: int):
 
         Args:
             iteration (int): Iteration number to use, this will use all trees, up to this
-                index.
+                index. Setting this to 10 would result in trees 0 through 9 being used for predictions.
         """
         self.booster.prediction_iteration = iteration
 
@@ -843,6 +850,29 @@ def get_evaluation_history(self) -> np.ndarray | None:
         r, v, d = self.booster.get_evaluation_history()
         return d.reshape((r, v))
 
+    @property
+    def best_iteration(self) -> int | None:
+        """Get the best iteration if `early_stopping_rounds` was used when fitting.
+
+        Returns:
+            int | None: The best iteration, or None if `early_stopping_rounds` wasn't used.
+        """
+        return self.booster.best_iteration
+
+    @property
+    def base_score(self) -> float:
+        """Base score, used as the initial prediction value."""
+        return self.booster.base_score
+
+    @property
+    def prediction_iteration(self) -> int | None:
+        """The iteration to use when predicting; up to this many trees will be used.
+
+        Returns:
+            int | None: The iteration if set, otherwise None, in which case all trees will be used.
+        """
+        return self.booster.prediction_iteration
+
     def get_best_iteration(self) -> int | None:
         """Get the best iteration if `early_stopping_rounds` was used when fitting.
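The `set_prediction_iteration` docstring above can be made concrete with a small sketch. Everything here is hypothetical stand-in code (per-tree contributions replace real trees), not forust internals:

```rust
// Hypothetical sketch of prediction_iteration semantics: a limit of 10
// means trees 0 through 9 contribute to the prediction.
fn predict(base_score: f64, tree_preds: &[f64], prediction_iteration: Option<usize>) -> f64 {
    let n = prediction_iteration.unwrap_or(tree_preds.len());
    base_score + tree_preds[..n].iter().sum::<f64>()
}

fn main() {
    let contribs = vec![0.25; 20]; // 20 trees, each contributing 0.25
    assert_eq!(predict(0.5, &contribs, Some(10)), 3.0); // trees 0 through 9
    assert_eq!(predict(0.5, &contribs, None), 5.5); // all 20 trees
}
```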
diff --git a/py-forust/src/lib.rs b/py-forust/src/lib.rs
index 010cfba..4f0e3a3 100644
--- a/py-forust/src/lib.rs
+++ b/py-forust/src/lib.rs
@@ -79,6 +79,7 @@ impl GradientBooster {
         terminate_missing_features,
         missing_node_treatment,
         log_iterations,
+        force_children_to_bound_parent,
     ))]
     pub fn new(
         objective_type: &str,
@@ -108,6 +109,7 @@ impl GradientBooster {
         terminate_missing_features: HashSet<usize>,
         missing_node_treatment: &str,
         log_iterations: usize,
+        force_children_to_bound_parent: bool,
     ) -> PyResult<Self> {
         let constraints = int_map_to_constraint_map(monotone_constraints)?;
         let objective_ = to_value_error(serde_plain::from_str(objective_type))?;
@@ -150,6 +152,7 @@ impl GradientBooster {
             terminate_missing_features,
             missing_node_treatment_,
             log_iterations,
+            force_children_to_bound_parent,
         );
         Ok(GradientBooster {
             booster: to_value_error(booster)?,
@@ -175,11 +178,21 @@ impl GradientBooster {
         Ok(())
     }
 
+    #[getter]
+    fn prediction_iteration(&self) -> PyResult<Option<usize>> {
+        Ok(self.booster.prediction_iteration)
+    }
+
     #[getter]
     fn best_iteration(&self) -> PyResult<Option<usize>> {
         Ok(self.booster.best_iteration)
     }
 
+    #[getter]
+    fn base_score(&self) -> PyResult<f64> {
+        Ok(self.booster.base_score)
+    }
+
     pub fn fit(
         &mut self,
         flat_data: PyReadonlyArray1<f64>,
@@ -399,6 +412,10 @@ impl GradientBooster {
                 missing_node_treatment_.to_object(py),
             ),
             ("log_iterations", self.booster.log_iterations.to_object(py)),
+            (
+                "force_children_to_bound_parent",
+                self.booster.force_children_to_bound_parent.to_object(py),
+            ),
         ];
         let dict = key_vals.into_py_dict(py);
         Ok(dict.to_object(py))
diff --git a/rs-example.md b/rs-example.md
index 44bed6b..6feea2d 100644
--- a/rs-example.md
+++ b/rs-example.md
@@ -3,7 +3,7 @@ To run this example, add the following code to your `Cargo.toml` file.
 
 ```toml
 [dependencies]
-forust-ml = "0.2.20"
+forust-ml = "0.2.21"
 polars = "0.28"
 reqwest = { version = "0.11", features = ["blocking"] }
 ```
diff --git a/src/gradientbooster.rs b/src/gradientbooster.rs
index f9674d6..c3bc1c9 100644
--- a/src/gradientbooster.rs
+++ b/src/gradientbooster.rs
@@ -165,6 +165,9 @@ pub struct GradientBooster {
     /// Should the model be trained showing output.
     #[serde(default = "default_log_iterations")]
     pub log_iterations: usize,
+    /// Should the children nodes contain the parent node in their bounds? Setting this to true will prevent any split where both the higher and lower child weights end up greater than, or less than, the parent weight.
+    #[serde(default = "default_force_children_to_bound_parent")]
+    pub force_children_to_bound_parent: bool,
     // Members internal to the booster object, and not parameters set by the user.
     // Trees is public, just to interact with it directly in the python wrapper.
     pub trees: Vec<Tree>,
@@ -214,6 +217,9 @@ fn default_missing_node_treatment() -> MissingNodeTreatment {
 fn default_log_iterations() -> usize {
     0
 }
+fn default_force_children_to_bound_parent() -> bool {
+    false
+}
 
 fn parse_missing<'de, D>(d: D) -> Result<f64, D::Error>
 where
@@ -252,6 +258,7 @@ impl Default for GradientBooster {
             HashSet::new(),
             MissingNodeTreatment::AssignToParent,
             0,
+            false,
         )
         .unwrap()
     }
@@ -329,6 +336,7 @@ impl GradientBooster {
         terminate_missing_features: HashSet<usize>,
         missing_node_treatment: MissingNodeTreatment,
         log_iterations: usize,
+        force_children_to_bound_parent: bool,
     ) -> Result<Self, ForustError> {
         let (base_score_, initialize_base_score_) = match base_score {
             Some(v) => (v, initialize_base_score),
@@ -365,6 +373,7 @@ impl GradientBooster {
             prediction_iteration: None,
             missing_node_treatment,
             log_iterations,
+            force_children_to_bound_parent,
             trees: Vec::new(),
             metadata: HashMap::new(),
         };
@@ -411,6 +420,7 @@ impl GradientBooster {
                 constraints_map,
                 terminate_missing_features: self.terminate_missing_features.clone(),
                 missing_node_treatment: self.missing_node_treatment,
+                force_children_to_bound_parent: self.force_children_to_bound_parent,
             };
             self.fit_trees(y, sample_weight, data, &splitter, evaluation_data)?;
         } else {
diff --git a/src/node.rs b/src/node.rs
index b6b042b..42429e4 100644
--- a/src/node.rs
+++ b/src/node.rs
@@ -184,10 +184,10 @@ impl SplittableNode {
         self.missing_node = missing_child;
         self.is_leaf = false;
     }
-    pub fn as_node(&self) -> Node {
+    pub fn as_node(&self, learning_rate: f32) -> Node {
         Node {
             num: self.num,
-            weight_value: self.weight_value,
+            weight_value: self.weight_value * learning_rate,
             hessian_sum: self.hessian_sum,
             depth: self.depth,
             missing_node: self.missing_node,
diff --git a/src/splitter.rs b/src/splitter.rs
index 2762a3d..d68a532 100644
--- a/src/splitter.rs
+++ b/src/splitter.rs
@@ -7,7 +7,7 @@ use crate::histogram::HistogramMatrix;
 use crate::node::SplittableNode;
 use crate::tree::Tree;
 use crate::utils::{
-    constrained_weight, cull_gain, gain_given_weight, pivot_on_split,
+    between, bound_to_parent, constrained_weight, cull_gain, gain_given_weight, pivot_on_split,
     pivot_on_split_exclude_missing,
 };
 
@@ -87,6 +87,7 @@ pub trait Splitter {
         missing_hessian: f32,
         lower_bound: f32,
         upper_bound: f32,
+        parent_weight: f32,
         constraint: Option<&Constraint>,
     ) -> Option<(NodeInfo, NodeInfo, MissingInfo)>;
 
@@ -112,18 +113,18 @@ pub trait Splitter {
         let right_gradient = node.gradient_sum - cuml_grad - missing.gradient_sum;
         let right_hessian = node.hessian_sum - cuml_hess - missing.hessian_sum;
 
-        let (mut left_node_info, mut right_node_info, mut missing_info) = match self
-            .evaluate_split(
-                left_gradient,
-                left_hessian,
-                right_gradient,
-                right_hessian,
-                missing.gradient_sum,
-                missing.hessian_sum,
-                node.lower_bound,
-                node.upper_bound,
-                constraint,
-            ) {
+        let (mut left_node_info, mut right_node_info, missing_info) = match self.evaluate_split(
+            left_gradient,
+            left_hessian,
+            right_gradient,
+            right_hessian,
+            missing.gradient_sum,
+            missing.hessian_sum,
+            node.lower_bound,
+            node.upper_bound,
+            node.weight_value,
+            constraint,
+        ) {
             None => {
                 cuml_grad += bin.gradient_sum;
                 cuml_hess += bin.hessian_sum;
@@ -170,12 +171,7 @@ pub trait Splitter {
         };
         left_node_info.bounds = left_bounds;
         right_node_info.bounds = right_bounds;
-        // Apply shrinkage at this point...
-        left_node_info.weight *= self.get_learning_rate();
-        right_node_info.weight *= self.get_learning_rate();
-        if let MissingInfo::Branch(info) | MissingInfo::Leaf(info) = &mut missing_info {
-            info.weight *= self.get_learning_rate();
-        }
+
         // If split gain is NaN, one of the sides is empty, do not allow
         // this split.
         let split_gain = if split_gain.is_nan() { 0.0 } else { split_gain };
@@ -252,6 +248,7 @@ pub struct MissingBranchSplitter {
     pub constraints_map: ConstraintMap,
     pub terminate_missing_features: HashSet<usize>,
     pub missing_node_treatment: MissingNodeTreatment,
+    pub force_children_to_bound_parent: bool,
 }
 
 impl MissingBranchSplitter {
@@ -339,6 +336,7 @@ impl Splitter for MissingBranchSplitter {
         missing_hessian: f32,
         lower_bound: f32,
         upper_bound: f32,
+        parent_weight: f32,
         constraint: Option<&Constraint>,
     ) -> Option<(NodeInfo, NodeInfo, MissingInfo)> {
         // If there is no info right, or there is no
@@ -350,7 +348,7 @@ impl Splitter for MissingBranchSplitter {
             return None;
         }
 
-        let left_weight = constrained_weight(
+        let mut left_weight = constrained_weight(
             &self.l2,
             left_gradient,
             left_hessian,
@@ -358,7 +356,7 @@ impl Splitter for MissingBranchSplitter {
             upper_bound,
             constraint,
         );
-        let right_weight = constrained_weight(
+        let mut right_weight = constrained_weight(
             &self.l2,
             right_gradient,
             right_hessian,
@@ -367,6 +365,12 @@ impl Splitter for MissingBranchSplitter {
             constraint,
         );
 
+        if self.force_children_to_bound_parent {
+            (left_weight, right_weight) = bound_to_parent(parent_weight, left_weight, right_weight);
+            assert!(between(lower_bound, upper_bound, left_weight));
+            assert!(between(lower_bound, upper_bound, right_weight));
+        }
+
         let left_gain = gain_given_weight(&self.l2, left_gradient, left_hessian, left_weight);
         let right_gain = gain_given_weight(&self.l2, right_gradient, right_hessian, right_weight);
 
@@ -726,6 +730,7 @@ impl Splitter for MissingImputerSplitter {
         missing_hessian: f32,
         lower_bound: f32,
         upper_bound: f32,
+        _parent_weight: f32,
         constraint: Option<&Constraint>,
     ) -> Option<(NodeInfo, NodeInfo, MissingInfo)> {
         // If there is no info right, or there is no
diff --git a/src/tree.rs b/src/tree.rs
index fe785f7..e098f01 100644
--- a/src/tree.rs
+++ b/src/tree.rs
@@ -83,7 +83,8 @@ impl Tree {
             f32::INFINITY,
         );
         // Add the first node to the tree nodes.
-        self.nodes.push(root_node.as_node());
+        self.nodes
+            .push(root_node.as_node(splitter.get_learning_rate()));
         let mut n_leaves = 1;
 
         let mut growable: Box<dyn Growable> = match grow_policy {
@@ -132,7 +133,7 @@ impl Tree {
             n_leaves += n_new_nodes;
             n_nodes += n_new_nodes;
             for n in new_nodes {
-                self.nodes.push(n.as_node());
+                self.nodes.push(n.as_node(splitter.get_learning_rate()));
                 if !n.is_missing_leaf {
                     growable.add_node(n)
                 }
diff --git a/src/utils.rs b/src/utils.rs
index 925e2b3..7191f6d 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -106,6 +106,54 @@ pub fn constrained_weight(
     }
 }
 
+/// Test if v is contained within the range between i and j.
+#[inline]
+pub fn between(i: f32, j: f32, v: f32) -> bool {
+    if i > j {
+        (i >= v) && (v >= j)
+    } else {
+        (i <= v) && (v <= j)
+    }
+}
+
+#[inline]
+pub fn bound_to_parent(parent_weight: f32, left_weight: f32, right_weight: f32) -> (f32, f32) {
+    if between(left_weight, right_weight, parent_weight) {
+        (left_weight, right_weight)
+    } else {
+        // If we are here, we know the parent weight is above or below
+        // the range of the right and left weights, because of the between check.
+        if left_weight > right_weight {
+            // Here is what it looks like on the number line if we are here:
+            // right...left
+            // Is the parent above the range?
+            // i.e. right...left...parent?
+            if left_weight < parent_weight {
+                (parent_weight, right_weight)
+            } else {
+                // Otherwise it must be outside of the range on the other side,
+                // i.e. parent...right...left,
+                // in which case set the right weight equal to the parent.
+                (left_weight, parent_weight)
+            }
+        } else {
+            // Here is what the number line looks like at this point:
+            // left_weight..right_weight
+            // Is the parent above the range?
+            // i.e. left...right...parent?
+            if right_weight < parent_weight {
+                // In which case set the right weight equal to the parent.
+                (left_weight, parent_weight)
+            } else {
+                // Is the parent below the range?
+                // i.e. parent...left...right
+                // In which case set the left weight equal to the parent.
+                (parent_weight, right_weight)
+            }
+        }
+    }
+}
+
 /// Convert log odds to probability
 #[inline]
 pub fn odds(v: f64) -> f64 {
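Because `between` and `bound_to_parent` appear in full in this diff, their behavior can be verified with a standalone harness. The two functions below are condensed from src/utils.rs above; only the `main` checks are new:

```rust
// Standalone check of the bounding logic, condensed from src/utils.rs above.
fn between(i: f32, j: f32, v: f32) -> bool {
    if i > j { (i >= v) && (v >= j) } else { (i <= v) && (v <= j) }
}

fn bound_to_parent(parent: f32, left: f32, right: f32) -> (f32, f32) {
    if between(left, right, parent) {
        (left, right) // parent already inside the children's range
    } else if left > right {
        // right...left on the number line
        if left < parent { (parent, right) } else { (left, parent) }
    } else if right < parent {
        // left...right...parent: pull the right child up to the parent
        (left, parent)
    } else {
        // parent...left...right: pull the left child down to the parent
        (parent, right)
    }
}

fn main() {
    assert_eq!(bound_to_parent(0.5, 0.0, 1.0), (0.0, 1.0)); // unchanged
    assert_eq!(bound_to_parent(2.0, 0.0, 1.0), (0.0, 2.0)); // parent above
    assert_eq!(bound_to_parent(-1.0, 0.0, 1.0), (-1.0, 1.0)); // parent below
}
```

This bounding presumably also explains why the release moves learning-rate shrinkage out of split evaluation and into `as_node`: parent and child weights are then compared on the same unshrunk scale, and the learning rate is applied once when each node is materialized.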