Merge pull request #64 from jinlow/force-chilred-to-contain-parent

force children to contain parent weight

jinlow authored Aug 23, 2023
2 parents a2b1e17 + 5978b88 commit 145f705
Showing 11 changed files with 144 additions and 33 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "forust-ml"
version = "0.2.20"
version = "0.2.21"
edition = "2021"
authors = ["James Inlow <[email protected]>"]
homepage = "https://github.com/jinlow/forust"
@@ -24,7 +24,7 @@ log = "0.4.19"

[dev-dependencies]
criterion = "0.5"
polars = "0.29"
polars = "0.32.1"
reqwest = { version = "0.11", features = ["blocking"] }

[[bench]]
2 changes: 1 addition & 1 deletion README.md
@@ -29,7 +29,7 @@ pip install forust

To use in a rust project add the following to your Cargo.toml file.
```toml
forust-ml = "0.2.20"
forust-ml = "0.2.21"
```

## Usage
4 changes: 2 additions & 2 deletions py-forust/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "py-forust"
version = "0.2.20"
version = "0.2.21"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -10,7 +10,7 @@ crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.19.0", features = ["extension-module"] }
forust-ml = { version = "0.2.20", path = "../" }
forust-ml = { version = "0.2.21", path = "../" }
numpy = "0.19.0"
ndarray = "0.15.1"
serde_plain = { version = "1.0" }
34 changes: 32 additions & 2 deletions py-forust/forust/__init__.py
@@ -44,6 +44,7 @@ class BoosterType(Protocol):
monotone_constraints: dict[int, int]
prediction_iteration: None | int
best_iteration: None | int
base_score: float
terminate_missing_features: set[int]

def fit(
@@ -190,6 +191,7 @@ def __init__(
terminate_missing_features: Iterable[Any] | None = None,
missing_node_treatment: str = "AssignToParent",
log_iterations: int = 0,
force_children_to_bound_parent: bool = False,
):
"""Gradient Booster Class, used to generate gradient boosted decision tree ensembles.
@@ -270,6 +272,7 @@ def __init__(
- "AverageLeafWeight": After training each tree, starting from the bottom of the tree, assign the missing node weight to the weighted average of the left and right child nodes. Next assign the parent to the weighted average of the children nodes. This is performed recursively up through the entire tree. This is performed as a post processing step on each tree after it is built, and prior to updating the predictions for which to train the next tree.
- "AverageNodeWeight": Set the missing node to be equal to the weighted average weight of the left and the right nodes.
log_iterations (int, optional): Setting to a value (N) other than zero will result in information being logged about every N iterations; this info can be interacted with directly with the python [`logging`](https://docs.python.org/3/howto/logging.html) module. For an example of how to utilize the logging information see the example [here](/#logging-output).
force_children_to_bound_parent (bool, optional): Setting this parameter to `True` will restrict children nodes, so that they always contain the parent node inside of their range. Without setting this, it's possible that both the left and the right child nodes could be greater than, or less than, the parent node. Defaults to `False`.
Raises:
TypeError: Raised if an invalid dtype is passed.
@@ -347,6 +350,7 @@ def __init__(
terminate_missing_features=set(),
missing_node_treatment=missing_node_treatment,
log_iterations=log_iterations,
force_children_to_bound_parent=force_children_to_bound_parent,
)
monotone_constraints_ = (
{} if monotone_constraints is None else monotone_constraints
@@ -360,7 +364,8 @@ def __init__(
self.l2 = l2
self.gamma = gamma
self.min_leaf_weight = min_leaf_weight
self.base_score = base_score
# Use booster getter, as it's more dynamic
# self.base_score = base_score
self.nbins = nbins
self.parallel = parallel
self.allow_missing_splits = allow_missing_splits
Expand All @@ -377,6 +382,8 @@ def __init__(
self.initialize_base_score = initialize_base_score
self.terminate_missing_features = terminate_missing_features_
self.missing_node_treatment = missing_node_treatment
self.log_iterations = log_iterations
self.force_children_to_bound_parent = force_children_to_bound_parent

def fit(
self,
@@ -551,7 +558,7 @@ def set_prediction_iteration(self, iteration: int):
Args:
iteration (int): Iteration number to use; this will use all trees up to this
index.
index. Setting this to 10 would result in trees 0 through 9 being used for predictions.
"""
self.booster.prediction_iteration = iteration

@@ -843,6 +850,29 @@ def get_evaluation_history(self) -> np.ndarray | None:
r, v, d = self.booster.get_evaluation_history()
return d.reshape((r, v))

@property
def best_iteration(self) -> int | None:
"""Get the best iteration if `early_stopping_rounds` was used when fitting.
Returns:
int | None: The best iteration, or None if `early_stopping_rounds` wasn't used.
"""
return self.booster.best_iteration

@property
def base_score(self) -> float:
"""Base score used as initial prediction value"""
return self.booster.base_score

@property
def prediction_iteration(self) -> int | None:
"""The prediction_iteration that will be used when predicting, up to this many trees will be used.
Returns:
int | None: Int if this is set, otherwise, None, in which case all trees will be used.
"""
return self.booster.prediction_iteration

def get_best_iteration(self) -> int | None:
"""Get the best iteration if `early_stopping_rounds` was used when fitting.
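The new flag and the read-only properties above are user-facing, so the wrapper can be exercised end to end. A minimal usage sketch, assuming forust is installed and using a small synthetic dataset (the data and variable names are illustrative):

```python
import numpy as np
import pandas as pd
from forust import GradientBooster

rng = np.random.default_rng(0)
X = pd.DataFrame({"a": rng.normal(size=200), "b": rng.normal(size=200)})
y = (X["a"] + rng.normal(size=200) > 0).astype(float)

# Constrain every split so the parent weight falls within the children's range.
model = GradientBooster(force_children_to_bound_parent=True)
model.fit(X, y)

print(model.base_score)             # now read from the Rust booster via a property
model.set_prediction_iteration(10)  # use trees 0 through 9 when predicting
preds = model.predict(X)
```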
17 changes: 17 additions & 0 deletions py-forust/src/lib.rs
@@ -79,6 +79,7 @@ impl GradientBooster {
terminate_missing_features,
missing_node_treatment,
log_iterations,
force_children_to_bound_parent,
))]
pub fn new(
objective_type: &str,
@@ -108,6 +109,7 @@ impl GradientBooster {
terminate_missing_features: HashSet<usize>,
missing_node_treatment: &str,
log_iterations: usize,
force_children_to_bound_parent: bool,
) -> PyResult<Self> {
let constraints = int_map_to_constraint_map(monotone_constraints)?;
let objective_ = to_value_error(serde_plain::from_str(objective_type))?;
@@ -150,6 +152,7 @@ impl GradientBooster {
terminate_missing_features,
missing_node_treatment_,
log_iterations,
force_children_to_bound_parent,
);
Ok(GradientBooster {
booster: to_value_error(booster)?,
@@ -175,11 +178,21 @@ impl GradientBooster {
Ok(())
}

#[getter]
fn prediction_iteration(&self) -> PyResult<Option<usize>> {
Ok(self.booster.prediction_iteration)
}

#[getter]
fn best_iteration(&self) -> PyResult<Option<usize>> {
Ok(self.booster.best_iteration)
}

#[getter]
fn base_score(&self) -> PyResult<f64> {
Ok(self.booster.base_score)
}

pub fn fit(
&mut self,
flat_data: PyReadonlyArray1<f64>,
@@ -399,6 +412,10 @@ impl GradientBooster {
missing_node_treatment_.to_object(py),
),
("log_iterations", self.booster.log_iterations.to_object(py)),
(
"force_children_to_bound_parent",
self.booster.force_children_to_bound_parent.to_object(py),
),
];
let dict = key_vals.into_py_dict(py);
Ok(dict.to_object(py))
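The `#[getter]` functions expose the Rust booster's fields to Python as read-only attributes, which the pure-Python properties in `forust/__init__.py` defer to. A short sketch of how they surface, continuing from the fitted `model` in the previous sketch:

```python
# `model.booster` is the Rust-backed object; the #[getter] attributes are
# plain attribute reads, not method calls.
print(model.booster.base_score)            # float, set during fitting
print(model.booster.prediction_iteration)  # None unless explicitly set
print(model.booster.best_iteration)        # None without early stopping
```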
2 changes: 1 addition & 1 deletion rs-example.md
@@ -3,7 +3,7 @@
To run this example, add the following code to your `Cargo.toml` file.
```toml
[dependencies]
forust-ml = "0.2.20"
forust-ml = "0.2.21"
polars = "0.28"
reqwest = { version = "0.11", features = ["blocking"] }
```
10 changes: 10 additions & 0 deletions src/gradientbooster.rs
@@ -165,6 +165,9 @@ pub struct GradientBooster {
/// Should the model be trained showing output.
#[serde(default = "default_log_iterations")]
pub log_iterations: usize,
/// Should the children nodes contain the parent node in their bounds? Setting this to true will prevent splits where the higher and lower child values are both greater than, or both less than, the parent weight.
#[serde(default = "default_force_children_to_bound_parent")]
pub force_children_to_bound_parent: bool,
// Members internal to the booster object, and not parameters set by the user.
// Trees is public, just to interact with it directly in the python wrapper.
pub trees: Vec<Tree>,
@@ -214,6 +217,9 @@ fn default_missing_node_treatment() -> MissingNodeTreatment {
fn default_log_iterations() -> usize {
0
}
fn default_force_children_to_bound_parent() -> bool {
false
}

fn parse_missing<'de, D>(d: D) -> Result<f64, D::Error>
where
@@ -252,6 +258,7 @@ impl Default for GradientBooster {
HashSet::new(),
MissingNodeTreatment::AssignToParent,
0,
false,
)
.unwrap()
}
@@ -329,6 +336,7 @@ impl GradientBooster {
terminate_missing_features: HashSet<usize>,
missing_node_treatment: MissingNodeTreatment,
log_iterations: usize,
force_children_to_bound_parent: bool,
) -> Result<Self, ForustError> {
let (base_score_, initialize_base_score_) = match base_score {
Some(v) => (v, initialize_base_score),
@@ -365,6 +373,7 @@
prediction_iteration: None,
missing_node_treatment,
log_iterations,
force_children_to_bound_parent,
trees: Vec::new(),
metadata: HashMap::new(),
};
@@ -411,6 +420,7 @@
constraints_map,
terminate_missing_features: self.terminate_missing_features.clone(),
missing_node_treatment: self.missing_node_treatment,
force_children_to_bound_parent: self.force_children_to_bound_parent,
};
self.fit_trees(y, sample_weight, data, &splitter, evaluation_data)?;
} else {
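The `#[serde(default = ...)]` attribute keeps previously serialized models loadable: a model saved before this field existed deserializes with `force_children_to_bound_parent` falling back to `false`. A rough Python analogue of that fallback behavior (a sketch, not forust's actual loading code):

```python
import json

def load_booster_params(raw: str) -> dict:
    params = json.loads(raw)
    # Mirror serde's `default`: an absent key falls back to a default
    # value instead of causing deserialization to fail.
    params.setdefault("force_children_to_bound_parent", False)
    return params

old_model_json = '{"log_iterations": 0}'  # saved before the new field existed
print(load_booster_params(old_model_json)["force_children_to_bound_parent"])  # False
```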
4 changes: 2 additions & 2 deletions src/node.rs
@@ -184,10 +184,10 @@ impl SplittableNode {
self.missing_node = missing_child;
self.is_leaf = false;
}
pub fn as_node(&self) -> Node {
pub fn as_node(&self, learning_rate: f32) -> Node {
Node {
num: self.num,
weight_value: self.weight_value,
weight_value: self.weight_value * learning_rate,
hessian_sum: self.hessian_sum,
depth: self.depth,
missing_node: self.missing_node,
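Learning-rate shrinkage now happens once, when a `SplittableNode` is frozen into a `Node`, instead of inside the split-evaluation loop (see the block removed from `src/splitter.rs` below). Presumably this keeps the new bounding logic comparing raw, unshrunk weights. A toy illustration of why the ordering matters (illustrative numbers, not forust internals):

```python
learning_rate = 0.1
parent_raw, left_raw, right_raw = 1.0, 1.2, 1.5

# Bounding on raw weights: both children exceed the parent, so the smaller
# child is clamped to the parent's raw weight, and shrinkage is applied
# once when the node is frozen via as_node.
left_bounded = min(left_raw, parent_raw)    # 1.0 -> range now contains parent
stored_left = left_bounded * learning_rate  # 0.1 is what the tree stores

# Had shrinkage been applied first, a shrunk child (0.12) would be compared
# against a raw parent (1.0), clamping on mismatched scales.
```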
47 changes: 26 additions & 21 deletions src/splitter.rs
@@ -7,7 +7,7 @@ use crate::histogram::HistogramMatrix;
use crate::node::SplittableNode;
use crate::tree::Tree;
use crate::utils::{
constrained_weight, cull_gain, gain_given_weight, pivot_on_split,
between, bound_to_parent, constrained_weight, cull_gain, gain_given_weight, pivot_on_split,
pivot_on_split_exclude_missing,
};

@@ -87,6 +87,7 @@ pub trait Splitter {
missing_hessian: f32,
lower_bound: f32,
upper_bound: f32,
parent_weight: f32,
constraint: Option<&Constraint>,
) -> Option<(NodeInfo, NodeInfo, MissingInfo)>;

@@ -112,18 +113,18 @@ pub trait Splitter {
let right_gradient = node.gradient_sum - cuml_grad - missing.gradient_sum;
let right_hessian = node.hessian_sum - cuml_hess - missing.hessian_sum;

let (mut left_node_info, mut right_node_info, mut missing_info) = match self
.evaluate_split(
left_gradient,
left_hessian,
right_gradient,
right_hessian,
missing.gradient_sum,
missing.hessian_sum,
node.lower_bound,
node.upper_bound,
constraint,
) {
let (mut left_node_info, mut right_node_info, missing_info) = match self.evaluate_split(
left_gradient,
left_hessian,
right_gradient,
right_hessian,
missing.gradient_sum,
missing.hessian_sum,
node.lower_bound,
node.upper_bound,
node.weight_value,
constraint,
) {
None => {
cuml_grad += bin.gradient_sum;
cuml_hess += bin.hessian_sum;
@@ -170,12 +171,7 @@
};
left_node_info.bounds = left_bounds;
right_node_info.bounds = right_bounds;
// Apply shrinkage at this point...
left_node_info.weight *= self.get_learning_rate();
right_node_info.weight *= self.get_learning_rate();
if let MissingInfo::Branch(info) | MissingInfo::Leaf(info) = &mut missing_info {
info.weight *= self.get_learning_rate();
}

// If split gain is NaN, one of the sides is empty, do not allow
// this split.
let split_gain = if split_gain.is_nan() { 0.0 } else { split_gain };
@@ -252,6 +248,7 @@ pub struct MissingBranchSplitter {
pub constraints_map: ConstraintMap,
pub terminate_missing_features: HashSet<usize>,
pub missing_node_treatment: MissingNodeTreatment,
pub force_children_to_bound_parent: bool,
}

impl MissingBranchSplitter {
@@ -339,6 +336,7 @@ impl Splitter for MissingBranchSplitter {
missing_hessian: f32,
lower_bound: f32,
upper_bound: f32,
parent_weight: f32,
constraint: Option<&Constraint>,
) -> Option<(NodeInfo, NodeInfo, MissingInfo)> {
// If there is no info right, or there is no
@@ -350,15 +348,15 @@
return None;
}

let left_weight = constrained_weight(
let mut left_weight = constrained_weight(
&self.l2,
left_gradient,
left_hessian,
lower_bound,
upper_bound,
constraint,
);
let right_weight = constrained_weight(
let mut right_weight = constrained_weight(
&self.l2,
right_gradient,
right_hessian,
@@ -367,6 +365,12 @@
constraint,
);

if self.force_children_to_bound_parent {
(left_weight, right_weight) = bound_to_parent(parent_weight, left_weight, right_weight);
assert!(between(lower_bound, upper_bound, left_weight));
assert!(between(lower_bound, upper_bound, right_weight));
}

let left_gain = gain_given_weight(&self.l2, left_gradient, left_hessian, left_weight);
let right_gain = gain_given_weight(&self.l2, right_gradient, right_hessian, right_weight);

@@ -726,6 +730,7 @@ impl Splitter for MissingImputerSplitter {
missing_hessian: f32,
lower_bound: f32,
upper_bound: f32,
_parent_weight: f32,
constraint: Option<&Constraint>,
) -> Option<(NodeInfo, NodeInfo, MissingInfo)> {
// If there is no info right, or there is no
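The body of `bound_to_parent` isn't shown in this diff, only its import and call site. One plausible reading of its behavior, based on the parameter's documentation (children must always contain the parent weight in their range), sketched in Python with hypothetical names:

```python
def bound_to_parent(parent_weight: float,
                    left_weight: float,
                    right_weight: float) -> tuple[float, float]:
    """Clamp child weights so the parent weight lies within their range."""
    if min(left_weight, right_weight) > parent_weight:
        # Both children sit above the parent: pull the smaller one down.
        if left_weight <= right_weight:
            left_weight = parent_weight
        else:
            right_weight = parent_weight
    elif max(left_weight, right_weight) < parent_weight:
        # Both children sit below the parent: pull the larger one up.
        if left_weight >= right_weight:
            left_weight = parent_weight
        else:
            right_weight = parent_weight
    return left_weight, right_weight

print(bound_to_parent(0.0, 0.5, 1.0))   # (0.0, 1.0): left clamped down
print(bound_to_parent(0.0, -0.5, 1.0))  # (-0.5, 1.0): already contains parent
```

The assertions at the call site then check that the clamped weights still fall within the node's monotonicity bounds.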
5 changes: 3 additions & 2 deletions src/tree.rs
@@ -83,7 +83,8 @@ impl Tree {
f32::INFINITY,
);
// Add the first node to the tree nodes.
self.nodes.push(root_node.as_node());
self.nodes
.push(root_node.as_node(splitter.get_learning_rate()));
let mut n_leaves = 1;

let mut growable: Box<dyn Grower> = match grow_policy {
Expand Down Expand Up @@ -132,7 +133,7 @@ impl Tree {
n_leaves += n_new_nodes;
n_nodes += n_new_nodes;
for n in new_nodes {
self.nodes.push(n.as_node());
self.nodes.push(n.as_node(splitter.get_learning_rate()));
if !n.is_missing_leaf {
growable.add_node(n)
}