Merge pull request #64 from jinlow/force-chilred-to-contain-parent

force children to contain parent weight

jinlow authored Aug 23, 2023
2 parents a2b1e17 + 5978b88 commit 145f705
Showing 11 changed files with 144 additions and 33 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "forust-ml"
version = "0.2.20"
version = "0.2.21"
edition = "2021"
authors = ["James Inlow <[email protected]>"]
homepage = "https://github.com/jinlow/forust"
@@ -24,7 +24,7 @@ log = "0.4.19"

[dev-dependencies]
criterion = "0.5"
polars = "0.29"
polars = "0.32.1"
reqwest = { version = "0.11", features = ["blocking"] }

[[bench]]
2 changes: 1 addition & 1 deletion README.md
@@ -29,7 +29,7 @@ pip install forust

To use in a rust project add the following to your Cargo.toml file.
```toml
forust-ml = "0.2.20"
forust-ml = "0.2.21"
```

## Usage
4 changes: 2 additions & 2 deletions py-forust/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "py-forust"
version = "0.2.20"
version = "0.2.21"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -10,7 +10,7 @@ crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.19.0", features = ["extension-module"] }
forust-ml = { version = "0.2.20", path = "../" }
forust-ml = { version = "0.2.21", path = "../" }
numpy = "0.19.0"
ndarray = "0.15.1"
serde_plain = { version = "1.0" }
34 changes: 32 additions & 2 deletions py-forust/forust/__init__.py
@@ -44,6 +44,7 @@ class BoosterType(Protocol):
monotone_constraints: dict[int, int]
prediction_iteration: None | int
best_iteration: None | int
base_score: float
terminate_missing_features: set[int]

def fit(
@@ -190,6 +191,7 @@ def __init__(
terminate_missing_features: Iterable[Any] | None = None,
missing_node_treatment: str = "AssignToParent",
log_iterations: int = 0,
force_children_to_bound_parent: bool = False,
):
"""Gradient Booster Class, used to generate gradient boosted decision tree ensembles.
@@ -270,6 +272,7 @@ def __init__(
- "AverageLeafWeight": After training each tree, starting from the bottom of the tree, assign the missing node weight to the weighted average of the left and right child nodes. Next assign the parent to the weighted average of the children nodes. This is performed recursively up through the entire tree. This is performed as a post processing step on each tree after it is built, and prior to updating the predictions for which to train the next tree.
- "AverageNodeWeight": Set the missing node to be equal to the weighted average weight of the left and the right nodes.
log_iterations (int, optional): Setting to a value (N) other than zero will result in information being logged about every N iterations; this info can be interacted with directly with the python [`logging`](https://docs.python.org/3/howto/logging.html) module. For an example of how to utilize the logging information see the example [here](/#logging-output).
force_children_to_bound_parent (bool, optional): Setting this parameter to `True` will restrict children nodes, so that they always contain the parent node inside of their range. Without setting this, it's possible that both the left and the right child nodes could be greater than, or less than, the parent node. Defaults to `False`.
Raises:
TypeError: Raised if an invalid dtype is passed.
@@ -347,6 +350,7 @@ def __init__(
terminate_missing_features=set(),
missing_node_treatment=missing_node_treatment,
log_iterations=log_iterations,
force_children_to_bound_parent=force_children_to_bound_parent,
)
monotone_constraints_ = (
{} if monotone_constraints is None else monotone_constraints
@@ -360,7 +364,8 @@ def __init__(
self.l2 = l2
self.gamma = gamma
self.min_leaf_weight = min_leaf_weight
self.base_score = base_score
# Use booster getter, as it's more dynamic
# self.base_score = base_score
self.nbins = nbins
self.parallel = parallel
self.allow_missing_splits = allow_missing_splits
Expand All @@ -377,6 +382,8 @@ def __init__(
self.initialize_base_score = initialize_base_score
self.terminate_missing_features = terminate_missing_features_
self.missing_node_treatment = missing_node_treatment
self.log_iterations = log_iterations
self.force_children_to_bound_parent = force_children_to_bound_parent

def fit(
self,
@@ -551,7 +558,7 @@ def set_prediction_iteration(self, iteration: int):
Args:
iteration (int): Iteration number to use; this will use all trees up to this
index.
index. Setting this to 10 would result in trees 0 through 9 being used for predictions.
"""
self.booster.prediction_iteration = iteration

@@ -843,6 +850,29 @@ def get_evaluation_history(self) -> np.ndarray | None:
r, v, d = self.booster.get_evaluation_history()
return d.reshape((r, v))

@property
def best_iteration(self) -> int | None:
"""Get the best iteration if `early_stopping_rounds` was used when fitting.
Returns:
int | None: The best iteration, or None if `early_stopping_rounds` wasn't used.
"""
return self.booster.best_iteration

@property
def base_score(self) -> float:
"""Base score used as initial prediction value"""
return self.booster.base_score

@property
def prediction_iteration(self) -> int | None:
"""The prediction_iteration that will be used when predicting, up to this many trees will be used.
Returns:
int | None: Int if this is set, otherwise, None, in which case all trees will be used.
"""
return self.booster.prediction_iteration

def get_best_iteration(self) -> int | None:
"""Get the best iteration if `early_stopping_rounds` was used when fitting.
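The new flag and the read-only properties above are user-facing, so the wrapper can be exercised end to end. A minimal usage sketch, assuming forust is installed and using a small synthetic dataset (the data and variable names are illustrative):

```python
import numpy as np
import pandas as pd
from forust import GradientBooster

rng = np.random.default_rng(0)
X = pd.DataFrame({"a": rng.normal(size=200), "b": rng.normal(size=200)})
y = (X["a"] + rng.normal(size=200) > 0).astype(float)

# Constrain every split so the parent weight falls within the children's range.
model = GradientBooster(force_children_to_bound_parent=True)
model.fit(X, y)

print(model.base_score)             # now read from the Rust booster via a property
model.set_prediction_iteration(10)  # use trees 0 through 9 when predicting
preds = model.predict(X)
```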
17 changes: 17 additions & 0 deletions py-forust/src/lib.rs
@@ -79,6 +79,7 @@ impl GradientBooster {
terminate_missing_features,
missing_node_treatment,
log_iterations,
force_children_to_bound_parent,
))]
pub fn new(
objective_type: &str,
@@ -108,6 +109,7 @@ impl GradientBooster {
terminate_missing_features: HashSet<usize>,
missing_node_treatment: &str,
log_iterations: usize,
force_children_to_bound_parent: bool,
) -> PyResult<Self> {
let constraints = int_map_to_constraint_map(monotone_constraints)?;
let objective_ = to_value_error(serde_plain::from_str(objective_type))?;
@@ -150,6 +152,7 @@ impl GradientBooster {
terminate_missing_features,
missing_node_treatment_,
log_iterations,
force_children_to_bound_parent,
);
Ok(GradientBooster {
booster: to_value_error(booster)?,
@@ -175,11 +178,21 @@ impl GradientBooster {
Ok(())
}

#[getter]
fn prediction_iteration(&self) -> PyResult<Option<usize>> {
Ok(self.booster.prediction_iteration)
}

#[getter]
fn best_iteration(&self) -> PyResult<Option<usize>> {
Ok(self.booster.best_iteration)
}

#[getter]
fn base_score(&self) -> PyResult<f64> {
Ok(self.booster.base_score)
}

pub fn fit(
&mut self,
flat_data: PyReadonlyArray1<f64>,
@@ -399,6 +412,10 @@ impl GradientBooster {
missing_node_treatment_.to_object(py),
),
("log_iterations", self.booster.log_iterations.to_object(py)),
(
"force_children_to_bound_parent",
self.booster.force_children_to_bound_parent.to_object(py),
),
];
let dict = key_vals.into_py_dict(py);
Ok(dict.to_object(py))
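The `#[getter]` functions expose the Rust booster's fields to Python as read-only attributes, which the pure-Python properties in `forust/__init__.py` defer to. A short sketch of how they surface, continuing from the fitted `model` in the previous sketch:

```python
# `model.booster` is the Rust-backed object; the #[getter] attributes are
# plain attribute reads, not method calls.
print(model.booster.base_score)            # float, set during fitting
print(model.booster.prediction_iteration)  # None unless explicitly set
print(model.booster.best_iteration)        # None without early stopping
```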
2 changes: 1 addition & 1 deletion rs-example.md
@@ -3,7 +3,7 @@
To run this example, add the following code to your `Cargo.toml` file.
```toml
[dependencies]
forust-ml = "0.2.20"
forust-ml = "0.2.21"
polars = "0.28"
reqwest = { version = "0.11", features = ["blocking"] }
```
10 changes: 10 additions & 0 deletions src/gradientbooster.rs
@@ -165,6 +165,9 @@ pub struct GradientBooster {
/// Should the model be trained showing output.
#[serde(default = "default_log_iterations")]
pub log_iterations: usize,
/// Should the children nodes contain the parent node in their bounds? Setting this to true will prevent splits where the higher and lower child values are both greater than, or both less than, the parent weight.
#[serde(default = "default_force_children_to_bound_parent")]
pub force_children_to_bound_parent: bool,
// Members internal to the booster object, and not parameters set by the user.
// Trees is public, just to interact with it directly in the python wrapper.
pub trees: Vec<Tree>,
@@ -214,6 +217,9 @@ fn default_missing_node_treatment() -> MissingNodeTreatment {
fn default_log_iterations() -> usize {
0
}
fn default_force_children_to_bound_parent() -> bool {
false
}

fn parse_missing<'de, D>(d: D) -> Result<f64, D::Error>
where
@@ -252,6 +258,7 @@ impl Default for GradientBooster {
HashSet::new(),
MissingNodeTreatment::AssignToParent,
0,
false,
)
.unwrap()
}
@@ -329,6 +336,7 @@ impl GradientBooster {
terminate_missing_features: HashSet<usize>,
missing_node_treatment: MissingNodeTreatment,
log_iterations: usize,
force_children_to_bound_parent: bool,
) -> Result<Self, ForustError> {
let (base_score_, initialize_base_score_) = match base_score {
Some(v) => (v, initialize_base_score),
@@ -365,6 +373,7 @@
prediction_iteration: None,
missing_node_treatment,
log_iterations,
force_children_to_bound_parent,
trees: Vec::new(),
metadata: HashMap::new(),
};
@@ -411,6 +420,7 @@
constraints_map,
terminate_missing_features: self.terminate_missing_features.clone(),
missing_node_treatment: self.missing_node_treatment,
force_children_to_bound_parent: self.force_children_to_bound_parent,
};
self.fit_trees(y, sample_weight, data, &splitter, evaluation_data)?;
} else {
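The `#[serde(default = ...)]` attribute keeps previously serialized models loadable: a model saved before this field existed deserializes with `force_children_to_bound_parent` falling back to `false`. A rough Python analogue of that fallback behavior (a sketch, not forust's actual loading code):

```python
import json

def load_booster_params(raw: str) -> dict:
    params = json.loads(raw)
    # Mirror serde's `default`: an absent key falls back to a default
    # value instead of causing deserialization to fail.
    params.setdefault("force_children_to_bound_parent", False)
    return params

old_model_json = '{"log_iterations": 0}'  # saved before the new field existed
print(load_booster_params(old_model_json)["force_children_to_bound_parent"])  # False
```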
4 changes: 2 additions & 2 deletions src/node.rs
@@ -184,10 +184,10 @@ impl SplittableNode {
self.missing_node = missing_child;
self.is_leaf = false;
}
pub fn as_node(&self) -> Node {
pub fn as_node(&self, learning_rate: f32) -> Node {
Node {
num: self.num,
weight_value: self.weight_value,
weight_value: self.weight_value * learning_rate,
hessian_sum: self.hessian_sum,
depth: self.depth,
missing_node: self.missing_node,
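Learning-rate shrinkage now happens once, when a `SplittableNode` is frozen into a `Node`, instead of inside the split-evaluation loop (see the block removed from `src/splitter.rs` below). Presumably this keeps the new bounding logic comparing raw, unshrunk weights. A toy illustration of why the ordering matters (illustrative numbers, not forust internals):

```python
learning_rate = 0.1
parent_raw, left_raw, right_raw = 1.0, 1.2, 1.5

# Bounding on raw weights: both children exceed the parent, so the smaller
# child is clamped to the parent's raw weight, and shrinkage is applied
# once when the node is frozen via as_node.
left_bounded = min(left_raw, parent_raw)    # 1.0 -> range now contains parent
stored_left = left_bounded * learning_rate  # 0.1 is what the tree stores

# Had shrinkage been applied first, a shrunk child (0.12) would be compared
# against a raw parent (1.0), clamping on mismatched scales.
```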
47 changes: 26 additions & 21 deletions src/splitter.rs
@@ -7,7 +7,7 @@ use crate::histogram::HistogramMatrix;
use crate::node::SplittableNode;
use crate::tree::Tree;
use crate::utils::{
constrained_weight, cull_gain, gain_given_weight, pivot_on_split,
between, bound_to_parent, constrained_weight, cull_gain, gain_given_weight, pivot_on_split,
pivot_on_split_exclude_missing,
};

@@ -87,6 +87,7 @@ pub trait Splitter {
missing_hessian: f32,
lower_bound: f32,
upper_bound: f32,
parent_weight: f32,
constraint: Option<&Constraint>,
) -> Option<(NodeInfo, NodeInfo, MissingInfo)>;

@@ -112,18 +113,18 @@ pub trait Splitter {
let right_gradient = node.gradient_sum - cuml_grad - missing.gradient_sum;
let right_hessian = node.hessian_sum - cuml_hess - missing.hessian_sum;

let (mut left_node_info, mut right_node_info, mut missing_info) = match self
.evaluate_split(
left_gradient,
left_hessian,
right_gradient,
right_hessian,
missing.gradient_sum,
missing.hessian_sum,
node.lower_bound,
node.upper_bound,
constraint,
) {
let (mut left_node_info, mut right_node_info, missing_info) = match self.evaluate_split(
left_gradient,
left_hessian,
right_gradient,
right_hessian,
missing.gradient_sum,
missing.hessian_sum,
node.lower_bound,
node.upper_bound,
node.weight_value,
constraint,
) {
None => {
cuml_grad += bin.gradient_sum;
cuml_hess += bin.hessian_sum;
@@ -170,12 +171,7 @@
};
left_node_info.bounds = left_bounds;
right_node_info.bounds = right_bounds;
// Apply shrinkage at this point...
left_node_info.weight *= self.get_learning_rate();
right_node_info.weight *= self.get_learning_rate();
if let MissingInfo::Branch(info) | MissingInfo::Leaf(info) = &mut missing_info {
info.weight *= self.get_learning_rate();
}

// If split gain is NaN, one of the sides is empty, do not allow
// this split.
let split_gain = if split_gain.is_nan() { 0.0 } else { split_gain };
@@ -252,6 +248,7 @@ pub struct MissingBranchSplitter {
pub constraints_map: ConstraintMap,
pub terminate_missing_features: HashSet<usize>,
pub missing_node_treatment: MissingNodeTreatment,
pub force_children_to_bound_parent: bool,
}

impl MissingBranchSplitter {
@@ -339,6 +336,7 @@ impl Splitter for MissingBranchSplitter {
missing_hessian: f32,
lower_bound: f32,
upper_bound: f32,
parent_weight: f32,
constraint: Option<&Constraint>,
) -> Option<(NodeInfo, NodeInfo, MissingInfo)> {
// If there is no info right, or there is no
@@ -350,15 +348,15 @@
return None;
}

let left_weight = constrained_weight(
let mut left_weight = constrained_weight(
&self.l2,
left_gradient,
left_hessian,
lower_bound,
upper_bound,
constraint,
);
let right_weight = constrained_weight(
let mut right_weight = constrained_weight(
&self.l2,
right_gradient,
right_hessian,
@@ -367,6 +365,12 @@
constraint,
);

if self.force_children_to_bound_parent {
(left_weight, right_weight) = bound_to_parent(parent_weight, left_weight, right_weight);
assert!(between(lower_bound, upper_bound, left_weight));
assert!(between(lower_bound, upper_bound, right_weight));
}

let left_gain = gain_given_weight(&self.l2, left_gradient, left_hessian, left_weight);
let right_gain = gain_given_weight(&self.l2, right_gradient, right_hessian, right_weight);

@@ -726,6 +730,7 @@ impl Splitter for MissingImputerSplitter {
missing_hessian: f32,
lower_bound: f32,
upper_bound: f32,
_parent_weight: f32,
constraint: Option<&Constraint>,
) -> Option<(NodeInfo, NodeInfo, MissingInfo)> {
// If there is no info right, or there is no
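The body of `bound_to_parent` isn't shown in this diff, only its import and call site. One plausible reading of its behavior, based on the parameter's documentation (children must always contain the parent weight in their range), sketched in Python with hypothetical names:

```python
def bound_to_parent(parent_weight: float,
                    left_weight: float,
                    right_weight: float) -> tuple[float, float]:
    """Clamp child weights so the parent weight lies within their range."""
    if min(left_weight, right_weight) > parent_weight:
        # Both children sit above the parent: pull the smaller one down.
        if left_weight <= right_weight:
            left_weight = parent_weight
        else:
            right_weight = parent_weight
    elif max(left_weight, right_weight) < parent_weight:
        # Both children sit below the parent: pull the larger one up.
        if left_weight >= right_weight:
            left_weight = parent_weight
        else:
            right_weight = parent_weight
    return left_weight, right_weight

print(bound_to_parent(0.0, 0.5, 1.0))   # (0.0, 1.0): left clamped down
print(bound_to_parent(0.0, -0.5, 1.0))  # (-0.5, 1.0): already contains parent
```

The assertions at the call site then check that the clamped weights still fall within the node's monotonicity bounds.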
5 changes: 3 additions & 2 deletions src/tree.rs
@@ -83,7 +83,8 @@ impl Tree {
f32::INFINITY,
);
// Add the first node to the tree nodes.
self.nodes.push(root_node.as_node());
self.nodes
.push(root_node.as_node(splitter.get_learning_rate()));
let mut n_leaves = 1;

let mut growable: Box<dyn Grower> = match grow_policy {
Expand Down Expand Up @@ -132,7 +133,7 @@ impl Tree {
n_leaves += n_new_nodes;
n_nodes += n_new_nodes;
for n in new_nodes {
self.nodes.push(n.as_node());
self.nodes.push(n.as_node(splitter.get_learning_rate()));
if !n.is_missing_leaf {
growable.add_node(n)
}