Skip to content

Commit

Permalink
Add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jinlow committed Dec 9, 2023
1 parent d08cb0a commit 56518aa
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 14 deletions.
3 changes: 2 additions & 1 deletion py-forust/forust/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,8 @@ def get_node_lists(self, map_features_names: bool = True) -> list[list[Node]]:
for t in model:
tree = []
for n in t["nodes"]:
n["split_feature"] = feature_map[n["split_feature"]]
if not n["is_leaf"]:
n["split_feature"] = feature_map[n["split_feature"]]
tree.append(Node(**n))
trees.append(tree)
return trees
Expand Down
32 changes: 28 additions & 4 deletions py-forust/tests/test_booster.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from xgboost import XGBClassifier, XGBRegressor

import forust
from forust import GradientBooster
from forust import GradientBooster, Node


def loggodds_to_odds(v):
Expand Down Expand Up @@ -116,18 +116,42 @@ def test_multiple_fit_calls(X_y):
assert np.allclose(fmod_preds, fmod_fit_again_preds)


@pytest.mark.parametrize(
    "colsample_bytree,create_missing_branch",
    list(itertools.product([0.25, 0.5, 0.75], [True, False])),
)
def test_colsample_bytree(X_y, colsample_bytree, create_missing_branch):
    """Column subsampling should alter predictions and bound features used per tree.

    Trains one model with all features and one with ``colsample_bytree``
    sampling, then checks the predictions differ and that no single tree in
    the sampled model splits on more than ``colsample_bytree`` of the columns.
    """
    X, y = X_y
    # Baseline: no column sampling, only the missing-branch behavior varies.
    fmod1 = GradientBooster(create_missing_branch=create_missing_branch)
    fmod1.fit(X, y=y)
    fmod1_preds = fmod1.predict(X)

    fmod2 = GradientBooster(
        colsample_bytree=colsample_bytree, create_missing_branch=create_missing_branch
    )
    fmod2.fit(X, y=y)
    fmod2_preds = fmod2.predict(X)

    # Restricting the columns each tree may split on must change the model.
    assert not np.allclose(fmod1_preds, fmod2_preds)

    # Assert that every tree uses at most colsample_bytree * n_features features.
    trees = fmod2.get_node_lists()

    def gather_feature_names(
        node: Node, tree: list[Node], features: set[str | int]
    ) -> None:
        # Recursively walk the tree, collecting the split feature of every
        # internal node; leaves terminate the recursion.
        if not node.is_leaf:
            features.add(node.split_feature)
            gather_feature_names(tree[node.right_child], tree, features)
            gather_feature_names(tree[node.left_child], tree, features)
            gather_feature_names(tree[node.missing_node], tree, features)

    for tree in trees:
        features: set[str | int] = set()
        gather_feature_names(tree[0], tree, features)
        # Each tree must actually split on something, but on no more than the
        # sampled fraction of the available columns.
        assert len(features) > 0
        assert len(features) <= (len(X.columns) * colsample_bytree)


def test_different_data_passed(X_y):
X, y = X_y
Expand Down
27 changes: 19 additions & 8 deletions src/gradientbooster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ use crate::tree::Tree;
use crate::utils::{fmt_vec_output, odds, validate_positive_float_field};
use log::info;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rand::seq::IteratorRandom;
use rand::SeedableRng;
use rayon::prelude::*;
use serde::{Deserialize, Deserializer, Serialize};
use std::collections::{HashMap, HashSet};
Expand Down Expand Up @@ -540,20 +541,30 @@ impl GradientBooster {
let mut tree = Tree::new();

// If we are doing any column sampling...
let fit_col_index = if self.colsample_bytree == 1.0 {
col_index.to_vec()
let colsample_index: Vec<usize> = if self.colsample_bytree == 1.0 {
Vec::new()
} else {
col_index
let amount = ((col_index.len() as f64) * self.colsample_bytree).floor() as usize;
let mut v: Vec<usize> = col_index
.iter()
.filter(|_| rng.gen_range(0.0..1.0) < self.colsample_bytree)
.copied()
.collect()
.choose_multiple(&mut rng, amount)
.iter()
.map(|i| **i)
.collect();
v.sort();
v
};

let fit_col_index = if self.colsample_bytree == 1.0 {
&col_index
} else {
&colsample_index
};

tree.fit(
&bdata,
chosen_index,
&fit_col_index,
fit_col_index,
&binned_data.cuts,
&grad,
&hess,
Expand Down
7 changes: 6 additions & 1 deletion src/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ mod tests {

let b = bin_matrix(&data, &w, 300, f64::NAN).unwrap();
let bdata = Matrix::new(&b.binned_data, data.rows, data.cols);
let col_index: Vec<usize> = vec![2, 3, 4];
let col_index: Vec<usize> = vec![1, 3];
tree.fit(
&bdata,
data.index.to_owned(),
Expand All @@ -733,6 +733,11 @@ mod tests {
&SampleMethod::None,
&GrowPolicy::DepthWise,
);
for n in tree.nodes {
if !n.is_leaf {
assert!((n.split_feature == 1) || (n.split_feature == 3))
}
}
}

#[test]
Expand Down

0 comments on commit 56518aa

Please sign in to comment.