Adding max delta step
jinlow committed Dec 12, 2023
1 parent 7aac829 commit 413f520
Showing 9 changed files with 139 additions and 5 deletions.
1 change: 1 addition & 0 deletions benches/forust_benchmarks.rs
@@ -35,6 +35,7 @@ pub fn tree_benchmarks(c: &mut Criterion) {
let splitter = MissingImputerSplitter {
l1: 0.0,
l2: 1.0,
max_delta_step: 0.,
gamma: 3.0,
min_leaf_weight: 1.0,
learning_rate: 0.3,
7 changes: 6 additions & 1 deletion py-forust/forust/__init__.py
@@ -285,6 +285,7 @@ def __init__(
l1: float = 0.0,
l2: float = 1.0,
gamma: float = 0.0,
max_delta_step: float = 0.0,
min_leaf_weight: float = 1.0,
base_score: float = 0.5,
nbins: int = 256,
@@ -327,6 +328,8 @@ def __init__(
l2 (float, optional): L2 regularization term applied to the weights of the tree. Valid values are 0 to infinity. Defaults to 1.0.
gamma (float, optional): The minimum amount of loss required to further split a node.
Valid values are 0 to infinity. Defaults to 0.0.
max_delta_step (float, optional): Maximum delta step allowed at each leaf. This is the maximum magnitude a
leaf can take. Setting to 0 results in no constraint. Defaults to 0.0.
min_leaf_weight (float, optional): Minimum sum of the hessian values of the loss function
required to be in a node. Defaults to 1.0.
base_score (float, optional): The initial prediction value of the model. If `initialize_base_score`
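For context, a minimal usage sketch of the new parameter, built only from constructor arguments exercised elsewhere in this commit; the training data X and y below are hypothetical:

from forust import GradientBooster

# Cap each leaf's raw weight at a magnitude of 0.5 before the learning rate
# is applied; max_delta_step=0.0 (the default) leaves the weights unconstrained.
model = GradientBooster(
    iterations=100,
    learning_rate=0.3,
    max_depth=5,
    l2=1.0,
    max_delta_step=0.5,
    objective_type="LogLoss",
)
model.fit(X, y)
preds = model.predict(X)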
@@ -459,6 +462,7 @@ def __init__(
l1=l1,
l2=l2,
gamma=gamma,
max_delta_step=max_delta_step,
min_leaf_weight=min_leaf_weight,
base_score=base_score,
nbins=nbins,
@@ -494,6 +498,7 @@ def __init__(
self.l1 = l1
self.l2 = l2
self.gamma = gamma
self.max_delta_step = max_delta_step
self.min_leaf_weight = min_leaf_weight
with warnings.catch_warnings():
warnings.simplefilter("ignore")
@@ -1076,7 +1081,7 @@ def __setstate__(self, d: dict[Any, Any]) -> None:
# that would have been loaded in as defaults on the json object?
# This makes sure that defaults set with a serde default function get
# carried through to the python object.
for p, v in booster_object.get_params():
for p, v in booster_object.get_params().items():
if p not in d:
d[p] = v
del d["__booster_json_file__"]
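The get_params().items() fix above also matters for older saved models: any parameter missing from the stored state, such as max_delta_step on boosters saved before this commit, is backfilled from the deserialized booster's defaults. A rough sketch of that behaviour, assuming the standard pickle protocol implied by __setstate__ and hypothetical training data X and y:

import pickle

fmod = GradientBooster(iterations=5, max_delta_step=1.0)
fmod.fit(X, y)

restored = pickle.loads(pickle.dumps(fmod))
# Parameters absent from an older pickle are filled in from the loaded
# booster's defaults, so the attribute is always present afterwards.
assert restored.max_delta_step == fmod.max_delta_step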
4 changes: 4 additions & 0 deletions py-forust/src/lib.rs
@@ -60,6 +60,7 @@ impl GradientBooster {
l1,
l2,
gamma,
max_delta_step,
min_leaf_weight,
base_score,
nbins,
@@ -92,6 +93,7 @@ impl GradientBooster {
l1: f32,
l2: f32,
gamma: f32,
max_delta_step: f32,
min_leaf_weight: f32,
base_score: f64,
nbins: u16,
@@ -137,6 +139,7 @@ impl GradientBooster {
l1,
l2,
gamma,
max_delta_step,
min_leaf_weight,
base_score,
nbins,
@@ -383,6 +386,7 @@ impl GradientBooster {
("l1", self.booster.l1.to_object(py)),
("l2", self.booster.l2.to_object(py)),
("gamma", self.booster.gamma.to_object(py)),
("max_delta_step", self.booster.max_delta_step.to_object(py)),
(
"min_leaf_weight",
self.booster.min_leaf_weight.to_object(py),
64 changes: 64 additions & 0 deletions py-forust/tests/test_booster.py
@@ -164,6 +164,70 @@ def test_booster_to_xgboosts_l1(X_y, l1):
assert not np.allclose(fmod2_preds, fmod_preds, atol=0.0001)


@pytest.mark.parametrize("max_delta_step", [0.0, 1.0, 2.0])
def test_booster_to_xgboosts_max_delta_step(X_y, max_delta_step):
# Compare against XGBoost with the same max_delta_step setting;
# the capped leaf weights should yield matching predictions.
X, y = X_y
c = X.columns
X = X[c].fillna(0)
xmod = XGBClassifier(
n_estimators=5,
learning_rate=0.3,
max_depth=5,
reg_lambda=1,
min_child_weight=1.0,
gamma=0,
max_delta_step=max_delta_step,
objective="binary:logitraw",
tree_method="exact",
)
xmod.fit(X, y)
xmod_preds = xmod.predict(X, output_margin=True)

fmod = GradientBooster(
base_score=0.5,
iterations=5,
learning_rate=0.3,
max_depth=5,
l2=1,
max_delta_step=max_delta_step,
min_leaf_weight=1.0,
gamma=0,
objective_type="LogLoss",
initialize_base_score=False,
)
fmod.fit(X, y=y)
fmod_preds = fmod.predict(X)
assert np.allclose(fmod_preds, xmod_preds, atol=0.0001)

# A model trained without max_delta_step should produce different predictions.
if max_delta_step > 0:
# The node weights will be capped at max_delta_step * learning_rate.
max_w = []
for tree in fmod.get_node_lists():
max_w.append(max(abs(n.weight_value) for n in tree))
assert max(max_w) <= max_delta_step * 0.3
fmod2 = GradientBooster(
base_score=0.5,
iterations=5,
learning_rate=0.3,
max_depth=5,
l2=1,
min_leaf_weight=1.0,
gamma=0,
objective_type="LogLoss",
initialize_base_score=False,
)
fmod2.fit(X, y=y)
fmod2_preds = fmod2.predict(X)
assert not np.allclose(fmod2_preds, fmod_preds, atol=0.0001)
max_w = []
for tree in fmod2.get_node_lists():
max_w.append(max(abs(n.weight_value) for n in tree))
assert max(max_w) > max_delta_step * 0.3
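The bound asserted above follows from how a leaf weight is formed: the unconstrained Newton step -G / (H + l2) is clipped to +/- max_delta_step, and only then scaled by the learning rate. A small worked sketch with made-up gradient and hessian sums:

l2, learning_rate, max_delta_step = 1.0, 0.3, 1.0
grad_sum, hess_sum = -50.0, 10.0  # hypothetical sums at a leaf

raw_weight = -grad_sum / (hess_sum + l2)  # about 4.55, far past the cap
capped = max(min(raw_weight, max_delta_step), -max_delta_step)  # 1.0
leaf_value = capped * learning_rate  # 0.3, i.e. max_delta_step * learning_rate

This mirrors the meaning XGBoost documents for max_delta_step; the exact forust computation lives in constrained_weight in src/splitter.rs, which also handles L1 and monotonicity bounds.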


def test_sklearn_clone(X_y):
X, y = X_y
fmod = GradientBooster(
12 changes: 12 additions & 0 deletions src/gradientbooster.rs
@@ -104,6 +104,9 @@ pub struct GradientBooster {
/// The minimum amount of loss required to further split a node.
/// Valid values are 0 to infinity.
pub gamma: f32,
/// Maximum delta step allowed at each leaf. This is the maximum magnitude a leaf can take. Setting to 0 results in no constraint.
#[serde(default = "default_max_delta_step")]
pub max_delta_step: f32,
/// Minimum sum of the hessian values of the loss function
/// required to be in a node.
pub min_leaf_weight: f32,
@@ -188,6 +191,9 @@ pub struct GradientBooster {
fn default_l1() -> f32 {
0.0
}
fn default_max_delta_step() -> f32 {
0.0
}

fn default_initialize_base_score() -> bool {
false
@@ -256,6 +262,7 @@ impl Default for GradientBooster {
0.,
1.,
0.,
0.,
1.,
0.5,
256,
@@ -337,6 +344,7 @@ impl GradientBooster {
l1: f32,
l2: f32,
gamma: f32,
max_delta_step: f32,
min_leaf_weight: f32,
base_score: f64,
nbins: u16,
@@ -369,6 +377,7 @@ impl GradientBooster {
l1,
l2,
gamma,
max_delta_step,
min_leaf_weight,
base_score,
nbins,
@@ -406,6 +415,7 @@ impl GradientBooster {
validate_positive_float_field!(self.l1);
validate_positive_float_field!(self.l2);
validate_positive_float_field!(self.gamma);
validate_positive_float_field!(self.max_delta_step);
validate_positive_float_field!(self.min_leaf_weight);
validate_positive_float_field!(self.subsample);
validate_positive_float_field!(self.top_rate);
@@ -436,6 +446,7 @@ impl GradientBooster {
let splitter = MissingBranchSplitter {
l1: self.l1,
l2: self.l2,
max_delta_step: self.max_delta_step,
gamma: self.gamma,
min_leaf_weight: self.min_leaf_weight,
learning_rate: self.learning_rate,
@@ -450,6 +461,7 @@ impl GradientBooster {
let splitter = MissingImputerSplitter {
l1: self.l1,
l2: self.l2,
max_delta_step: self.max_delta_step,
gamma: self.gamma,
min_leaf_weight: self.min_leaf_weight,
learning_rate: self.learning_rate,
1 change: 1 addition & 0 deletions src/partial_dependence.rs
@@ -99,6 +99,7 @@ mod tests {
let splitter = MissingImputerSplitter {
l1: 0.0,
l2: 1.0,
max_delta_step: 0.,
gamma: 3.0,
min_leaf_weight: 1.0,
learning_rate: 0.3,
31 changes: 30 additions & 1 deletion src/splitter.rs
@@ -52,6 +52,7 @@ pub trait Splitter {
fn get_gamma(&self) -> f32;
fn get_l1(&self) -> f32;
fn get_l2(&self) -> f32;
fn get_max_delta_step(&self) -> f32;
fn get_learning_rate(&self) -> f32;

/// Perform any post processing on the tree that is
@@ -247,6 +248,7 @@ pub trait Splitter {
pub struct MissingBranchSplitter {
pub l1: f32,
pub l2: f32,
pub max_delta_step: f32,
pub gamma: f32,
pub min_leaf_weight: f32,
pub learning_rate: f32,
@@ -334,6 +336,9 @@ impl Splitter for MissingBranchSplitter {
fn get_l2(&self) -> f32 {
self.l2
}
fn get_max_delta_step(&self) -> f32 {
self.max_delta_step
}

fn get_learning_rate(&self) -> f32 {
self.learning_rate
@@ -364,6 +369,7 @@ impl Splitter for MissingBranchSplitter {
let mut left_weight = constrained_weight(
&self.l1,
&self.l2,
&self.max_delta_step,
left_gradient,
left_hessian,
lower_bound,
@@ -373,6 +379,7 @@
let mut right_weight = constrained_weight(
&self.l1,
&self.l2,
&self.max_delta_step,
right_gradient,
right_hessian,
lower_bound,
@@ -405,6 +412,7 @@ impl Splitter for MissingBranchSplitter {
MissingNodeTreatment::AssignToParent => constrained_weight(
&self.get_l1(),
&self.get_l2(),
&self.max_delta_step,
missing_gradient + left_gradient + right_gradient,
missing_hessian + left_hessian + right_hessian,
lower_bound,
@@ -426,6 +434,7 @@ impl Splitter for MissingBranchSplitter {
constrained_weight(
&self.get_l1(),
&self.get_l2(),
&self.max_delta_step,
missing_gradient,
missing_hessian,
lower_bound,
@@ -712,6 +721,7 @@ impl Splitter for MissingBranchSplitter {
pub struct MissingImputerSplitter {
pub l1: f32,
pub l2: f32,
pub max_delta_step: f32,
pub gamma: f32,
pub min_leaf_weight: f32,
pub learning_rate: f32,
@@ -721,9 +731,11 @@ pub struct MissingImputerSplitter {

impl MissingImputerSplitter {
/// Generate a new missing imputer splitter object.
#[allow(clippy::too_many_arguments)]
pub fn new(
l1: f32,
l2: f32,
max_delta_step: f32,
gamma: f32,
min_leaf_weight: f32,
learning_rate: f32,
@@ -733,6 +745,7 @@ impl MissingImputerSplitter {
MissingImputerSplitter {
l1,
l2,
max_delta_step,
gamma,
min_leaf_weight,
learning_rate,
@@ -758,6 +771,9 @@ impl Splitter for MissingImputerSplitter {
fn get_l2(&self) -> f32 {
self.l2
}
fn get_max_delta_step(&self) -> f32 {
self.max_delta_step
}

fn get_learning_rate(&self) -> f32 {
self.learning_rate
@@ -799,6 +815,7 @@ impl Splitter for MissingImputerSplitter {
let mut left_weight = constrained_weight(
&self.l1,
&self.l2,
&self.max_delta_step,
left_gradient,
left_hessian,
lower_bound,
@@ -808,6 +825,7 @@
let mut right_weight = constrained_weight(
&self.l1,
&self.l2,
&self.max_delta_step,
right_gradient,
right_hessian,
lower_bound,
@@ -839,6 +857,7 @@ impl Splitter for MissingImputerSplitter {
let missing_left_weight = constrained_weight(
&self.l1,
&self.l2,
&self.max_delta_step,
left_gradient + missing_gradient,
left_hessian + missing_hessian,
lower_bound,
@@ -864,6 +883,7 @@ impl Splitter for MissingImputerSplitter {
let missing_right_weight = constrained_weight(
&self.l1,
&self.l2,
&self.max_delta_step,
right_gradient + missing_gradient,
right_hessian + missing_hessian,
lower_bound,
@@ -1051,6 +1071,7 @@ mod tests {
let splitter = MissingImputerSplitter {
l1: 0.0,
l2: 0.0,
max_delta_step: 0.,
gamma: 0.0,
min_leaf_weight: 0.0,
learning_rate: 1.0,
@@ -1098,6 +1119,7 @@ mod tests {
let splitter = MissingImputerSplitter {
l1: 0.0,
l2: 0.0,
max_delta_step: 0.,
gamma: 0.0,
min_leaf_weight: 0.0,
learning_rate: 1.0,
@@ -1144,6 +1166,7 @@ mod tests {
let splitter = MissingImputerSplitter {
l1: 0.0,
l2: 1.0,
max_delta_step: 0.,
gamma: 3.0,
min_leaf_weight: 1.0,
learning_rate: 0.3,
@@ -1152,7 +1175,13 @@
};
let gradient_sum = grad.iter().copied().sum();
let hessian_sum = hess.iter().copied().sum();
let root_weight = weight(&splitter.l1, &splitter.l2, gradient_sum, hessian_sum);
let root_weight = weight(
&splitter.l1,
&splitter.l2,
&splitter.max_delta_step,
gradient_sum,
hessian_sum,
);
let root_gain = gain(&splitter.l2, gradient_sum, hessian_sum);
let data = Matrix::new(&data_vec, 891, 5);

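The splitter changes above all thread max_delta_step into constrained_weight (and into the weight helper used in the test). The body of that Rust function is not part of this diff; the following Python sketch shows what such a helper plausibly computes, inferred only from the arguments it is called with here (l1, l2, max_delta_step, gradient and hessian sums, and monotonicity bounds):

def constrained_weight_sketch(l1, l2, max_delta_step, gradient, hessian, lower_bound, upper_bound):
    # L1 soft-thresholding of the gradient sum.
    if gradient > 0.0:
        g = max(gradient - l1, 0.0)
    else:
        g = min(gradient + l1, 0.0)
    # Newton step with L2 regularization.
    w = -g / (hessian + l2)
    # A positive max_delta_step caps the magnitude of the leaf weight; 0 means no cap.
    if max_delta_step > 0.0:
        w = max(min(w, max_delta_step), -max_delta_step)
    # Monotonicity constraints clamp the weight into the allowed interval.
    return max(min(w, upper_bound), lower_bound)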