diff --git a/docs/articles/machine_learning.html b/docs/articles/machine_learning.html
index bd58db858..e5a84d3bc 100644
--- a/docs/articles/machine_learning.html
+++ b/docs/articles/machine_learning.html
@@ -585,11 +585,11 @@


            Term           Contrast Estimate
  bill_length_mm +1                    0.000
- island         Dream - Biscoe        0.154
- island         Torgersen - Biscoe   -1.900
- sex            male - female         2.724
- species        Chinstrap - Adelie    6.035
- species        Gentoo - Adelie       5.933
+ island         Dream - Biscoe        0.227
+ island         Torgersen - Biscoe   -2.209
+ sex            male - female         2.605
+ species        Chinstrap - Adelie    6.088
+ species        Gentoo - Adelie       6.112
 
 Columns: term, contrast, estimate 
 Type:  numeric 
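The contrasts above come without standard errors because ranger is not supported as a standalone engine. They can still be sanity-checked by hand. A minimal sketch, not part of the vignette, assuming the fitted `mod` and the cleaned `penguins` data from the chunk above are still in scope: reproduce the `sex male - female` estimate by predicting on two counterfactual copies of the data.

# counterfactual datasets: everyone female vs. everyone male
lo <- transform(penguins, sex = factor("female", levels = levels(penguins$sex)))
hi <- transform(penguins, sex = factor("male", levels = levels(penguins$sex)))

# average difference in predictions; should be close to the estimate above (~2.605)
mean(predict(mod, new_data = hi)$.pred - predict(mod, new_data = lo)$.pred)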
@@ -645,35 +645,44 @@

library(modelsummary)
 
-recipe <- penguins |>
+# pre-processing
+pre <- penguins |>
     recipe(sex ~ ., data = _) |>
     step_ns(bill_length_mm, deg_free = 4) |>
     step_dummy(all_nominal_predictors())
 
+# modelling strategies
 models <- list(
-  logit = logistic_reg(mode = "classification", engine = "glm"),
-  forest = rand_forest(mode = "classification", engine = "ranger"),
-  xgb = boost_tree(mode = "classification", engine = "xgboost")
+  "Logit" = logistic_reg(mode = "classification", engine = "glm"),
+  "Random Forest" = rand_forest(mode = "classification", engine = "ranger"),
+  "XGBooost" = boost_tree(mode = "classification", engine = "xgboost")
 )
 
-lapply(models, \(x) {
-  recipe |>
+# fit to data
+fits <- lapply(models, \(x) {
+  pre |>
   workflow(spec = x) |>
-  fit(penguins) |>
-  avg_comparisons(newdata = penguins, type = "prob") }) |>
-  modelsummary(
-    shape = term + contrast + group ~ model,
-    coef_omit = "sex",
-    coef_rename = coef_rename)
+  fit(penguins)
+})
+
+# marginaleffects
+cmp <- lapply(fits, avg_comparisons, newdata = penguins, type = "prob")
+
+# summary table
+modelsummary(
+  cmp,
+  shape = term + contrast + group ~ model,
+  coef_omit = "sex",
+  coef_rename = coef_rename)
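One convenience of this refactor: `cmp` is an ordinary named list of avg_comparisons() data frames, so individual elements can be inspected before building the table. A small sketch under the same session assumptions as the chunk above:

# inspect one model's contrasts directly
cmp[["Random Forest"]]

# or filter another model's output like any data frame
subset(cmp[["Logit"]], term == "species")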
[HTML table diff reduced to bare -/+ pairs by extraction; the changed cells are the model column headers (logit, forest, xgb → Logit, Random Forest, XGBoost) and the Random Forest estimates. A reconstructed before/after table appears at the end of this diff.]

@@ -890,37 +899,37 @@


        Term      Contrast Estimate
  count      +1               0.000
- holiday    False - True    12.556
- humidity   +1             -23.124
- month      +1               3.892
- season     spring - fall  -31.058
- season     summer - fall   -8.917
- season     winter - fall    2.506
- temp       +1               3.409
- weather    misty - clear   -7.332
- weather    rain - clear   -58.989
- weekday    Fri - Sun       74.481
- weekday    Mon - Sun       82.925
- weekday    Sat - Sun       23.244
- weekday    Thu - Sun       90.207
- weekday    Tue - Sun       88.818
- weekday    Wed - Sun       91.096
- windspeed  +1               0.221
- workingday False - True  -187.642
- year       1 - 0           99.448
+ holiday    False - True    13.101
+ humidity   +1             -23.369
+ month      +1               3.902
+ season     spring - fall  -29.587
+ season     summer - fall   -8.385
+ season     winter - fall    3.458
+ temp       +1               3.701
+ weather    misty - clear   -7.973
+ weather    rain - clear   -59.693
+ weekday    Fri - Sun       69.059
+ weekday    Mon - Sun       77.247
+ weekday    Sat - Sun       20.396
+ weekday    Thu - Sun       84.326
+ weekday    Tue - Sun       83.109
+ weekday    Wed - Sun       85.338
+ windspeed  +1               0.136
+ workingday False - True  -190.696
+ year       1 - 0           97.664
 
 Columns: term, contrast, estimate 
 Type:  response 
-These results are easy to interpret: An increase of 1 degree Celsius in the temperature is associated with an increase of 3.409 bikes rented per hour.
+These results are easy to interpret: An increase of 1 degree Celsius in the temperature is associated with an increase of 3.701 bikes rented per hour.

We could obtain the same result manually as follows:

lo <- transform(bikes, temp = temp - 0.5)
 hi <- transform(bikes, temp = temp + 0.5)
 mean(predict(forest, newdata = hi) - predict(forest, newdata = lo))
-[1] 3.40943
+[1] 3.70069

@@ -934,7 +943,7 @@


  Term Contrast Estimate
- temp   custom      2.3
+ temp   custom     2.43
 
 Columns: term, contrast, estimate 
 Type:  response 
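The custom function `\(x) data.frame(x, x + 1)` swaps the default centered step (from x - 0.5 to x + 0.5) for a forward step (from x to x + 1). A minimal sketch of the same forward computation done manually, assuming the `forest` learner and `bikes` data from above are in scope:

lo <- bikes
hi <- transform(bikes, temp = temp + 1)

# average forward difference; should be close to the "custom" estimate above (~2.43)
mean(predict(forest, newdata = hi) - predict(forest, newdata = lo))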
@@ -949,7 +958,7 @@

feature = "temp", step.size = 1)$ame

-[1] 2.30096
+[1] 2.434684

With marginaleffects::avg_comparisons(), we can also compute the average effect of a simultaneous change in multiple predictors, using the variables and cross arguments. In this example, we see what happens (on average) to the predicted outcome when the temp, season, and weather predictors all change together:

@@ -962,12 +971,12 @@


  Estimate     C: season C: temp    C: weather
-   -34.75 spring - fall      +1 misty - clear
-   -77.35 spring - fall      +1 rain - clear 
-   -12.91 summer - fall      +1 misty - clear
-   -61.90 summer - fall      +1 rain - clear 
-    -1.75 winter - fall      +1 misty - clear
-   -55.39 winter - fall      +1 rain - clear 
+  -33.256 spring - fall      +1 misty - clear
+  -76.451 spring - fall      +1 rain - clear 
+  -12.461 summer - fall      +1 misty - clear
+  -62.248 summer - fall      +1 rain - clear 
+   -0.937 winter - fall      +1 misty - clear
+  -54.656 winter - fall      +1 rain - clear 
 
 Columns: term, contrast_season, contrast_temp, contrast_weather, estimate 
 Type:  response 
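Each row of this cross table can also be checked manually by moving all three predictors at once and averaging the prediction differences. A sketch for the first row, under the same assumptions as above (the reference levels "fall" and "clear" are read off the contrast labels, so treat them as assumptions):

# lo: reference levels (fall, clear) and a half-step down in temp
lo <- transform(bikes, temp = temp - 0.5,
  season = factor("fall", levels = levels(bikes$season)),
  weather = factor("clear", levels = levels(bikes$weather)))

# hi: contrast levels (spring, misty) and a half-step up in temp
hi <- transform(bikes, temp = temp + 0.5,
  season = factor("spring", levels = levels(bikes$season)),
  weather = factor("misty", levels = levels(bikes$weather)))

# should land near the "spring - fall / +1 / misty - clear" estimate (~ -33.3)
mean(predict(forest, newdata = hi) - predict(forest, newdata = lo))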
diff --git a/docs/articles/machine_learning_files/figure-html/unnamed-chunk-15-1.png b/docs/articles/machine_learning_files/figure-html/unnamed-chunk-15-1.png
index ca6df71dd..7fda34a18 100644
Binary files a/docs/articles/machine_learning_files/figure-html/unnamed-chunk-15-1.png and b/docs/articles/machine_learning_files/figure-html/unnamed-chunk-15-1.png differ
diff --git a/docs/articles/machine_learning_files/figure-html/unnamed-chunk-18-1.png b/docs/articles/machine_learning_files/figure-html/unnamed-chunk-18-1.png
index 553ac9a77..0876a3792 100644
Binary files a/docs/articles/machine_learning_files/figure-html/unnamed-chunk-18-1.png and b/docs/articles/machine_learning_files/figure-html/unnamed-chunk-18-1.png differ
diff --git a/docs/articles/machine_learning_files/figure-html/unnamed-chunk-19-1.png b/docs/articles/machine_learning_files/figure-html/unnamed-chunk-19-1.png
index fa60fec54..d549d8b6b 100644
Binary files a/docs/articles/machine_learning_files/figure-html/unnamed-chunk-19-1.png and b/docs/articles/machine_learning_files/figure-html/unnamed-chunk-19-1.png differ
diff --git a/docs/search.json b/docs/search.json
index 0d6b24bc8..80ad86bf5 100644
--- a/docs/search.json
+++ b/docs/search.json
@@ -704,14 +704,14 @@
[search.json diff omitted: the regenerated "19.1 tidymodels" and "19.2 mlr3" index entries duplicate the article text verbatim, with the same updated estimates and refactored code shown in the hunks above.]

Reconstructed values from the HTML table diff above (headers renamed from logit/forest/xgb to Logit/Random Forest/XGBoost; old → new where the Random Forest column changed):

 Term           Contrast           Group   Logit   Random Forest    XGBoost
 Bill Length Mm +1                 female  −0.101  −0.059 → −0.062  −0.075
                                   male     0.101   0.059 →  0.062   0.075
 Island         Dream - Biscoe     female  −0.044   0.011 →  0.007  −0.004
                                   male     0.044  −0.011 → −0.007   0.004
                Torgersen - Biscoe female   0.015  −0.056 → −0.063   0.008
                                   male    −0.015   0.056 →  0.063  −0.008
 Species        Chinstrap - Adelie female   0.562   0.162 →  0.166   0.441
                                   male    −0.562  −0.162 → −0.166  −0.441
                Gentoo - Adelie    female   0.453   0.119 →  0.110   0.361
                                   male    −0.453  −0.119 → −0.110  −0.361