From 99e0a8238024bbffe7b476be9102b09eaa23dda9 Mon Sep 17 00:00:00 2001 From: jwagner31 Date: Wed, 13 Dec 2023 16:23:38 -0600 Subject: [PATCH] nearly done significance grid writeup --- notebooks/FinalMilestone.ipynb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/notebooks/FinalMilestone.ipynb b/notebooks/FinalMilestone.ipynb index c194377..60c09d9 100644 --- a/notebooks/FinalMilestone.ipynb +++ b/notebooks/FinalMilestone.ipynb @@ -35,6 +35,7 @@ "import folium\n", "from folium import plugins\n", "from pygam.pygam import LinearGAM, s, f, te, PoissonGAM\n", + "from scipy.stats import mannwhitneyu\n", "\n", "pd.options.display.max_columns = None\n", "pd.options.display.max_rows = None" @@ -119229,7 +119230,9 @@ "cell_type": "markdown", "id": "106d387d", "metadata": {}, - "source": [] + "source": [ + "Now we are ready to perform a statistical test with the data properly prepared and predicted. The aim of this test is to see if there is a significant difference between the hot and cold model for the count prediction of each bin within the grid. We do this with a Mann-Whitney U test, a non-parametric statistical test used to compare two independent samples. [More info on this test is found here](https://www.statology.org/mann-whitney-u-test/). The 'mannwhitneyu' function compares the hot model prediction counts and cold model prediction counts, and the 'greater' parameter specifies that we are testing if the hot predictions are greater than the cold predictions. It then returns the corresponding U-statistic and p-value. We analyze the results in the following cells." + ] }, { "cell_type": "code", @@ -119335,7 +119338,6 @@ } ], "source": [ - "from scipy.stats import mannwhitneyu\n", "#pred_diffs_df = X_test[[\"Long_bin\", \"Lat_bin\"]]\n", "def mann_whitney_bin(row):\n", " target_long, target_lat = row[\"Long_bin\"], row[\"Lat_bin\"]\n",