diff --git a/notebooks/03_Take_Home_Exam.ipynb b/notebooks/03_Take_Home_Exam.ipynb
deleted file mode 100644
index 6f65005..0000000
--- a/notebooks/03_Take_Home_Exam.ipynb
+++ /dev/null
@@ -1,1301 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "15568cc0-be91-49e5-9a8c-1030cc298650",
- "metadata": {
- "tags": []
- },
- "source": [
- "# Take Home Exam: Mobility Index Calculation and Profiling"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "94ccdd38",
- "metadata": {},
- "source": [
- "## 1. Create \"Mobility Index\" and \"Mobility Class\""
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b74fd38c",
- "metadata": {},
- "source": [
- "Using the following features/indicators:
\n",
- "
\n",
- " 1. Total Distance Traveled
\n",
- " 2. Radius of Gyration
\n",
- " 3. Activity Entropy
\n",
- " \n",
- "Create a calculated feature called **Mobility Index** (type: decimal/float) and a **Mobility Class** (categorized as Low, Mid, or High) for each subscriber.
\n",
- "The team is free to use any method or technique to arrive at the **OPTIMAL** Mobility Index and Mobility Class, as long as it is supported by the literature.
\n",
- "
\n",
- "**Deadline of the submission is September 1, 2023.**
\n",
- "
\n",
- "
\n",
- "**Criteria for scoring**
\n",
- "1. Creation of mobility class - 50 pts
\n",
- "2. Creation of mobility index - 30 pts
\n",
- "3. Efficiency of process - 20 pts
\n",
- " Total - 100 pts \n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "835e89f7",
- "metadata": {},
- "source": [
- "## 2. Example of Mobility Index"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "1cfd98fc",
- "metadata": {},
- "outputs": [],
- "source": [
- "import random\n",
- "import shapely\n",
- "import pendulum\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "from scipy import stats\n",
- "pd.options.display.max_rows=200\n",
- "import geopandas as gpd\n",
- "import matplotlib.pyplot as plt\n",
- "from IPython.display import HTML, display\n",
- "from functools import reduce\n",
- "import pyproj\n",
- "from functools import partial"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "238621ee",
- "metadata": {},
- "source": [
- "#### Sample ABT"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "91355f76",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Unnamed: 0 | \n",
- " sub_uid | \n",
- " gender | \n",
- " age | \n",
- " name | \n",
- " chi_indicator | \n",
- " ewallet_user_indicator | \n",
- " total_travel_distance | \n",
- " radius_of_gyration | \n",
- " activity_entropy | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 32 | \n",
- " 32 | \n",
- " glo-sub-023 | \n",
- " female | \n",
- " 62 | \n",
- " Brandi Taylor | \n",
- " True | \n",
- " Y | \n",
- " 187400.353151 | \n",
- " 2773.800971 | \n",
- " 1.393043 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 22 | \n",
- " glo-sub-061 | \n",
- " male | \n",
- " 64 | \n",
- " David Evans | \n",
- " False | \n",
- " Y | \n",
- " 168249.882120 | \n",
- " 1710.550891 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 17 | \n",
- " glo-sub-069 | \n",
- " male | \n",
- " 51 | \n",
- " Kevin Gibson | \n",
- " True | \n",
- " N | \n",
- " 162675.199376 | \n",
- " 1274.739666 | \n",
- " 1.279325 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 4 | \n",
- " glo-sub-076 | \n",
- " male | \n",
- " 46 | \n",
- " Colin Mejia | \n",
- " False | \n",
- " N | \n",
- " 123646.921636 | \n",
- " 1671.299423 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 98 | \n",
- " 98 | \n",
- " glo-sub-046 | \n",
- " male | \n",
- " 24 | \n",
- " Luis Jackson | \n",
- " True | \n",
- " Y | \n",
- " 385134.568227 | \n",
- " 2206.908060 | \n",
- " 1.228771 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Unnamed: 0 sub_uid gender age name chi_indicator \\\n",
- "32 32 glo-sub-023 female 62 Brandi Taylor True \n",
- "22 22 glo-sub-061 male 64 David Evans False \n",
- "17 17 glo-sub-069 male 51 Kevin Gibson True \n",
- "4 4 glo-sub-076 male 46 Colin Mejia False \n",
- "98 98 glo-sub-046 male 24 Luis Jackson True \n",
- "\n",
- " ewallet_user_indicator total_travel_distance radius_of_gyration \\\n",
- "32 Y 187400.353151 2773.800971 \n",
- "22 Y 168249.882120 1710.550891 \n",
- "17 N 162675.199376 1274.739666 \n",
- "4 N 123646.921636 1671.299423 \n",
- "98 Y 385134.568227 2206.908060 \n",
- "\n",
- " activity_entropy \n",
- "32 1.393043 \n",
- "22 NaN \n",
- "17 1.279325 \n",
- "4 NaN \n",
- "98 1.228771 "
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "file_path_sample_data = \"C:/Users/10012425/Desktop/sds4gdsp/scoring_base.csv\"\n",
- "ABT_mobility = pd.read_csv(file_path_sample_data)\n",
- "ABT_mobility.sample(5)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "dc963d69",
- "metadata": {},
- "source": [
- "### Apply min max scaling capping"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "c74ec061",
- "metadata": {},
- "outputs": [],
- "source": [
- "from sklearn.preprocessing import MinMaxScaler"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "a396858e",
- "metadata": {},
- "outputs": [],
- "source": [
- "df_capping = pd.DataFrame(ABT_mobility, columns=['total_travel_distance', 'radius_of_gyration','activity_entropy'])\n",
- "\n",
- "column_headers = df_capping.columns.tolist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "3186fb50",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " total_travel_distance | \n",
- " radius_of_gyration | \n",
- " activity_entropy | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0.000000 | \n",
- " 0.015741 | \n",
- " 0.400992 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.076253 | \n",
- " 0.182373 | \n",
- " 0.504028 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.098540 | \n",
- " 0.175793 | \n",
- " 0.692796 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 0.100956 | \n",
- " 0.024102 | \n",
- " 0.004103 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0.102464 | \n",
- " 0.369021 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 0.126786 | \n",
- " 0.000000 | \n",
- " 0.128279 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 0.130826 | \n",
- " 0.088019 | \n",
- " 0.368835 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 0.137770 | \n",
- " 0.024837 | \n",
- " 0.115453 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 0.139332 | \n",
- " 0.223741 | \n",
- " 0.638321 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 0.141096 | \n",
- " 0.183562 | \n",
- " 0.408091 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 0.145877 | \n",
- " 0.018146 | \n",
- " 0.150974 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 0.164302 | \n",
- " 0.224896 | \n",
- " 0.669492 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 0.166832 | \n",
- " 0.153687 | \n",
- " 0.657595 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 0.168286 | \n",
- " 0.053673 | \n",
- " 0.128041 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 0.173987 | \n",
- " 0.652952 | \n",
- " 0.862067 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 0.193627 | \n",
- " 0.094938 | \n",
- " 0.560008 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 0.206985 | \n",
- " 0.459849 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 0.227320 | \n",
- " 0.150694 | \n",
- " 0.693717 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 0.229192 | \n",
- " 0.116003 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 0.231166 | \n",
- " 0.109158 | \n",
- " 0.392414 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 0.232562 | \n",
- " 0.132373 | \n",
- " 0.719609 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 0.238558 | \n",
- " 0.151386 | \n",
- " 0.513148 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 0.245155 | \n",
- " 0.390631 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " 0.249958 | \n",
- " 0.694434 | \n",
- " 0.734925 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 0.250839 | \n",
- " 0.126739 | \n",
- " 0.564629 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 0.251877 | \n",
- " 0.456084 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 0.262651 | \n",
- " 0.316213 | \n",
- " 0.641308 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 0.263391 | \n",
- " 0.734126 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 0.271244 | \n",
- " 0.187688 | \n",
- " 0.644671 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 0.278703 | \n",
- " 0.209653 | \n",
- " 0.697244 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " 0.290667 | \n",
- " 0.420993 | \n",
- " 0.830829 | \n",
- "
\n",
- " \n",
- " 31 | \n",
- " 0.293685 | \n",
- " 0.745656 | \n",
- " 0.881326 | \n",
- "
\n",
- " \n",
- " 32 | \n",
- " 0.306420 | \n",
- " 0.976004 | \n",
- " 0.846744 | \n",
- "
\n",
- " \n",
- " 33 | \n",
- " 0.324686 | \n",
- " 0.250294 | \n",
- " 0.780299 | \n",
- "
\n",
- " \n",
- " 34 | \n",
- " 0.326003 | \n",
- " 0.416509 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 35 | \n",
- " 0.334206 | \n",
- " 0.350879 | \n",
- " 0.441645 | \n",
- "
\n",
- " \n",
- " 36 | \n",
- " 0.367764 | \n",
- " 0.224580 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 37 | \n",
- " 0.373848 | \n",
- " 0.392623 | \n",
- " 0.465124 | \n",
- "
\n",
- " \n",
- " 38 | \n",
- " 0.393327 | \n",
- " 0.585133 | \n",
- " 0.818701 | \n",
- "
\n",
- " \n",
- " 39 | \n",
- " 0.393600 | \n",
- " 0.584121 | \n",
- " 0.846100 | \n",
- "
\n",
- " \n",
- " 40 | \n",
- " 0.400479 | \n",
- " 0.487088 | \n",
- " 0.749415 | \n",
- "
\n",
- " \n",
- " 41 | \n",
- " 0.406267 | \n",
- " 0.665625 | \n",
- " 0.747511 | \n",
- "
\n",
- " \n",
- " 42 | \n",
- " 0.410720 | \n",
- " 0.932120 | \n",
- " 0.942203 | \n",
- "
\n",
- " \n",
- " 43 | \n",
- " 0.422581 | \n",
- " 0.415363 | \n",
- " 0.388612 | \n",
- "
\n",
- " \n",
- " 44 | \n",
- " 0.435018 | \n",
- " 0.468543 | \n",
- " 0.284707 | \n",
- "
\n",
- " \n",
- " 45 | \n",
- " 0.441460 | \n",
- " 0.262436 | \n",
- " 0.805165 | \n",
- "
\n",
- " \n",
- " 46 | \n",
- " 0.443289 | \n",
- " 0.373030 | \n",
- " 0.488329 | \n",
- "
\n",
- " \n",
- " 47 | \n",
- " 0.454203 | \n",
- " 1.000000 | \n",
- " 0.921311 | \n",
- "
\n",
- " \n",
- " 48 | \n",
- " 0.461219 | \n",
- " 0.954661 | \n",
- " 0.874746 | \n",
- "
\n",
- " \n",
- " 49 | \n",
- " 0.471829 | \n",
- " 0.909112 | \n",
- " 0.890261 | \n",
- "
\n",
- " \n",
- " 50 | \n",
- " 0.476076 | \n",
- " 0.632397 | \n",
- " 0.780436 | \n",
- "
\n",
- " \n",
- " 51 | \n",
- " 0.482461 | \n",
- " 0.619962 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 52 | \n",
- " 0.486138 | \n",
- " 0.300776 | \n",
- " 0.892967 | \n",
- "
\n",
- " \n",
- " 53 | \n",
- " 0.493235 | \n",
- " 0.687713 | \n",
- " 0.836162 | \n",
- "
\n",
- " \n",
- " 54 | \n",
- " 0.495475 | \n",
- " 0.502072 | \n",
- " 0.647253 | \n",
- "
\n",
- " \n",
- " 55 | \n",
- " 0.503071 | \n",
- " 0.688536 | \n",
- " 0.787323 | \n",
- "
\n",
- " \n",
- " 56 | \n",
- " 0.505641 | \n",
- " 0.852631 | \n",
- " 0.950744 | \n",
- "
\n",
- " \n",
- " 57 | \n",
- " 0.508837 | \n",
- " 0.899560 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 58 | \n",
- " 0.518911 | \n",
- " 0.629803 | \n",
- " 0.561174 | \n",
- "
\n",
- " \n",
- " 59 | \n",
- " 0.525449 | \n",
- " 0.415542 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 60 | \n",
- " 0.533844 | \n",
- " 0.760338 | \n",
- " 0.998669 | \n",
- "
\n",
- " \n",
- " 61 | \n",
- " 0.539236 | \n",
- " 0.924828 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 62 | \n",
- " 0.539789 | \n",
- " 0.722342 | \n",
- " 0.693652 | \n",
- "
\n",
- " \n",
- " 63 | \n",
- " 0.547888 | \n",
- " 0.399018 | \n",
- " 0.389120 | \n",
- "
\n",
- " \n",
- " 64 | \n",
- " 0.550858 | \n",
- " 0.530149 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 65 | \n",
- " 0.560427 | \n",
- " 0.580756 | \n",
- " 0.726224 | \n",
- "
\n",
- " \n",
- " 66 | \n",
- " 0.573460 | \n",
- " 0.867476 | \n",
- " 1.000000 | \n",
- "
\n",
- " \n",
- " 67 | \n",
- " 0.575976 | \n",
- " 0.656549 | \n",
- " 0.806102 | \n",
- "
\n",
- " \n",
- " 68 | \n",
- " 0.576479 | \n",
- " 0.774121 | \n",
- " 0.934544 | \n",
- "
\n",
- " \n",
- " 69 | \n",
- " 0.582866 | \n",
- " 0.577673 | \n",
- " 0.694904 | \n",
- "
\n",
- " \n",
- " 70 | \n",
- " 0.584790 | \n",
- " 0.491039 | \n",
- " 0.128070 | \n",
- "
\n",
- " \n",
- " 71 | \n",
- " 0.584950 | \n",
- " 0.745514 | \n",
- " 0.886276 | \n",
- "
\n",
- " \n",
- " 72 | \n",
- " 0.586260 | \n",
- " 0.717353 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 73 | \n",
- " 0.614757 | \n",
- " 0.673194 | \n",
- " 0.603687 | \n",
- "
\n",
- " \n",
- " 74 | \n",
- " 0.648191 | \n",
- " 0.782951 | \n",
- " 0.667259 | \n",
- "
\n",
- " \n",
- " 75 | \n",
- " 0.650132 | \n",
- " 0.620419 | \n",
- " 0.868859 | \n",
- "
\n",
- " \n",
- " 76 | \n",
- " 0.653933 | \n",
- " 0.793290 | \n",
- " 0.665075 | \n",
- "
\n",
- " \n",
- " 77 | \n",
- " 0.661956 | \n",
- " 0.544567 | \n",
- " 0.687681 | \n",
- "
\n",
- " \n",
- " 78 | \n",
- " 0.662519 | \n",
- " 0.740416 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 79 | \n",
- " 0.669378 | \n",
- " 0.556351 | \n",
- " 0.691892 | \n",
- "
\n",
- " \n",
- " 80 | \n",
- " 0.682696 | \n",
- " 0.432683 | \n",
- " 0.498591 | \n",
- "
\n",
- " \n",
- " 81 | \n",
- " 0.683442 | \n",
- " 0.562822 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 82 | \n",
- " 0.697061 | \n",
- " 0.898199 | \n",
- " 0.628065 | \n",
- "
\n",
- " \n",
- " 83 | \n",
- " 0.701713 | \n",
- " 0.635278 | \n",
- " 0.682781 | \n",
- "
\n",
- " \n",
- " 84 | \n",
- " 0.707848 | \n",
- " 0.628726 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 85 | \n",
- " 0.708291 | \n",
- " 0.525185 | \n",
- " 0.548055 | \n",
- "
\n",
- " \n",
- " 86 | \n",
- " 0.753964 | \n",
- " 0.880014 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 87 | \n",
- " 0.754376 | \n",
- " 0.648879 | \n",
- " 0.705661 | \n",
- "
\n",
- " \n",
- " 88 | \n",
- " 0.771381 | \n",
- " 0.683648 | \n",
- " 0.770484 | \n",
- "
\n",
- " \n",
- " 89 | \n",
- " 0.780210 | \n",
- " 0.690189 | \n",
- " 0.909890 | \n",
- "
\n",
- " \n",
- " 90 | \n",
- " 0.787743 | \n",
- " 0.745578 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 91 | \n",
- " 0.797744 | \n",
- " 0.564313 | \n",
- " 0.521526 | \n",
- "
\n",
- " \n",
- " 92 | \n",
- " 0.815482 | \n",
- " 0.844322 | \n",
- " 0.917914 | \n",
- "
\n",
- " \n",
- " 93 | \n",
- " 0.821048 | \n",
- " 0.578163 | \n",
- " 0.781067 | \n",
- "
\n",
- " \n",
- " 94 | \n",
- " 0.832475 | \n",
- " 0.756700 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 95 | \n",
- " 0.864773 | \n",
- " 0.597244 | \n",
- " 0.474646 | \n",
- "
\n",
- " \n",
- " 96 | \n",
- " 0.867214 | \n",
- " 0.577540 | \n",
- " 0.861811 | \n",
- "
\n",
- " \n",
- " 97 | \n",
- " 0.920367 | \n",
- " 0.667763 | \n",
- " 0.592899 | \n",
- "
\n",
- " \n",
- " 98 | \n",
- " 0.938998 | \n",
- " 0.663901 | \n",
- " 0.625688 | \n",
- "
\n",
- " \n",
- " 99 | \n",
- " 1.000000 | \n",
- " 0.568807 | \n",
- " 0.497987 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " total_travel_distance radius_of_gyration activity_entropy\n",
- "0 0.000000 0.015741 0.400992\n",
- "1 0.076253 0.182373 0.504028\n",
- "2 0.098540 0.175793 0.692796\n",
- "3 0.100956 0.024102 0.004103\n",
- "4 0.102464 0.369021 NaN\n",
- "5 0.126786 0.000000 0.128279\n",
- "6 0.130826 0.088019 0.368835\n",
- "7 0.137770 0.024837 0.115453\n",
- "8 0.139332 0.223741 0.638321\n",
- "9 0.141096 0.183562 0.408091\n",
- "10 0.145877 0.018146 0.150974\n",
- "11 0.164302 0.224896 0.669492\n",
- "12 0.166832 0.153687 0.657595\n",
- "13 0.168286 0.053673 0.128041\n",
- "14 0.173987 0.652952 0.862067\n",
- "15 0.193627 0.094938 0.560008\n",
- "16 0.206985 0.459849 NaN\n",
- "17 0.227320 0.150694 0.693717\n",
- "18 0.229192 0.116003 NaN\n",
- "19 0.231166 0.109158 0.392414\n",
- "20 0.232562 0.132373 0.719609\n",
- "21 0.238558 0.151386 0.513148\n",
- "22 0.245155 0.390631 NaN\n",
- "23 0.249958 0.694434 0.734925\n",
- "24 0.250839 0.126739 0.564629\n",
- "25 0.251877 0.456084 NaN\n",
- "26 0.262651 0.316213 0.641308\n",
- "27 0.263391 0.734126 NaN\n",
- "28 0.271244 0.187688 0.644671\n",
- "29 0.278703 0.209653 0.697244\n",
- "30 0.290667 0.420993 0.830829\n",
- "31 0.293685 0.745656 0.881326\n",
- "32 0.306420 0.976004 0.846744\n",
- "33 0.324686 0.250294 0.780299\n",
- "34 0.326003 0.416509 NaN\n",
- "35 0.334206 0.350879 0.441645\n",
- "36 0.367764 0.224580 NaN\n",
- "37 0.373848 0.392623 0.465124\n",
- "38 0.393327 0.585133 0.818701\n",
- "39 0.393600 0.584121 0.846100\n",
- "40 0.400479 0.487088 0.749415\n",
- "41 0.406267 0.665625 0.747511\n",
- "42 0.410720 0.932120 0.942203\n",
- "43 0.422581 0.415363 0.388612\n",
- "44 0.435018 0.468543 0.284707\n",
- "45 0.441460 0.262436 0.805165\n",
- "46 0.443289 0.373030 0.488329\n",
- "47 0.454203 1.000000 0.921311\n",
- "48 0.461219 0.954661 0.874746\n",
- "49 0.471829 0.909112 0.890261\n",
- "50 0.476076 0.632397 0.780436\n",
- "51 0.482461 0.619962 NaN\n",
- "52 0.486138 0.300776 0.892967\n",
- "53 0.493235 0.687713 0.836162\n",
- "54 0.495475 0.502072 0.647253\n",
- "55 0.503071 0.688536 0.787323\n",
- "56 0.505641 0.852631 0.950744\n",
- "57 0.508837 0.899560 NaN\n",
- "58 0.518911 0.629803 0.561174\n",
- "59 0.525449 0.415542 0.000000\n",
- "60 0.533844 0.760338 0.998669\n",
- "61 0.539236 0.924828 NaN\n",
- "62 0.539789 0.722342 0.693652\n",
- "63 0.547888 0.399018 0.389120\n",
- "64 0.550858 0.530149 NaN\n",
- "65 0.560427 0.580756 0.726224\n",
- "66 0.573460 0.867476 1.000000\n",
- "67 0.575976 0.656549 0.806102\n",
- "68 0.576479 0.774121 0.934544\n",
- "69 0.582866 0.577673 0.694904\n",
- "70 0.584790 0.491039 0.128070\n",
- "71 0.584950 0.745514 0.886276\n",
- "72 0.586260 0.717353 NaN\n",
- "73 0.614757 0.673194 0.603687\n",
- "74 0.648191 0.782951 0.667259\n",
- "75 0.650132 0.620419 0.868859\n",
- "76 0.653933 0.793290 0.665075\n",
- "77 0.661956 0.544567 0.687681\n",
- "78 0.662519 0.740416 NaN\n",
- "79 0.669378 0.556351 0.691892\n",
- "80 0.682696 0.432683 0.498591\n",
- "81 0.683442 0.562822 NaN\n",
- "82 0.697061 0.898199 0.628065\n",
- "83 0.701713 0.635278 0.682781\n",
- "84 0.707848 0.628726 NaN\n",
- "85 0.708291 0.525185 0.548055\n",
- "86 0.753964 0.880014 NaN\n",
- "87 0.754376 0.648879 0.705661\n",
- "88 0.771381 0.683648 0.770484\n",
- "89 0.780210 0.690189 0.909890\n",
- "90 0.787743 0.745578 NaN\n",
- "91 0.797744 0.564313 0.521526\n",
- "92 0.815482 0.844322 0.917914\n",
- "93 0.821048 0.578163 0.781067\n",
- "94 0.832475 0.756700 NaN\n",
- "95 0.864773 0.597244 0.474646\n",
- "96 0.867214 0.577540 0.861811\n",
- "97 0.920367 0.667763 0.592899\n",
- "98 0.938998 0.663901 0.625688\n",
- "99 1.000000 0.568807 0.497987"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Create a MinMaxScaler instance\n",
- "scaler = MinMaxScaler()\n",
- "\n",
- "# Fit the scaler on the data and transform it\n",
- "scaled_data = scaler.fit_transform(df_capping)\n",
- "\n",
- "# Convert scaled data back to a DataFrame\n",
- "scaled_df = pd.DataFrame(scaled_data, columns=column_headers)\n",
- "scaled_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "094e9cc1",
- "metadata": {},
- "outputs": [],
- "source": [
- "scaled_df = scaled_df.fillna(0)\n",
- "\n",
- "scaled_df[\"mobility_index\"] = (scaled_df[\"total_travel_distance\"] + scaled_df[\"radius_of_gyration\"] + scaled_df[\"activity_entropy\"]) /3"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "1c0b40ab",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " total_travel_distance | \n",
- " radius_of_gyration | \n",
- " activity_entropy | \n",
- " mobility_index | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 93 | \n",
- " 0.821048 | \n",
- " 0.578163 | \n",
- " 0.781067 | \n",
- " 0.726759 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.098540 | \n",
- " 0.175793 | \n",
- " 0.692796 | \n",
- " 0.322376 | \n",
- "
\n",
- " \n",
- " 88 | \n",
- " 0.771381 | \n",
- " 0.683648 | \n",
- " 0.770484 | \n",
- " 0.741838 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 0.263391 | \n",
- " 0.734126 | \n",
- " 0.000000 | \n",
- " 0.332506 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 0.145877 | \n",
- " 0.018146 | \n",
- " 0.150974 | \n",
- " 0.104999 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " total_travel_distance radius_of_gyration activity_entropy \\\n",
- "93 0.821048 0.578163 0.781067 \n",
- "2 0.098540 0.175793 0.692796 \n",
- "88 0.771381 0.683648 0.770484 \n",
- "27 0.263391 0.734126 0.000000 \n",
- "10 0.145877 0.018146 0.150974 \n",
- "\n",
- " mobility_index \n",
- "93 0.726759 \n",
- "2 0.322376 \n",
- "88 0.741838 \n",
- "27 0.332506 \n",
- "10 0.104999 "
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "scaled_df.sample(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "f2777ad1",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " total_travel_distance | \n",
- " radius_of_gyration | \n",
- " activity_entropy | \n",
- " mobility_index | \n",
- " Category | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 46 | \n",
- " 0.443289 | \n",
- " 0.373030 | \n",
- " 0.488329 | \n",
- " 0.434882 | \n",
- " Mid | \n",
- "
\n",
- " \n",
- " 37 | \n",
- " 0.373848 | \n",
- " 0.392623 | \n",
- " 0.465124 | \n",
- " 0.410532 | \n",
- " Mid | \n",
- "
\n",
- " \n",
- " 62 | \n",
- " 0.539789 | \n",
- " 0.722342 | \n",
- " 0.693652 | \n",
- " 0.651928 | \n",
- " Mid | \n",
- "
\n",
- " \n",
- " 40 | \n",
- " 0.400479 | \n",
- " 0.487088 | \n",
- " 0.749415 | \n",
- " 0.545661 | \n",
- " Mid | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 0.193627 | \n",
- " 0.094938 | \n",
- " 0.560008 | \n",
- " 0.282858 | \n",
- " Low | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " total_travel_distance radius_of_gyration activity_entropy \\\n",
- "46 0.443289 0.373030 0.488329 \n",
- "37 0.373848 0.392623 0.465124 \n",
- "62 0.539789 0.722342 0.693652 \n",
- "40 0.400479 0.487088 0.749415 \n",
- "15 0.193627 0.094938 0.560008 \n",
- "\n",
- " mobility_index Category \n",
- "46 0.434882 Mid \n",
- "37 0.410532 Mid \n",
- "62 0.651928 Mid \n",
- "40 0.545661 Mid \n",
- "15 0.282858 Low "
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "low_threshold = 0.3\n",
- "high_threshold = 0.7\n",
- "\n",
- "# Create a function to categorize values\n",
- "def categorize(value):\n",
- " if value < low_threshold:\n",
- " return 'Low'\n",
- " elif value < high_threshold:\n",
- " return 'Mid'\n",
- " elif value > high_threshold:\n",
- " return 'High'\n",
- " else:\n",
- " return 'Low'\n",
- "\n",
- "# Apply the categorize function to the 'mobility_index' column\n",
- "scaled_df['Category'] = scaled_df['mobility_index'].apply(categorize)\n",
- "\n",
- "scaled_df.sample(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "e8536d1d",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Category\n",
- "High 21\n",
- "Low 18\n",
- "Mid 61\n",
- "dtype: int64"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mobility_class = pd.DataFrame(scaled_df)\n",
- "scaled_df.groupby('Category').size()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "06681c5d",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.16"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}