diff --git a/notebooks/03_Take_Home_Exam.ipynb b/notebooks/03_Take_Home_Exam.ipynb deleted file mode 100644 index 6f65005..0000000 --- a/notebooks/03_Take_Home_Exam.ipynb +++ /dev/null @@ -1,1301 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "15568cc0-be91-49e5-9a8c-1030cc298650", - "metadata": { - "tags": [] - }, - "source": [ - "# Take Home Exam: Mobility Index Calculation and Profiling" - ] - }, - { - "cell_type": "markdown", - "id": "94ccdd38", - "metadata": {}, - "source": [ - "## 1. Create \"Mobility Index\" and \"Mobility Class\"" - ] - }, - { - "cell_type": "markdown", - "id": "b74fd38c", - "metadata": {}, - "source": [ - "Using the following features/indicators:
\n", - "
\n", - " 1. Total Distance Traveled
\n", - " 2. Radius of Gyration
\n", - " 3. Activity Entropy
\n", - " \n", - "Create a calculated feature called **Mobility Index** (type: decimal/float) and a **Mobility Class** (categorized as Low, Mid, or High) for each subscriber.
\n", - "The team is free to use any method or technique to arrive at the **OPTIMAL** Mobility Index and Mobility Class, as long as it is supported by the literature.
\n", - "
\n", - "**Deadline of the submission is September 1, 2023.**
\n", - "
\n", - "
\n", - "**Criteria for scoring**
\n", - "1. Creation of mobility class - 50 pts
\n", - "2. Creation of mobility index - 30 pts
\n", - "3. Efficiency of process - 20 pts
\n", - " Total -100 pts \n" - ] - }, - { - "cell_type": "markdown", - "id": "835e89f7", - "metadata": {}, - "source": [ - "## 2. Example of Mobility Index" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "1cfd98fc", - "metadata": {}, - "outputs": [], - "source": [ - "import random\n", - "import shapely\n", - "import pendulum\n", - "import numpy as np\n", - "import pandas as pd\n", - "from scipy import stats\n", - "pd.options.display.max_rows=200\n", - "import geopandas as gpd\n", - "import matplotlib.pyplot as plt\n", - "from IPython.display import HTML, display\n", - "from functools import reduce\n", - "import pyproj\n", - "from functools import partial" - ] - }, - { - "cell_type": "markdown", - "id": "238621ee", - "metadata": {}, - "source": [ - "#### Sample ABT" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "91355f76", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0sub_uidgenderagenamechi_indicatorewallet_user_indicatortotal_travel_distanceradius_of_gyrationactivity_entropy
3232glo-sub-023female62Brandi TaylorTrueY187400.3531512773.8009711.393043
2222glo-sub-061male64David EvansFalseY168249.8821201710.550891NaN
1717glo-sub-069male51Kevin GibsonTrueN162675.1993761274.7396661.279325
44glo-sub-076male46Colin MejiaFalseN123646.9216361671.299423NaN
9898glo-sub-046male24Luis JacksonTrueY385134.5682272206.9080601.228771
\n", - "
" - ], - "text/plain": [ - " Unnamed: 0 sub_uid gender age name chi_indicator \\\n", - "32 32 glo-sub-023 female 62 Brandi Taylor True \n", - "22 22 glo-sub-061 male 64 David Evans False \n", - "17 17 glo-sub-069 male 51 Kevin Gibson True \n", - "4 4 glo-sub-076 male 46 Colin Mejia False \n", - "98 98 glo-sub-046 male 24 Luis Jackson True \n", - "\n", - " ewallet_user_indicator total_travel_distance radius_of_gyration \\\n", - "32 Y 187400.353151 2773.800971 \n", - "22 Y 168249.882120 1710.550891 \n", - "17 N 162675.199376 1274.739666 \n", - "4 N 123646.921636 1671.299423 \n", - "98 Y 385134.568227 2206.908060 \n", - "\n", - " activity_entropy \n", - "32 1.393043 \n", - "22 NaN \n", - "17 1.279325 \n", - "4 NaN \n", - "98 1.228771 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "file_path_sample_data = \"C:/Users/10012425/Desktop/sds4gdsp/scoring_base.csv\"\n", - "ABT_mobility = pd.read_csv(file_path_sample_data)\n", - "ABT_mobility.sample(5)" - ] - }, - { - "cell_type": "markdown", - "id": "dc963d69", - "metadata": {}, - "source": [ - "### Apply min max scaling capping" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "c74ec061", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.preprocessing import MinMaxScaler" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a396858e", - "metadata": {}, - "outputs": [], - "source": [ - "df_capping = pd.DataFrame(ABT_mobility, columns=['total_travel_distance', 'radius_of_gyration','activity_entropy'])\n", - "\n", - "column_headers = df_capping.columns.tolist()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "3186fb50", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
total_travel_distanceradius_of_gyrationactivity_entropy
00.0000000.0157410.400992
10.0762530.1823730.504028
20.0985400.1757930.692796
30.1009560.0241020.004103
40.1024640.369021NaN
50.1267860.0000000.128279
60.1308260.0880190.368835
70.1377700.0248370.115453
80.1393320.2237410.638321
90.1410960.1835620.408091
100.1458770.0181460.150974
110.1643020.2248960.669492
120.1668320.1536870.657595
130.1682860.0536730.128041
140.1739870.6529520.862067
150.1936270.0949380.560008
160.2069850.459849NaN
170.2273200.1506940.693717
180.2291920.116003NaN
190.2311660.1091580.392414
200.2325620.1323730.719609
210.2385580.1513860.513148
220.2451550.390631NaN
230.2499580.6944340.734925
240.2508390.1267390.564629
250.2518770.456084NaN
260.2626510.3162130.641308
270.2633910.734126NaN
280.2712440.1876880.644671
290.2787030.2096530.697244
300.2906670.4209930.830829
310.2936850.7456560.881326
320.3064200.9760040.846744
330.3246860.2502940.780299
340.3260030.416509NaN
350.3342060.3508790.441645
360.3677640.224580NaN
370.3738480.3926230.465124
380.3933270.5851330.818701
390.3936000.5841210.846100
400.4004790.4870880.749415
410.4062670.6656250.747511
420.4107200.9321200.942203
430.4225810.4153630.388612
440.4350180.4685430.284707
450.4414600.2624360.805165
460.4432890.3730300.488329
470.4542031.0000000.921311
480.4612190.9546610.874746
490.4718290.9091120.890261
500.4760760.6323970.780436
510.4824610.619962NaN
520.4861380.3007760.892967
530.4932350.6877130.836162
540.4954750.5020720.647253
550.5030710.6885360.787323
560.5056410.8526310.950744
570.5088370.899560NaN
580.5189110.6298030.561174
590.5254490.4155420.000000
600.5338440.7603380.998669
610.5392360.924828NaN
620.5397890.7223420.693652
630.5478880.3990180.389120
640.5508580.530149NaN
650.5604270.5807560.726224
660.5734600.8674761.000000
670.5759760.6565490.806102
680.5764790.7741210.934544
690.5828660.5776730.694904
700.5847900.4910390.128070
710.5849500.7455140.886276
720.5862600.717353NaN
730.6147570.6731940.603687
740.6481910.7829510.667259
750.6501320.6204190.868859
760.6539330.7932900.665075
770.6619560.5445670.687681
780.6625190.740416NaN
790.6693780.5563510.691892
800.6826960.4326830.498591
810.6834420.562822NaN
820.6970610.8981990.628065
830.7017130.6352780.682781
840.7078480.628726NaN
850.7082910.5251850.548055
860.7539640.880014NaN
870.7543760.6488790.705661
880.7713810.6836480.770484
890.7802100.6901890.909890
900.7877430.745578NaN
910.7977440.5643130.521526
920.8154820.8443220.917914
930.8210480.5781630.781067
940.8324750.756700NaN
950.8647730.5972440.474646
960.8672140.5775400.861811
970.9203670.6677630.592899
980.9389980.6639010.625688
991.0000000.5688070.497987
\n", - "
" - ], - "text/plain": [ - " total_travel_distance radius_of_gyration activity_entropy\n", - "0 0.000000 0.015741 0.400992\n", - "1 0.076253 0.182373 0.504028\n", - "2 0.098540 0.175793 0.692796\n", - "3 0.100956 0.024102 0.004103\n", - "4 0.102464 0.369021 NaN\n", - "5 0.126786 0.000000 0.128279\n", - "6 0.130826 0.088019 0.368835\n", - "7 0.137770 0.024837 0.115453\n", - "8 0.139332 0.223741 0.638321\n", - "9 0.141096 0.183562 0.408091\n", - "10 0.145877 0.018146 0.150974\n", - "11 0.164302 0.224896 0.669492\n", - "12 0.166832 0.153687 0.657595\n", - "13 0.168286 0.053673 0.128041\n", - "14 0.173987 0.652952 0.862067\n", - "15 0.193627 0.094938 0.560008\n", - "16 0.206985 0.459849 NaN\n", - "17 0.227320 0.150694 0.693717\n", - "18 0.229192 0.116003 NaN\n", - "19 0.231166 0.109158 0.392414\n", - "20 0.232562 0.132373 0.719609\n", - "21 0.238558 0.151386 0.513148\n", - "22 0.245155 0.390631 NaN\n", - "23 0.249958 0.694434 0.734925\n", - "24 0.250839 0.126739 0.564629\n", - "25 0.251877 0.456084 NaN\n", - "26 0.262651 0.316213 0.641308\n", - "27 0.263391 0.734126 NaN\n", - "28 0.271244 0.187688 0.644671\n", - "29 0.278703 0.209653 0.697244\n", - "30 0.290667 0.420993 0.830829\n", - "31 0.293685 0.745656 0.881326\n", - "32 0.306420 0.976004 0.846744\n", - "33 0.324686 0.250294 0.780299\n", - "34 0.326003 0.416509 NaN\n", - "35 0.334206 0.350879 0.441645\n", - "36 0.367764 0.224580 NaN\n", - "37 0.373848 0.392623 0.465124\n", - "38 0.393327 0.585133 0.818701\n", - "39 0.393600 0.584121 0.846100\n", - "40 0.400479 0.487088 0.749415\n", - "41 0.406267 0.665625 0.747511\n", - "42 0.410720 0.932120 0.942203\n", - "43 0.422581 0.415363 0.388612\n", - "44 0.435018 0.468543 0.284707\n", - "45 0.441460 0.262436 0.805165\n", - "46 0.443289 0.373030 0.488329\n", - "47 0.454203 1.000000 0.921311\n", - "48 0.461219 0.954661 0.874746\n", - "49 0.471829 0.909112 0.890261\n", - "50 0.476076 0.632397 0.780436\n", - "51 0.482461 0.619962 NaN\n", - "52 0.486138 0.300776 0.892967\n", - 
"53 0.493235 0.687713 0.836162\n", - "54 0.495475 0.502072 0.647253\n", - "55 0.503071 0.688536 0.787323\n", - "56 0.505641 0.852631 0.950744\n", - "57 0.508837 0.899560 NaN\n", - "58 0.518911 0.629803 0.561174\n", - "59 0.525449 0.415542 0.000000\n", - "60 0.533844 0.760338 0.998669\n", - "61 0.539236 0.924828 NaN\n", - "62 0.539789 0.722342 0.693652\n", - "63 0.547888 0.399018 0.389120\n", - "64 0.550858 0.530149 NaN\n", - "65 0.560427 0.580756 0.726224\n", - "66 0.573460 0.867476 1.000000\n", - "67 0.575976 0.656549 0.806102\n", - "68 0.576479 0.774121 0.934544\n", - "69 0.582866 0.577673 0.694904\n", - "70 0.584790 0.491039 0.128070\n", - "71 0.584950 0.745514 0.886276\n", - "72 0.586260 0.717353 NaN\n", - "73 0.614757 0.673194 0.603687\n", - "74 0.648191 0.782951 0.667259\n", - "75 0.650132 0.620419 0.868859\n", - "76 0.653933 0.793290 0.665075\n", - "77 0.661956 0.544567 0.687681\n", - "78 0.662519 0.740416 NaN\n", - "79 0.669378 0.556351 0.691892\n", - "80 0.682696 0.432683 0.498591\n", - "81 0.683442 0.562822 NaN\n", - "82 0.697061 0.898199 0.628065\n", - "83 0.701713 0.635278 0.682781\n", - "84 0.707848 0.628726 NaN\n", - "85 0.708291 0.525185 0.548055\n", - "86 0.753964 0.880014 NaN\n", - "87 0.754376 0.648879 0.705661\n", - "88 0.771381 0.683648 0.770484\n", - "89 0.780210 0.690189 0.909890\n", - "90 0.787743 0.745578 NaN\n", - "91 0.797744 0.564313 0.521526\n", - "92 0.815482 0.844322 0.917914\n", - "93 0.821048 0.578163 0.781067\n", - "94 0.832475 0.756700 NaN\n", - "95 0.864773 0.597244 0.474646\n", - "96 0.867214 0.577540 0.861811\n", - "97 0.920367 0.667763 0.592899\n", - "98 0.938998 0.663901 0.625688\n", - "99 1.000000 0.568807 0.497987" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Create a MinMaxScaler instance\n", - "scaler = MinMaxScaler()\n", - "\n", - "# Fit the scaler on the data and transform it\n", - "scaled_data = scaler.fit_transform(df_capping)\n", - "\n", - "# Convert 
scaled data back to a DataFrame\n", - "scaled_df = pd.DataFrame(scaled_data, columns=column_headers)\n", - "scaled_df" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "094e9cc1", - "metadata": {}, - "outputs": [], - "source": [ - "scaled_df = scaled_df.fillna(0)\n", - "\n", - "scaled_df[\"mobility_index\"] = (scaled_df[\"total_travel_distance\"] + scaled_df[\"radius_of_gyration\"] + scaled_df[\"activity_entropy\"]) /3" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "1c0b40ab", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
total_travel_distanceradius_of_gyrationactivity_entropymobility_index
930.8210480.5781630.7810670.726759
20.0985400.1757930.6927960.322376
880.7713810.6836480.7704840.741838
270.2633910.7341260.0000000.332506
100.1458770.0181460.1509740.104999
\n", - "
" - ], - "text/plain": [ - " total_travel_distance radius_of_gyration activity_entropy \\\n", - "93 0.821048 0.578163 0.781067 \n", - "2 0.098540 0.175793 0.692796 \n", - "88 0.771381 0.683648 0.770484 \n", - "27 0.263391 0.734126 0.000000 \n", - "10 0.145877 0.018146 0.150974 \n", - "\n", - " mobility_index \n", - "93 0.726759 \n", - "2 0.322376 \n", - "88 0.741838 \n", - "27 0.332506 \n", - "10 0.104999 " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "scaled_df.sample(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "f2777ad1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
total_travel_distanceradius_of_gyrationactivity_entropymobility_indexCategory
460.4432890.3730300.4883290.434882Mid
370.3738480.3926230.4651240.410532Mid
620.5397890.7223420.6936520.651928Mid
400.4004790.4870880.7494150.545661Mid
150.1936270.0949380.5600080.282858Low
\n", - "
" - ], - "text/plain": [ - " total_travel_distance radius_of_gyration activity_entropy \\\n", - "46 0.443289 0.373030 0.488329 \n", - "37 0.373848 0.392623 0.465124 \n", - "62 0.539789 0.722342 0.693652 \n", - "40 0.400479 0.487088 0.749415 \n", - "15 0.193627 0.094938 0.560008 \n", - "\n", - " mobility_index Category \n", - "46 0.434882 Mid \n", - "37 0.410532 Mid \n", - "62 0.651928 Mid \n", - "40 0.545661 Mid \n", - "15 0.282858 Low " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "low_threshold = 0.3\n", - "high_threshold = 0.7\n", - "\n", - "# Create a function to categorize values\n", - "def categorize(value):\n", - " if value < low_threshold:\n", - " return 'Low'\n", - " elif value < high_threshold:\n", - " return 'Mid'\n", - " elif value > high_threshold:\n", - " return 'High'\n", - " else:\n", - " return 'Low'\n", - "\n", - "# Apply the categorize function to the 'Value' column\n", - "scaled_df['Category'] = scaled_df['mobility_index'].apply(categorize)\n", - "\n", - "scaled_df.sample(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "e8536d1d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Category\n", - "High 21\n", - "Low 18\n", - "Mid 61\n", - "dtype: int64" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mobility_class = pd.DataFrame(scaled_df)\n", - "scaled_df.groupby('Category').size()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06681c5d", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - 
"version": "3.8.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}