Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

push #74

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open

push #74

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
318 changes: 318 additions & 0 deletions EksplorasiData_Praveen Arvind Krisna.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Nama : Praveen Arvind Krisna\n",
"#NIM : 13219022\n",
"#Asal Universitas : Institut Teknologi Bandung\n",
"#Tanggal pengerjaan tugas : 20/11/2020\n",
"#Sumber Dataset = TakeMeOut\n",
"#Alur data : Data Observation - Data Cleaning - Data Splitting - Model Development\n",
"import pandas as pd\n",
"import array as ar\n",
"from sklearn.neighbors import NearestNeighbors\n",
"import numpy as np\n",
"from scipy import stats"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('takemeout.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Data Cleaning"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = df.rename(columns = {'Siapa nama kamu?':'nama', 'Cewek atau cowok nih?':'gender', 'Seberapa penting quality time bareng calon pacar untuk kamu?':'time','Seberapa penting physical touch sama calon pacar untuk kamu?':'touch','Seberapa penting word of affirmation dari calon pacar untuk kamu?':'word', 'Seberapa penting dapet kado dari calon pacar untuk kamu?':'kado','Seberapa penting bantuan dari calon pacar untuk kamu?':'help' })"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = df.drop('Timestamp', axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Data Splitting"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_m = df[df['gender'] == \"Cowok\"]\n",
"m_val = df_m.drop(['nama', 'gender'], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_f = df[df['gender'] == \"Cewek\"]\n",
"f_val = df_f.drop(['nama', 'gender'], axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Basic Statistic group by Gender"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Rata rata tiap gender untuk tiap kolom\n",
"df.groupby([\"gender\"]).mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#value minimal tiap gender untuk tiap kolom\n",
"df.groupby([\"gender\"]).min()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#value maksimal tiap gender untuk tiap kolom\n",
"df.groupby([\"gender\"]).max()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#nilai tengah untuk tiap gender di tiap kolom\n",
"df.groupby([\"gender\"]).median()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Nilai terbanyak untuk tiap kolom dari gender sampai help\n",
"dropNama = df.drop(['nama'], axis=1)\n",
"stats.mode(dropNama)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#jumlah cowok\n",
"len(m_val)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#jumlah cewek\n",
"len(f_val)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Model Development"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"m_coup = NearestNeighbors(n_neighbors=5).fit(m_val)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"f_coup = NearestNeighbors(n_neighbors=5).fit(f_val)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = df.drop(['nama', 'gender'], axis=1)\n",
"df_new = df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"nama = df_new['nama'] == 'P********************'\n",
"a = x[nama].values\n",
"p = 0\n",
"for i in range(len(df)):\n",
" if(nama[i] == True):\n",
" p = df.at[i, 'gender']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hasil = 0\n",
"if(p == \"Cowok\"):\n",
" __, hasil = f_coup.kneighbors(a)\n",
"else:\n",
" __, hasil = m_coup.kneighbors(a)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#Analisis untuk 5 pasangan terdekat dengan total values yang sama\n",
"df.iloc[hasil[0]]['nama']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Analisis Untuk pasangan tercocok yang memiliki values yang sama persis\n",
"df_def = 0\n",
"df_comp = 0\n",
"if(p == \"Cewek\"):\n",
" df_def = df_m.drop(['nama', 'gender'], axis=1)\n",
" df_comp = df_m\n",
"elif(p == \"Cowok\"):\n",
" df_def = df_f.drop(['nama', 'gender'], axis=1)\n",
" df_comp = df_f\n",
"jumlah = 0\n",
"for i in range(len(df_def)):\n",
" if(np.all(a == df_def[i:i+1].values)):\n",
" print(df_comp[i:i+1]['nama'])\n",
" else:\n",
" jumlah += 1\n",
"if(jumlah == len(df_def)):\n",
" print(\"Tidak ada pasangan yang cocok sekali\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Analisis Untuk Pasangan dengan range maksimal 1 poin untuk masing masing values\n",
"jumlah = 0\n",
"for i in range(len(df_def)):\n",
" data = df_def[i:i+1].values\n",
" counter = 0\n",
" for j in range(5):\n",
" if(data[0, j] <= a[0, j]+1):\n",
" if(data[0, j] >= a[0, j]-1):\n",
" counter += 1\n",
" if(counter == 5):\n",
" print(df_comp[i:i+1]['nama'])\n",
" counter = 0\n",
" else: \n",
" counter = 0\n",
" jumlah += 1\n",
"if(jumlah == len(df_def)):\n",
" print(\"Tidak ada pasangan yang cocok sekali\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions bucket.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Praveen_Arvind_Krisna
SubmissionBucket
Loading