# ms_university_prediction.py

# -*- coding: utf-8 -*-
"""MS_University prediction.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1PgymAl_Y219rzSJq8np6yJ-OkbPmQAKf
"""

# The upload dialog only exists inside Google Colab. Outside Colab the import
# fails, so fall back to an empty upload mapping; 'GRE.csv' (read below via a
# relative path) must then already be in the working directory.
try:
    from google.colab import files
except ImportError:
    uploaded = {}
else:
    uploaded = files.upload()

import numpy as np
import pandas as pd
#import os
from matplotlib import pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import seaborn as sns
# A single theme call is enough: every sns.set() call re-applies the whole
# theme, so only the style given in the last call takes effect.
sns.set(style='whitegrid', color_codes=True)


df = pd.read_csv('GRE.csv')
# Bare expressions are only displayed by a notebook; print them so the
# preview and summary are also visible when this runs as a script.
print(df.head())
print(df.describe())

# Some headers in the CSV carry trailing spaces ('Chance of Admit ', 'LOR ');
# strip whitespace from every header so columns can be addressed by clean names.
df.columns = df.columns.str.strip()
# The serial number is only a row identifier, so it is dropped before analysis.
df.drop(labels='Serial No.', axis=1, inplace=True)

print(df.describe())

def _plot_distribution_and_trend(column, distribution_title, trend_title):
    """Draw a two-panel figure for one column of ``df``.

    The left panel shows the column's distribution (histogram plus KDE); the
    right panel shows a regression plot of the column against
    'Chance of Admit'.
    """
    plt.figure(figsize=(20, 6))

    plt.subplot(1, 2, 1)
    # histplot(kde=True, stat='density') replaces the deprecated distplot.
    sns.histplot(data=df, x=column, kde=True, stat='density',
                 kde_kws={'cut': 3})
    plt.title(distribution_title)

    plt.subplot(1, 2, 2)
    # x/y are passed by keyword: recent seaborn releases reject them as
    # positional arguments.
    sns.regplot(data=df, x=column, y='Chance of Admit')
    plt.title(trend_title)


# Correlation heatmap. Restrict to numeric/bool columns explicitly, because
# DataFrame.corr() raises on non-numeric columns in recent pandas releases.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(df.select_dtypes(include=['number', 'bool']).corr(),
            annot=True, cmap='Blues', ax=ax)

_plot_distribution_and_trend('CGPA',
                             'CGPA Distribution of Applicants',
                             'CGPA vs Chance of Admit')
_plot_distribution_and_trend('GRE Score',
                             'Distributed GRE Scores of Applicants',
                             'GRE Scores vs Chance of Admit')
_plot_distribution_and_trend('TOEFL Score',
                             'Distributed TOEFL Scores of Applicants',
                             'TOEFL Scores vs Chance of Admit')

# Number of applicants without / with research experience.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='Research', ax=ax)
plt.title('Research Experience')
plt.ylabel('Number of Applicants')
ax.set_xticklabels(['No Research Experience', 'Has Research Experience'])

# Number of applicants per university rating.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='University Rating', ax=ax)
plt.title('University Rating')
plt.ylabel('Number of Applicants')

from sklearn.preprocessing import LabelEncoder

# Target is the admission chance; every other column is a feature.
targets = df['Chance of Admit']
features = df.drop(columns=['Chance of Admit'])

# Encode the college names once, on the full feature table, BEFORE splitting.
# Fitting a separate encoder on each split would let the same college map to
# different integer codes in the training and test sets.
labelencoder = LabelEncoder()
features['colleges_encoded'] = labelencoder.fit_transform(features['colleges'])
features = features.drop('colleges', axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42)

# Fit the scaler on the training split only and reuse its statistics for the
# test split; re-fitting on the test data would scale the two splits
# differently and leak test-set statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train three regressors on the same split and compare them on the test set.
# NOTE: for regressors, .score() returns the coefficient of determination
# (R^2); it is multiplied by 100 here to express it as a percentage.
linreg = LinearRegression()
linreg.fit(X_train, y_train)
y_predict = linreg.predict(X_test)
linreg_score = linreg.score(X_test, y_test) * 100
print(linreg_score)

dec_tree = DecisionTreeRegressor(random_state=0, max_depth=6)
dec_tree.fit(X_train, y_train)
y_predict = dec_tree.predict(X_test)
dec_tree_score = dec_tree.score(X_test, y_test) * 100
print(dec_tree_score)

forest = RandomForestRegressor(n_estimators=110, max_depth=6, random_state=0)
forest.fit(X_train, y_train)
y_predict = forest.predict(X_test)
forest_score = forest.score(X_test, y_test) * 100
print(forest_score)

Methods = ['Linear Regression', 'Decision Trees', 'Random Forests']
Scores = np.array([linreg_score, dec_tree_score, forest_score])

fig, ax = plt.subplots(figsize=(8, 6))
# x/y are passed by keyword: recent seaborn releases reject them as
# positional arguments.
sns.barplot(x=Methods, y=Scores, ax=ax)
plt.title('Algorithm Prediction Accuracies')
plt.ylabel('Accuracy')

# Peek at the first training row.
print(X_train[0])

# (minimum predicted chance, message) pairs, ordered from the highest tier
# down; the first tier whose minimum is reached wins.
ADMISSION_TIERS = (
    (0.90, 'You have chances of get admission in Harvard university  '),
    (0.80, 'You have chances of get admission in MIT'),
    (0.70, 'You have chances of get admission in Stanford university'),
    (0.60, 'You have chances of get admission in Caltech(California Institute of Technology)'),
    (0.40, 'You have chances of get admission in UOC (University of Chicago)'),
)
NO_ADMISSION_MESSAGE = (
    "You don't have chances of get admission in top colleges !! "
    "Better luck next time:))"
)


def admission_message(chance):
    """Return the message for the highest tier whose minimum `chance` reaches.

    `chance` is a single number; values below the lowest tier get
    NO_ADMISSION_MESSAGE.
    """
    for minimum, message in ADMISSION_TIERS:
        if chance >= minimum:
            return message
    return NO_ADMISSION_MESSAGE


def report_admission_chance(model, applicant):
    """Predict one applicant's admission chance with `model` and print a report.

    `applicant` is a one-row 2-D list of raw feature values. Prints the
    model's test-set score, the predicted chance and the matching tier
    message, then returns the prediction array.
    """
    # The models are trained on standardized features, so the raw applicant
    # row has to go through the same scaler before it is passed to predict().
    prediction = model.predict(scaler.transform(applicant))
    print('MODEL accuracy is : ', model.score(X_test, y_test))
    print('Your chances of admission is :', prediction)
    # Compare a plain scalar rather than a one-element array against lists.
    print(admission_message(prediction[0]))
    return prediction


# Applicant rows hold raw values in the order:
#   GRE score, TOEFL score, university rating (1-5), statement of purpose (1-5),
#   letter of recommendation (1-5), CGPA (1-10), research (1 = yes, 0 = no),
#   followed by an eighth value (presumably the encoded college -- TODO confirm).
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LTL = [[180, 100, 3, 3.5, 2.5, 8.57, 1, 0.32]]
prediction = report_admission_chance(regressor, LTL)

forest = RandomForestRegressor(n_estimators=60, max_depth=3, random_state=0)
forest.fit(X_train, y_train)

LTL = [[150, 80, 3, 3.5, 2.5, 8.57, 1, 0.32]]
prediction = report_admission_chance(forest, LTL)