Add files via upload

gabbygab1233 · Mar 9, 2021 · 76320fa · 76320fa
1 parent de06c79
commit 76320fa
Show file tree

Hide file tree

Showing 7 changed files with 2,501 additions and 0 deletions.
diff --git a/Crop_recommendation.csv b/Crop_recommendation.csv
diff --git a/Procfile b/Procfile
@@ -0,0 +1 @@
+web: sh setup.sh && streamlit run app.py
diff --git a/app.py b/app.py
@@ -0,0 +1,77 @@
+import streamlit as st 
+import pandas as pd
+import numpy as np
+import os
+import pickle
+import warnings
+
+
+# Page-level settings (tab title, icon, layout, sidebar state). This is the
+# first Streamlit call made by the script.
+st.beta_set_page_config(
+    page_title="Crop Recommender",
+    page_icon="🌿",
+    layout='centered',
+    initial_sidebar_state="collapsed",
+)
+
+def load_model(modelfile):
+    # Unpickle and return the object stored at path `modelfile`.
+    #
+    # The file is opened in a `with` block so the handle is closed as soon as
+    # unpickling finishes (or fails) instead of being left to the garbage
+    # collector.
+    #
+    # NOTE(review): pickle.load can execute arbitrary code from the file, so
+    # `modelfile` must only ever point at a trusted artifact.
+    with open(modelfile, 'rb') as model_fh:
+        return pickle.load(model_fh)
+
+def main():
+    # Render the crop recommender page: a title, an information panel in the
+    # left column, seven numeric inputs plus a Predict button in the right
+    # column, and a disclaimer underneath. Pressing Predict loads 'model.pkl'
+    # and shows the predicted crop in the left column.
+
+    # title
+    html_temp = """
+    <div>
+    <h1 style="color:MEDIUMSEAGREEN;text-align:left;"> Crop Recommendation  🌱 </h1>
+    </div>
+    """
+    st.markdown(html_temp, unsafe_allow_html=True)
+
+    # Two columns of equal relative width.
+    col1,col2  = st.beta_columns([2,2])
+
+    with col1: 
+        with st.beta_expander(" ℹ️ Information", expanded=True):
+            st.write("""
+            Crop recommendation is one of the most important aspects of precision agriculture. Crop recommendations are based on a number of factors. Precision agriculture seeks to define these criteria on a site-by-site basis in order to address crop selection issues. While the "site-specific" methodology has improved performance, there is still a need to monitor the systems' outcomes.Precision agriculture systems aren't all created equal. 
+            However, in agriculture, it is critical that the recommendations made are correct and precise, as errors can result in significant material and capital loss.
+
+            """)
+        # NOTE(review): the bare string expression below is not passed to any
+        # st.* call; presumably it is displayed through Streamlit's "magic"
+        # rendering of standalone literals - confirm before touching it.
+        '''
+        ## How does it work ❓ 
+        Complete all the parameters and the machine learning model will predict the most suitable crops to grow in a particular farm based on various parameters
+        '''
+
+
+    with col2:
+        st.subheader(" Find out the most suitable crop to grow in your farm 👨‍🌾")
+        # Positional arguments after the label are (min_value, max_value).
+        # NOTE(review): the upper bounds (e.g. 100000.0 for a percentage and
+        # for pH) look like placeholders - confirm the intended valid ranges.
+        N = st.number_input("Nitrogen", 1,10000)
+        P = st.number_input("Phosporus", 1,10000)
+        K = st.number_input("Potassium", 1,10000)
+        temp = st.number_input("Temperature",0.0,100000.0)
+        humidity = st.number_input("Humidity in %", 0.0,100000.0)
+        ph = st.number_input("Ph", 0.0,100000.0)
+        rainfall = st.number_input("Rainfall in mm",0.0,100000.0)
+
+        # Single sample shaped (1, 7); presumably this order must match the
+        # feature column order the model was trained on - verify against the
+        # training script.
+        feature_list = [N, P, K, temp, humidity, ph, rainfall]
+        single_pred = np.array(feature_list).reshape(1,-1)
+
+        if st.button('Predict'):
+
+            # The model is (re)loaded from disk on every button press.
+            loaded_model = load_model('model.pkl')
+            prediction = loaded_model.predict(single_pred)
+            # Results are written into the left column, not the current one.
+            col1.write('''
+		    ## Results 🔍 
+		    ''')
+            col1.success(f"{prediction.item().title()} are recommended by the A.I for your farm.")
+
+    st.warning("Note: This A.I application is for educational/demo purposes only and cannot be relied upon. Check the source code [here](https://github.com/gabbygab1233/Crop-Recommendation)")
+    # The unused local `hide_menu_style` that used to be assigned here was
+    # removed: it was never passed to st.markdown, and the menu-hiding CSS is
+    # injected at module level instead.
+
+# CSS that hides the element with id "MainMenu". It is injected at module
+# level, i.e. before main() is invoked by the guard below.
+hide_menu_style = """
+        <style>
+        #MainMenu {visibility: hidden;}
+        </style>
+        """
+st.markdown(hide_menu_style, unsafe_allow_html=True)
+
+
+# Script entry point.
+if __name__ == "__main__":
+    main()
diff --git a/model.pkl b/model.pkl
diff --git a/model.py b/model.py
@@ -0,0 +1,205 @@
+import pandas as pd
+import pandas_profiling as pp
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import warnings
+import os
+import plotly.graph_objects as go
+import plotly.io as pio
+import pickle
+from sklearn.utils import resample
+# Metrics
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, auc, roc_curve
+
+# Validation
+from sklearn.model_selection import train_test_split, cross_val_score, KFold
+from sklearn.pipeline import Pipeline, make_pipeline
+
+# Tuning
+from sklearn.model_selection import GridSearchCV
+
+# Feature Extraction
+from sklearn.feature_selection import RFE
+
+# Preprocessing
+from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer, Binarizer, LabelEncoder
+
+# Models
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.linear_model import LogisticRegression
+from sklearn.naive_bayes import GaussianNB
+from sklearn.svm import SVC
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier
+
+# Ensembles
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.ensemble import BaggingClassifier
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.ensemble import ExtraTreesClassifier
+
+# Global, import-time configuration for this script.
+# Silence every warning category.
+warnings.filterwarnings(action='ignore')
+
+
+# seaborn: "whitegrid" theme with the axes grid switched off.
+sns.set_style("whitegrid", rc={'axes.grid': False})
+# plotly: use the "plotly_white" template for all figures by default.
+pio.templates.default = "plotly_white"
+
+
+
+################################################################################
+#                                                                              #
+#                            Analyze Data                                      #
+#                                                                              #
+################################################################################
+def explore_data(df):
+    """Print a quick structural overview of ``df`` to stdout.
+
+    Prints, in order: the shape, the column index, and the per-column
+    summary produced by ``df.info()``.
+
+    Args:
+        df: the pandas DataFrame to describe.
+
+    Returns:
+        None. All output goes to stdout.
+    """
+    print("Number of Instances and Attributes:", df.shape)
+    print('\n')
+    print('Dataset columns:',df.columns)
+    print('\n')
+    # df.info() writes its report itself and returns None. Passing it to
+    # print() emitted the report *before* the label and then a literal
+    # "None", so print the label first and call info() on its own.
+    print('Data types of each columns: ')
+    df.info()
+################################################################################
+#                                                                              #
+#                      Checking for Duplicates                                 #
+#                                                                              #
+################################################################################
+def checking_removing_duplicates(df):
+    """Report the number of duplicated rows in ``df`` and drop them in place.
+
+    Args:
+        df: pandas DataFrame; it is modified in place when duplicates exist.
+
+    Returns:
+        None. The duplicate count and the action taken are printed.
+    """
+    n_duplicates = df.duplicated().sum()
+    print("Number of Duplicates: ", n_duplicates)
+    # Guard clause: nothing to remove.
+    if n_duplicates < 1:
+        print('No Duplicate values')
+        return
+    df.drop_duplicates(inplace=True)
+    print('Duplicate values removed!')
+################################################################################
+#                                                                              #
+#                Split Data to Training and Validation set                     #
+#                                                                              #
+################################################################################
+def read_in_and_split_data(data, target):
+    """Split ``data`` into training and validation features and labels.
+
+    Args:
+        data: DataFrame holding both the feature columns and the target.
+        target: name of the label column; every other column is a feature.
+
+    Returns:
+        Tuple ``(X_train, X_test, y_train, y_test)`` from an 80/20 split
+        made with ``random_state=0``.
+    """
+    features = data.drop(columns=target)
+    labels = data[target]
+    split_parts = train_test_split(features, labels, test_size=0.2, random_state=0)
+    return tuple(split_parts)
+################################################################################
+#                                                                              #
+#                        Spot-Check Algorithms                                 #
+#                                                                              #
+################################################################################
+def GetModel():
+    """Return the baseline classifiers as a list of ``(label, estimator)``.
+
+    Every estimator is a fresh instance with default settings, except the
+    SVC, which is created with ``probability=True``.
+    """
+    return [
+        ('LR', LogisticRegression()),
+        ('LDA', LinearDiscriminantAnalysis()),
+        ('KNN', KNeighborsClassifier()),
+        ('CART', DecisionTreeClassifier()),
+        ('NB', GaussianNB()),
+        ('SVM', SVC(probability=True)),
+    ]
+
+def ensemblemodels():
+    """Return the ensemble classifiers as a list of ``(label, estimator)``.
+
+    Every estimator is a fresh instance with default settings.
+    """
+    return [
+        ('AB', AdaBoostClassifier()),
+        ('GBM', GradientBoostingClassifier()),
+        ('RF', RandomForestClassifier()),
+        ('Bagging', BaggingClassifier()),
+        ('ET', ExtraTreesClassifier()),
+    ]
+################################################################################
+#                                                                              #
+#                 Spot-Check Normalized Models                                 #
+#                                                                              #
+################################################################################
+def NormalizedModel(nameOfScaler):
+    """Build one preprocessing+classifier pipeline per candidate model.
+
+    Args:
+        nameOfScaler: one of ``'standard'``, ``'minmax'``, ``'normalizer'``
+            or ``'binarizer'``; selects the preprocessing step.
+
+    Returns:
+        List of ``(label, Pipeline)`` tuples, where ``label`` is
+        ``nameOfScaler`` followed by the model abbreviation. Each pipeline
+        has a ``'Scaler'`` step followed by the classifier step.
+
+    Raises:
+        ValueError: if ``nameOfScaler`` is not a supported name. Previously
+            this surfaced as an UnboundLocalError on the first pipeline.
+    """
+    scaler_classes = {
+        'standard': StandardScaler,
+        'minmax': MinMaxScaler,
+        'normalizer': Normalizer,
+        'binarizer': Binarizer,
+    }
+    if nameOfScaler not in scaler_classes:
+        raise ValueError(
+            "Unknown scaler %r; expected one of %s"
+            % (nameOfScaler, sorted(scaler_classes))
+        )
+    make_scaler = scaler_classes[nameOfScaler]
+
+    # (label suffix, pipeline step name, estimator class).
+    # The 'GMB' step name is a historical typo for GBM; it is kept so that
+    # any existing 'GMB__<param>' references keep working.
+    candidates = [
+        ('LR', 'LR', LogisticRegression),
+        ('LDA', 'LDA', LinearDiscriminantAnalysis),
+        ('KNN', 'KNN', KNeighborsClassifier),
+        ('CART', 'CART', DecisionTreeClassifier),
+        ('NB', 'NB', GaussianNB),
+        ('SVM', 'SVM', SVC),
+        ('AB', 'AB', AdaBoostClassifier),
+        ('GBM', 'GMB', GradientBoostingClassifier),
+        ('RF', 'RF', RandomForestClassifier),
+        ('ET', 'ET', ExtraTreesClassifier),
+    ]
+
+    # Each pipeline gets its own scaler instance, so fitting one pipeline
+    # directly cannot overwrite the fitted state used by another.
+    return [
+        (
+            nameOfScaler + suffix,
+            Pipeline([('Scaler', make_scaler()), (step_name, estimator_cls())]),
+        )
+        for suffix, step_name, estimator_cls in candidates
+    ]
+################################################################################
+#                                                                              #
+#                           Train Model                                        #
+#                                                                              #
+################################################################################
+def fit_model(X_train, y_train,models):
+    """Cross-validate every model and print its mean/std accuracy.
+
+    Args:
+        X_train: training features.
+        y_train: training labels.
+        models: iterable of ``(name, estimator)`` pairs.
+
+    Returns:
+        Tuple ``(names, results)``: the model names and, in the same order,
+        the array of per-fold accuracy scores for each model.
+    """
+    names = []
+    results = []
+    for label, estimator in models:
+        # 10 shuffled folds with a fixed seed, scored by accuracy.
+        splitter = KFold(n_splits=10, shuffle=True, random_state=0)
+        fold_scores = cross_val_score(
+            estimator, X_train, y_train, cv=splitter, scoring='accuracy'
+        )
+        results.append(fold_scores)
+        names.append(label)
+        print(f"{label}: {fold_scores.mean():f} ({fold_scores.std():f})")
+
+    return names, results
+################################################################################
+#                                                                              #
+#                          Save Trained Model                                  #
+#                                                                              #
+################################################################################
+def save_model(model,filename):
+    """Pickle ``model`` to the file at ``filename``.
+
+    Args:
+        model: any picklable object (here, the fitted pipeline).
+        filename: destination path; an existing file is overwritten.
+
+    The file is written inside a ``with`` block so it is flushed and closed
+    before the function returns, instead of relying on garbage collection.
+    """
+    with open(filename, 'wb') as model_fh:
+        pickle.dump(model, model_fh)
+################################################################################
+#                                                                              #
+#                          Performance Measure                                 #
+#                                                                              #
+################################################################################
+def classification_metrics(model, conf_matrix):
+    """Print accuracy scores, plot the confusion matrix, print a class report.
+
+    Args:
+        model: fitted estimator exposing ``score(X, y)``.
+        conf_matrix: confusion matrix to draw as a heatmap.
+
+    NOTE(review): besides its two parameters this function reads the
+    module-level names ``X_train``, ``y_train``, ``X_test``, ``y_test`` and
+    ``y_pred``; they must be defined before it is called.
+    """
+    print(f"Training Accuracy Score: {model.score(X_train, y_train) * 100:.1f}%")
+    print(f"Validation Accuracy Score: {model.score(X_test, y_test) * 100:.1f}%")
+    fig,ax = plt.subplots(figsize=(8,6))
+    # Annotated heatmap; fmt='g' formats the cell annotations.
+    sns.heatmap(pd.DataFrame(conf_matrix), annot = True, cmap = 'YlGnBu',fmt = 'g')
+    # Put the x-axis label above the plot.
+    ax.xaxis.set_label_position('top')
+    plt.tight_layout()
+    plt.title('Confusion Matrix', fontsize=20, y=1.1)
+    plt.ylabel('Actual label', fontsize=15)
+    plt.xlabel('Predicted label', fontsize=15)
+    plt.show()
+    # Uses the module-level y_test / y_pred, not values derived from `model`.
+    print(classification_report(y_test, y_pred))
+
+
+# Load Dataset
+df = pd.read_csv('Crop_recommendation.csv')
+
+# Remove Outliers: keep only rows where no column falls outside
+# [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
+# NOTE(review): the filtered frame `df_out` is never used below - the split
+# is done on the unfiltered `df`. Confirm whether training was meant to use
+# `df_out`, or whether this step should be removed.
+Q1 = df.quantile(0.25)
+Q3 = df.quantile(0.75)
+IQR = Q3 - Q1
+df_out = df[~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)]
+
+# Split Data to Training and Validation set
+target ='label'
+X_train, X_test, y_train, y_test = read_in_and_split_data(df, target)
+
+# Train model: standard scaling followed by Gaussian naive Bayes.
+pipeline = make_pipeline(StandardScaler(),  GaussianNB())
+model = pipeline.fit(X_train, y_train)
+y_pred = model.predict(X_test)
+conf_matrix = confusion_matrix(y_test,y_pred)
+# classification_metrics also reads X_train/X_test/y_train/y_test/y_pred
+# defined above as module-level names.
+classification_metrics(pipeline, conf_matrix)
+
+# save model
+save_model(model, 'model.pkl')
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,8 @@
+numpy==1.18.5
+streamlit==0.69.0
+plotly==4.0.0
+seaborn==0.10.1
+pandas_profiling==2.3.0
+pandas==1.0.5
+matplotlib==3.3.0
+scikit_learn==0.24.1
diff --git a/setup.sh b/setup.sh
@@ -0,0 +1,9 @@
+# Write the Streamlit server configuration, taking the port from $PORT.
+mkdir -p ~/.streamlit/
+
+# A here-document is used instead of `echo "...\n..."`: whether echo expands
+# backslash escapes is implementation-defined, so under some shells the old
+# form wrote literal "\n" sequences and produced an invalid config file.
+cat > ~/.streamlit/config.toml <<EOF
+[server]
+port = $PORT
+enableCORS = false
+headless = true
+EOF