From 7d62f5b704301ad4b04c5dcc322a8972b11c9151 Mon Sep 17 00:00:00 2001
From: Kushagra Taneja <kushagrataneja2004@gmail.com>
Date: Tue, 29 Oct 2024 14:55:33 +0530
Subject: [PATCH 1/3] Add Business Performance Forecasting feature

---
 App.py                                        |  16 ++++++
 .../business_performance_forecasting.json     |  31 +++++++++++
 .../data/50_Startups.csv                      |  51 ++++++++++++++++++
 .../business_performance_forecasting/model.py |  14 +++++
 .../notebooks/Business_forecasting.py         |  43 +++++++++++++++
 .../predict.py                                |  20 +++++++
 .../saved_models/model.pkl                    | Bin 0 -> 499 bytes
 .../saved_models/scaler.pkl                   | Bin 0 -> 1335 bytes
 pages/Business_Performance_Forecasting.py     |   4 ++
 pages/pages.json                              |  23 ++++++++
 10 files changed, 202 insertions(+)
 create mode 100644 form_configs/business_performance_forecasting.json
 create mode 100644 models/business_performance_forecasting/data/50_Startups.csv
 create mode 100644 models/business_performance_forecasting/model.py
 create mode 100644 models/business_performance_forecasting/notebooks/Business_forecasting.py
 create mode 100644 models/business_performance_forecasting/predict.py
 create mode 100644 models/business_performance_forecasting/saved_models/model.pkl
 create mode 100644 models/business_performance_forecasting/saved_models/scaler.pkl
 create mode 100644 pages/Business_Performance_Forecasting.py

diff --git a/App.py b/App.py
index 8db18a02..4e156490 100644
--- a/App.py
+++ b/App.py
@@ -176,6 +176,22 @@
             Ideal for travel, business meetings, and language learning, breaking down language barriers effortlessly.
         """
     },
+    {
+        "name": "Business Performance Forecaster",
+        "description": "Forecast business profits based on various investment areas for better financial planning and budget allocation.",
+        "details": """
+            ### Overview  
+            The Business Performance Forecaster predicts company profit based on investment in R&D, administration, and marketing, using machine learning to analyze investment patterns and optimize budget allocation.
+
+            ### Key Features  
+            - **Profit Prediction**: Provides an estimated profit based on investment data.
+            - **Investment Analysis**: Evaluates how different spending areas impact overall profit.
+            - **Multi-Input Support**: Accounts for multiple variables like R&D, administration, and marketing expenses.
+
+            ### Use Cases  
+            Useful for companies looking to plan budgets, assess the impact of investments, and improve decision-making processes in financial forecasting.
+        """
+    }
 ]
 
 # Define shades of blue for calculators
diff --git a/form_configs/business_performance_forecasting.json b/form_configs/business_performance_forecasting.json
new file mode 100644
index 00000000..59711743
--- /dev/null
+++ b/form_configs/business_performance_forecasting.json
@@ -0,0 +1,31 @@
+{
+  "Business Forecast Form": {
+    "R&D Spend": {
+      "type": "number",
+      "min_value": 0.0,
+      "default_value": 100000.0,
+      "step": 1000.0,
+      "field_name": "RnD_Spend"
+    },
+    "Administration": {
+      "type": "number",
+      "min_value": 0.0,
+      "default_value": 50000.0,
+      "step": 1000.0,
+      "field_name": "Administration"
+    },
+    "Marketing Spend": {
+      "type": "number",
+      "min_value": 0.0,
+      "default_value": 100000.0,
+      "step": 1000.0,
+      "field_name": "Marketing_Spend"
+    },
+    "State": {
+      "type": "dropdown",
+      "options": ["New York", "California", "Florida"],
+      "default_value": "New York",
+      "field_name": "State"
+    }
+  }
+}
diff --git a/models/business_performance_forecasting/data/50_Startups.csv b/models/business_performance_forecasting/data/50_Startups.csv
new file mode 100644
index 00000000..b1cc5f20
--- /dev/null
+++ b/models/business_performance_forecasting/data/50_Startups.csv
@@ -0,0 +1,51 @@
+R&D Spend,Administration,Marketing Spend,State,Profit
+165349.2,136897.8,471784.1,New York,192261.83
+162597.7,151377.59,443898.53,California,191792.06
+153441.51,101145.55,407934.54,Florida,191050.39
+144372.41,118671.85,383199.62,New York,182901.99
+142107.34,91391.77,366168.42,Florida,166187.94
+131876.9,99814.71,362861.36,New York,156991.12
+134615.46,147198.87,127716.82,California,156122.51
+130298.13,145530.06,323876.68,Florida,155752.6
+120542.52,148718.95,311613.29,New York,152211.77
+123334.88,108679.17,304981.62,California,149759.96
+101913.08,110594.11,229160.95,Florida,146121.95
+100671.96,91790.61,249744.55,California,144259.4
+93863.75,127320.38,249839.44,Florida,141585.52
+91992.39,135495.07,252664.93,California,134307.35
+119943.24,156547.42,256512.92,Florida,132602.65
+114523.61,122616.84,261776.23,New York,129917.04
+78013.11,121597.55,264346.06,California,126992.93
+94657.16,145077.58,282574.31,New York,125370.37
+91749.16,114175.79,294919.57,Florida,124266.9
+86419.7,153514.11,0,New York,122776.86
+76253.86,113867.3,298664.47,California,118474.03
+78389.47,153773.43,299737.29,New York,111313.02
+73994.56,122782.75,303319.26,Florida,110352.25
+67532.53,105751.03,304768.73,Florida,108733.99
+77044.01,99281.34,140574.81,New York,108552.04
+64664.71,139553.16,137962.62,California,107404.34
+75328.87,144135.98,134050.07,Florida,105733.54
+72107.6,127864.55,353183.81,New York,105008.31
+66051.52,182645.56,118148.2,Florida,103282.38
+65605.48,153032.06,107138.38,New York,101004.64
+61994.48,115641.28,91131.24,Florida,99937.59
+61136.38,152701.92,88218.23,New York,97483.56
+63408.86,129219.61,46085.25,California,97427.84
+55493.95,103057.49,214634.81,Florida,96778.92
+46426.07,157693.92,210797.67,California,96712.8
+46014.02,85047.44,205517.64,New York,96479.51
+28663.76,127056.21,201126.82,Florida,90708.19
+44069.95,51283.14,197029.42,California,89949.14
+20229.59,65947.93,185265.1,New York,81229.06
+38558.51,82982.09,174999.3,California,81005.76
+28754.33,118546.05,172795.67,California,78239.91
+27892.92,84710.77,164470.71,Florida,77798.83
+23640.93,96189.63,148001.11,California,71498.49
+15505.73,127382.3,35534.17,New York,69758.98
+22177.74,154806.14,28334.72,California,65200.33
+1000.23,124153.04,1903.93,New York,64926.08
+1315.46,115816.21,297114.46,Florida,49490.75
+0,135426.92,0,California,42559.73
+542.05,51743.15,0,New York,35673.41
+0,116983.8,45173.06,California,14681.4
\ No newline at end of file
diff --git a/models/business_performance_forecasting/model.py b/models/business_performance_forecasting/model.py
new file mode 100644
index 00000000..691281f3
--- /dev/null
+++ b/models/business_performance_forecasting/model.py
@@ -0,0 +1,14 @@
+import pickle
+import os
+model_path = os.path.join(os.path.dirname(__file__), 'saved_models', 'model.pkl')
+scaler_path = os.path.join(os.path.dirname(__file__), 'saved_models', 'scaler.pkl')
+
+
+# Load the saved model and scaler
+def load_model_and_scaler():
+    with open(model_path, 'rb') as model_file:
+        model = pickle.load(model_file)
+    with open(scaler_path, 'rb') as scaler_file:
+        scaler = pickle.load(scaler_file)
+
+    return model, scaler
diff --git a/models/business_performance_forecasting/notebooks/Business_forecasting.py b/models/business_performance_forecasting/notebooks/Business_forecasting.py
new file mode 100644
index 00000000..ae052b4c
--- /dev/null
+++ b/models/business_performance_forecasting/notebooks/Business_forecasting.py
@@ -0,0 +1,43 @@
+import numpy as np 
+import pandas as pd 
+import pickle
+import os
+
+# Load the data
+df = pd.read_csv('50_Startups.csv')
+X = df.iloc[:, :-1].values
+y = df.iloc[:, -1].values
+
+# Preprocessing - Encoding categorical data
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import OneHotEncoder
+ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])], remainder='passthrough')
+X = np.array(ct.fit_transform(X))
+
+# Splitting the dataset
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+
+# Training the model
+from sklearn.linear_model import LinearRegression
+model = LinearRegression()
+model.fit(X_train, y_train)
+
+# Make predictions
+y_pred = model.predict(X_test)
+
+# Print predictions alongside actual values
+np.set_printoptions(precision=2)
+print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)), axis=1))
+
+model_path = os.path.abspath("model.pkl")
+scaler_path = os.path.abspath("scaler.pkl")
+
+# Save the model and preprocessing objects
+with open(model_path, 'wb') as model_file:
+    pickle.dump(model, model_file)
+
+with open(scaler_path, 'wb') as scaler_file:
+    pickle.dump(ct, scaler_file)
+
+print("Model and preprocessing objects saved successfully!")
diff --git a/models/business_performance_forecasting/predict.py b/models/business_performance_forecasting/predict.py
new file mode 100644
index 00000000..7298c9be
--- /dev/null
+++ b/models/business_performance_forecasting/predict.py
@@ -0,0 +1,20 @@
+# import os
+import numpy as np
+from models.business_performance_forecasting.model import load_model_and_scaler  # Import the function from model.py
+
+# Define the prediction function
+def get_prediction(RnD_Spend, Administration, Marketing_Spend, State):
+    # Load the model and scaler
+    model, scaler = load_model_and_scaler()
+    # Prepare input features as a NumPy array
+    input_data = np.array([[RnD_Spend, Administration, Marketing_Spend, State]])
+    
+    # Apply the fitted ColumnTransformer stored as "scaler" (one-hot encodes State)
+    scaled_data = scaler.transform(input_data)
+    scaled_data = scaled_data.astype(float)  
+    
+    # Make prediction using the loaded model
+    prediction = model.predict(scaled_data)
+    
+    return prediction[0]  # Return the predicted profit
+
diff --git a/models/business_performance_forecasting/saved_models/model.pkl b/models/business_performance_forecasting/saved_models/model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..3786fb97e0cba398ae352bec16c1ba7e02e2d6a0
GIT binary patch
literal 499
zcmZo*nfii}0StPiinDW46N~cnax(LPbbM}pN@|W?d{Sa@>XaS<AFyChYI;#>adBpT
z-jvBxG#jVXPSNP$P0K8a&&(@HElN%;D4EjH!<L+1P#GTqV&%nW<tG(S@$2Cz$S=+;
z$t+8q(%Hk87oV1zSW*f!38*iAiZ@#iYjS>STKtqA(Y(^!f=a#Q{GwF7+|rzq%*3Li
z#L6i>Jn==T$@zK3B}JvlB|xitSiuUX^swipK$J|L;?2-H#hEc_O8b<cDH`64-fXQ?
zGWdE}Q%WieQbAgn(k!NQc0gpAdl+q|`1$#H{RaXt;muGoC8^WdK#3=X>-LIJhxfbE
zr)ar$AJF|JZOlKR!+}9e%hm3{3wwrNum9R#TVT)NFm-;<zD4#^fZBRkixTs)fgWM)
z;VjO~OE1j<2H%tn-V6bdn=*uuT_gpx<7sbl$;SN}j!WBZGJYF#Ir0W?`gJRt!Lg_H
zwD<RHJ_ofK(^g-<&*@;V?w;nLzR(tI2Nyhufkt!puoWjK0<8ghEJMnf<KCQE?+f-k
hahTEu^n8y%JTxlf%TkNLk=4U$sAsBYG^I324**CU%yj?&

literal 0
HcmV?d00001

diff --git a/models/business_performance_forecasting/saved_models/scaler.pkl b/models/business_performance_forecasting/saved_models/scaler.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..9345e92315facd1b85126ec7fa2c8ca5e1d1ec40
GIT binary patch
literal 1335
zcmZ{k%WmXE6o%8&om(fJNfa_<fG8ueU=XAwu3|Tfhyh{ABOx{{qEgvD-Q{V!%G6~t
zkwAjQD2Y_N6i>zj!C!9EF#^1ByQ<EqbNRp1zm9(YczvyX{QG865!Z5>tGZE!rYu*L
zt0l9VOEXitCLR32dAs>TZ|^RA_*?iTJoURhOA{`F-z1r<BD$UaeR-p)(JCinN;#jh
z&cFmec_HarWxwt`R+o-*ZfUM`NiZ65XBBKs!?hu%oNb&9H-5daR}BTfE?wPRA@HnH
z+<y6axC?$ya9LEuoLoqCDPcrfE95jPv}!`?Cv_>=Ow&&e*<4{`uja2=FKp_MX2h-2
z#3Zi?eW|lj#<DhYC1Du+KI;l(uSlCx#W0^tznFd+Ty_zXA-@jXZ)>V~DcgqZG~5^~
zv~qI+_pTspkP)gX!Z*+U_^f^WhGZ{QW{}rmufOb)%DJ#{dlMlu2o`Sr<L;C7^EI(g
zXwoq>%!!rKvQ;8cV}r9jbYNMcfdLE2$ZvJ}M<vAx_a}W2m{qVt^YJc9$Qc0;n{BD$
z^ddY>){;T;fSe!S2Mgj<JP~Kt(BvNiGGspK)n*t%;x;i}f+Z$mPdvO4`{Gc%C62_S
zRJ@&v<5WBr@AyMx(1~Y(6VECt!maoq8Tg%h4dadesHFpZYns-svL)A=gO**S2qBHt
zF1J8N9QaLHED!HOvi2sN53+C_zDHSuWcVf;bi{$L2QR(~w>Qhvp&$Gl(lq_<Pdx1_
zd1ga)i*o$<oL40zkR=D8&nqBZ;A<<T%O^i7y$A$L?vGi-l9k14#%Go^n}d-C!vdwZ
zS4UXvvz}&9IO+t@#Jl1>mx+^WblZRQKVvs@GPj_#FKO_H&%5paobFL{LfqX_M#hPC
z>#bL)tH6*(6?vXhW7$$(a5c#V<qJsEy1?S#-(>o+l5}R(8Gg{2#U<l<9>Ly$8@pdV
zTFuyU?!l*=m2;5iw^#Jwf`Voo>IMS{hFv=V9r=fyUxzGPGV8Z8R{{o!|Ce=B0cIco
Ve1~b&6v)0zhRH^|aLe1Ke*s6862AZd

literal 0
HcmV?d00001

diff --git a/pages/Business_Performance_Forecasting.py b/pages/Business_Performance_Forecasting.py
new file mode 100644
index 00000000..2b413057
--- /dev/null
+++ b/pages/Business_Performance_Forecasting.py
@@ -0,0 +1,4 @@
+from page_handler import PageHandler
+
+page_handler = PageHandler("pages/pages.json")
+page_handler.render_page("Business Performance Forecasting")
diff --git a/pages/pages.json b/pages/pages.json
index cc76fe56..8143915d 100644
--- a/pages/pages.json
+++ b/pages/pages.json
@@ -195,7 +195,30 @@
         "description": "This model uses a dataset containing demographic and health-related factors to predict the cost of insurance. The features include age, sex, BMI, children, smoker status, and region, with predictions made using the Random Forest algorithm for accurate results. Ensemble techniques like XGBoost will also be used to further enhance the prediction accuracy."
       }
     ]
+  },
+  "Business Performance Forecasting": {
+    "title": "Business Performance Forecasting",
+    "page_title": "Business Performance Forecasting",
+    "page_icon": "\ud83c\udf3e",
+    "model_predict_file_path": "models/business_performance_forecasting/predict.py",
+    "model_function": "get_prediction",
+    "model_detail_function": "model_details",
+    "form_config_path": "form_configs/business_performance_forecasting.json",
+    "tabs": [
+      {
+        "name": "Business Forecast Form",
+        "type": "form",
+        "form_name": "Business Forecast Form"
+      },
+      {
+        "name": "Model Details",
+        "type": "model_details",
+        "problem_statement": "The Business Performance Forecasting model predicts future profits based on R&D spend, administration costs, marketing spend, and state. By utilizing machine learning, this tool assists businesses in making informed decisions about resource allocation.",
+        "description": "This model employs a dataset with features including R&D spend, administration costs, marketing spend, and geographic location to forecast profits. The predictions are generated using regression techniques, ensuring accuracy and reliability for business strategy planning."
+      }
+    ]
   }
+
 }
 
 

From 9b00f5141b4a531d02c51060b0a47b5dbbdea1f3 Mon Sep 17 00:00:00 2001
From: Kushagra Taneja <kushagrataneja2004@gmail.com>
Date: Tue, 29 Oct 2024 19:01:23 +0530
Subject: [PATCH 2/3] added model_details function

---
 .../notebooks/Business_forecasting.py         |  38 +++++++++++++++++-
 .../predict.py                                |  24 ++++++++++-
 .../saved_models/evaluation_results.pkl       | Bin 0 -> 93 bytes
 page_handler.py                               |  17 ++++----
 4 files changed, 68 insertions(+), 11 deletions(-)
 create mode 100644 models/business_performance_forecasting/saved_models/evaluation_results.pkl

diff --git a/models/business_performance_forecasting/notebooks/Business_forecasting.py b/models/business_performance_forecasting/notebooks/Business_forecasting.py
index ae052b4c..36bb92c5 100644
--- a/models/business_performance_forecasting/notebooks/Business_forecasting.py
+++ b/models/business_performance_forecasting/notebooks/Business_forecasting.py
@@ -2,7 +2,8 @@
 import pandas as pd 
 import pickle
 import os
-
+import matplotlib.pyplot as plt
+from sklearn.metrics import r2_score
 # Load the data
 df = pd.read_csv('50_Startups.csv')
 X = df.iloc[:, :-1].values
@@ -41,3 +42,38 @@
     pickle.dump(ct, scaler_file)
 
 print("Model and preprocessing objects saved successfully!")
+
+def save_evaluation_to_pickle(train_X, train_Y, test_X, test_Y, output_file="evaluation_results.pkl"):
+    # Calculate R^2 score
+    train_r2 = r2_score(train_Y, model.predict(train_X))
+    test_r2 = r2_score(test_Y, y_pred)
+
+    # Create plot
+    fig, ax = plt.subplots(figsize=(10, 6))
+    ax.scatter(test_Y, y_pred, alpha=0.6, color='blue', label='Predicted')
+    ax.plot([test_Y.min(), test_Y.max()], [test_Y.min(), test_Y.max()], 'r--', label='Perfect Prediction')
+    ax.set_xlabel("Actual")
+    ax.set_ylabel("Predicted")
+    ax.set_title("Actual vs Predicted Values (Test Set)")
+    ax.legend()
+    ax.grid(True)
+
+    # Save the plot as a PNG file
+    plot_file = "actual_vs_predicted.png"
+    fig.savefig(plot_file)
+
+    # Package results
+    results = {
+        "Train_R2": train_r2,
+        "Test_R2": test_r2,
+        "plot_file": plot_file  # Save the plot file path
+    }
+
+    # Save results to a pickle file
+    with open(output_file, "wb") as f:
+        pickle.dump(results, f)
+
+    print(f"Evaluation and plot data saved to {output_file}")
+    print(f"Plot saved as {plot_file}")
+# Run this function once to generate the evaluation file
+save_evaluation_to_pickle(X_train, y_train, X_test, y_test)
\ No newline at end of file
diff --git a/models/business_performance_forecasting/predict.py b/models/business_performance_forecasting/predict.py
index 7298c9be..44e53e76 100644
--- a/models/business_performance_forecasting/predict.py
+++ b/models/business_performance_forecasting/predict.py
@@ -1,10 +1,14 @@
-# import os
+import os
 import numpy as np
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import pickle
 from models.business_performance_forecasting.model import load_model_and_scaler  # Import the function from model.py
 
 # Define the prediction function
 def get_prediction(RnD_Spend, Administration, Marketing_Spend, State):
-    # Load the model and scaler
+    # Load the trained model and its fitted preprocessor ("scaler")
     model, scaler = load_model_and_scaler()
     # Prepare input features as a NumPy array
     input_data = np.array([[RnD_Spend, Administration, Marketing_Spend, State]])
@@ -18,3 +22,19 @@ def get_prediction(RnD_Spend, Administration, Marketing_Spend, State):
     
     return prediction[0]  # Return the predicted profit
 
+
+class ModelEvaluation:
+    def __init__(self):
+        metrics_file= os.path.join(os.path.dirname(__file__), 'saved_models', 'evaluation_results.pkl')
+        # Load evaluation metrics from a pickle file
+        with open(metrics_file, "rb") as f:
+            self.metrics = pickle.load(f)
+        print("Loaded metrics:", self.metrics)
+    def evaluate(self):
+        metrics = self.metrics       
+        return metrics, None, None, None
+
+def model_details():
+    evaluator = ModelEvaluation()
+    return evaluator
+
diff --git a/models/business_performance_forecasting/saved_models/evaluation_results.pkl b/models/business_performance_forecasting/saved_models/evaluation_results.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..a2f0a591d8223c68271e25360be22b7e6037b1b1
GIT binary patch
literal 93
zcmZo*nHt0Z0ku;!dN@Lg5;ODSgN&xQ+rLYH`9L*5pocvqwYUT#^!BA(=;UWjJ)8wO
p`6cmbnK`Lbdc+fxOG*=S;>(KT3yM-xGLuVEQ}hb*(x;T_0RREHBUAtY

literal 0
HcmV?d00001

diff --git a/page_handler.py b/page_handler.py
index 75836c50..6f070670 100644
--- a/page_handler.py
+++ b/page_handler.py
@@ -85,11 +85,12 @@ def render_model_details(self, model_module,tab):
 
 			# Display the scatter plot for predicted vs actual values
 			#used clear_figure to clear the plot once displayed to avoid conflict 
-			st.subheader("Model Prediction Plot")
-			st.pyplot(prediction_plot, clear_figure=True)
-
-			st.subheader("Error Plot")
-			st.pyplot(error_plot, clear_figure=True)
-			
-			st.subheader("Model Performance Plot")
-			st.pyplot(performance_plot, clear_figure=True)
+			if prediction_plot!=None:
+				st.subheader("Model Prediction Plot")
+				st.pyplot(prediction_plot, clear_figure=True)
+			if error_plot!=None:
+				st.subheader("Error Plot")
+				st.pyplot(error_plot, clear_figure=True)
+			if performance_plot!=None:
+				st.subheader("Model Performance Plot")
+				st.pyplot(performance_plot, clear_figure=True)

From 5990e812b9820044fae6fff4ff48ba1431fcab58 Mon Sep 17 00:00:00 2001
From: Kushagra Taneja <kushagrataneja2004@gmail.com>
Date: Wed, 30 Oct 2024 20:35:28 +0530
Subject: [PATCH 3/3] added the notebook with model_evaluation function

---
 .../notebooks/Business_forecasting.py         |  79 ----
 .../business_performance_forecasting.ipynb    | 417 ++++++++++++++++++
 2 files changed, 417 insertions(+), 79 deletions(-)
 delete mode 100644 models/business_performance_forecasting/notebooks/Business_forecasting.py
 create mode 100644 models/business_performance_forecasting/notebooks/business_performance_forecasting.ipynb

diff --git a/models/business_performance_forecasting/notebooks/Business_forecasting.py b/models/business_performance_forecasting/notebooks/Business_forecasting.py
deleted file mode 100644
index 36bb92c5..00000000
--- a/models/business_performance_forecasting/notebooks/Business_forecasting.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import numpy as np 
-import pandas as pd 
-import pickle
-import os
-import matplotlib.pyplot as plt
-from sklearn.metrics import r2_score
-# Load the data
-df = pd.read_csv('50_Startups.csv')
-X = df.iloc[:, :-1].values
-y = df.iloc[:, -1].values
-
-# Preprocessing - Encoding categorical data
-from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import OneHotEncoder
-ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])], remainder='passthrough')
-X = np.array(ct.fit_transform(X))
-
-# Splitting the dataset
-from sklearn.model_selection import train_test_split
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
-
-# Training the model
-from sklearn.linear_model import LinearRegression
-model = LinearRegression()
-model.fit(X_train, y_train)
-
-# Make predictions
-y_pred = model.predict(X_test)
-
-# Print predictions alongside actual values
-np.set_printoptions(precision=2)
-print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)), axis=1))
-
-model_path = os.path.abspath("model.pkl")
-scaler_path = os.path.abspath("scaler.pkl")
-
-# Save the model and preprocessing objects
-with open(model_path, 'wb') as model_file:
-    pickle.dump(model, model_file)
-
-with open(scaler_path, 'wb') as scaler_file:
-    pickle.dump(ct, scaler_file)
-
-print("Model and preprocessing objects saved successfully!")
-
-def save_evaluation_to_pickle(train_X, train_Y, test_X, test_Y, output_file="evaluation_results.pkl"):
-    # Calculate R^2 score
-    train_r2 = r2_score(train_Y, model.predict(train_X))
-    test_r2 = r2_score(test_Y, y_pred)
-
-    # Create plot
-    fig, ax = plt.subplots(figsize=(10, 6))
-    ax.scatter(test_Y, y_pred, alpha=0.6, color='blue', label='Predicted')
-    ax.plot([test_Y.min(), test_Y.max()], [test_Y.min(), test_Y.max()], 'r--', label='Perfect Prediction')
-    ax.set_xlabel("Actual")
-    ax.set_ylabel("Predicted")
-    ax.set_title("Actual vs Predicted Values (Test Set)")
-    ax.legend()
-    ax.grid(True)
-
-    # Save the plot as a PNG file
-    plot_file = "actual_vs_predicted.png"
-    fig.savefig(plot_file)
-
-    # Package results
-    results = {
-        "Train_R2": train_r2,
-        "Test_R2": test_r2,
-        "plot_file": plot_file  # Save the plot file path
-    }
-
-    # Save results to a pickle file
-    with open(output_file, "wb") as f:
-        pickle.dump(results, f)
-
-    print(f"Evaluation and plot data saved to {output_file}")
-    print(f"Plot saved as {plot_file}")
-# Run this function once to generate the evaluation file
-save_evaluation_to_pickle(X_train, y_train, X_test, y_test)
\ No newline at end of file
diff --git a/models/business_performance_forecasting/notebooks/business_performance_forecasting.ipynb b/models/business_performance_forecasting/notebooks/business_performance_forecasting.ipynb
new file mode 100644
index 00000000..da4c4395
--- /dev/null
+++ b/models/business_performance_forecasting/notebooks/business_performance_forecasting.ipynb
@@ -0,0 +1,417 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "CazISR8X_HUG"
+   },
+   "source": [
+    "# Multiple Linear Regression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "pOyqYHTk_Q57"
+   },
+   "source": [
+    "## Importing the libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "T_YHJjnD_Tja"
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import pickle"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "vgC61-ah_WIz"
+   },
+   "source": [
+    "## Importing the dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "UrxyEKGn_ez7"
+   },
+   "outputs": [],
+   "source": [
+    "dataset = pd.read_csv('50_Startups.csv')\n",
+    "X = dataset.iloc[:, :-1].values\n",
+    "y = dataset.iloc[:, -1].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 874
+    },
+    "id": "GOB3QhV9B5kD",
+    "outputId": "905a7bca-1889-4d04-920f-5f3ed8211585"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[165349.2 136897.8 471784.1 'New York']\n",
+      " [162597.7 151377.59 443898.53 'California']\n",
+      " [153441.51 101145.55 407934.54 'Florida']\n",
+      " [144372.41 118671.85 383199.62 'New York']\n",
+      " [142107.34 91391.77 366168.42 'Florida']\n",
+      " [131876.9 99814.71 362861.36 'New York']\n",
+      " [134615.46 147198.87 127716.82 'California']\n",
+      " [130298.13 145530.06 323876.68 'Florida']\n",
+      " [120542.52 148718.95 311613.29 'New York']\n",
+      " [123334.88 108679.17 304981.62 'California']\n",
+      " [101913.08 110594.11 229160.95 'Florida']\n",
+      " [100671.96 91790.61 249744.55 'California']\n",
+      " [93863.75 127320.38 249839.44 'Florida']\n",
+      " [91992.39 135495.07 252664.93 'California']\n",
+      " [119943.24 156547.42 256512.92 'Florida']\n",
+      " [114523.61 122616.84 261776.23 'New York']\n",
+      " [78013.11 121597.55 264346.06 'California']\n",
+      " [94657.16 145077.58 282574.31 'New York']\n",
+      " [91749.16 114175.79 294919.57 'Florida']\n",
+      " [86419.7 153514.11 0.0 'New York']\n",
+      " [76253.86 113867.3 298664.47 'California']\n",
+      " [78389.47 153773.43 299737.29 'New York']\n",
+      " [73994.56 122782.75 303319.26 'Florida']\n",
+      " [67532.53 105751.03 304768.73 'Florida']\n",
+      " [77044.01 99281.34 140574.81 'New York']\n",
+      " [64664.71 139553.16 137962.62 'California']\n",
+      " [75328.87 144135.98 134050.07 'Florida']\n",
+      " [72107.6 127864.55 353183.81 'New York']\n",
+      " [66051.52 182645.56 118148.2 'Florida']\n",
+      " [65605.48 153032.06 107138.38 'New York']\n",
+      " [61994.48 115641.28 91131.24 'Florida']\n",
+      " [61136.38 152701.92 88218.23 'New York']\n",
+      " [63408.86 129219.61 46085.25 'California']\n",
+      " [55493.95 103057.49 214634.81 'Florida']\n",
+      " [46426.07 157693.92 210797.67 'California']\n",
+      " [46014.02 85047.44 205517.64 'New York']\n",
+      " [28663.76 127056.21 201126.82 'Florida']\n",
+      " [44069.95 51283.14 197029.42 'California']\n",
+      " [20229.59 65947.93 185265.1 'New York']\n",
+      " [38558.51 82982.09 174999.3 'California']\n",
+      " [28754.33 118546.05 172795.67 'California']\n",
+      " [27892.92 84710.77 164470.71 'Florida']\n",
+      " [23640.93 96189.63 148001.11 'California']\n",
+      " [15505.73 127382.3 35534.17 'New York']\n",
+      " [22177.74 154806.14 28334.72 'California']\n",
+      " [1000.23 124153.04 1903.93 'New York']\n",
+      " [1315.46 115816.21 297114.46 'Florida']\n",
+      " [0.0 135426.92 0.0 'California']\n",
+      " [542.05 51743.15 0.0 'New York']\n",
+      " [0.0 116983.8 45173.06 'California']]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(X)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "VadrvE7s_lS9"
+   },
+   "source": [
+    "## Encoding categorical data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "wV3fD1mbAvsh"
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.compose import ColumnTransformer\n",
+    "from sklearn.preprocessing import OneHotEncoder\n",
+    "ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])], remainder='passthrough')\n",
+    "X = np.array(ct.fit_transform(X))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 874
+    },
+    "id": "4ym3HdYeCGYG",
+    "outputId": "9bd9e71a-bae0-45cb-fa26-9a0d480bb560"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0.0 0.0 1.0 165349.2 136897.8 471784.1]\n",
+      " [1.0 0.0 0.0 162597.7 151377.59 443898.53]\n",
+      " [0.0 1.0 0.0 153441.51 101145.55 407934.54]\n",
+      " [0.0 0.0 1.0 144372.41 118671.85 383199.62]\n",
+      " [0.0 1.0 0.0 142107.34 91391.77 366168.42]\n",
+      " [0.0 0.0 1.0 131876.9 99814.71 362861.36]\n",
+      " [1.0 0.0 0.0 134615.46 147198.87 127716.82]\n",
+      " [0.0 1.0 0.0 130298.13 145530.06 323876.68]\n",
+      " [0.0 0.0 1.0 120542.52 148718.95 311613.29]\n",
+      " [1.0 0.0 0.0 123334.88 108679.17 304981.62]\n",
+      " [0.0 1.0 0.0 101913.08 110594.11 229160.95]\n",
+      " [1.0 0.0 0.0 100671.96 91790.61 249744.55]\n",
+      " [0.0 1.0 0.0 93863.75 127320.38 249839.44]\n",
+      " [1.0 0.0 0.0 91992.39 135495.07 252664.93]\n",
+      " [0.0 1.0 0.0 119943.24 156547.42 256512.92]\n",
+      " [0.0 0.0 1.0 114523.61 122616.84 261776.23]\n",
+      " [1.0 0.0 0.0 78013.11 121597.55 264346.06]\n",
+      " [0.0 0.0 1.0 94657.16 145077.58 282574.31]\n",
+      " [0.0 1.0 0.0 91749.16 114175.79 294919.57]\n",
+      " [0.0 0.0 1.0 86419.7 153514.11 0.0]\n",
+      " [1.0 0.0 0.0 76253.86 113867.3 298664.47]\n",
+      " [0.0 0.0 1.0 78389.47 153773.43 299737.29]\n",
+      " [0.0 1.0 0.0 73994.56 122782.75 303319.26]\n",
+      " [0.0 1.0 0.0 67532.53 105751.03 304768.73]\n",
+      " [0.0 0.0 1.0 77044.01 99281.34 140574.81]\n",
+      " [1.0 0.0 0.0 64664.71 139553.16 137962.62]\n",
+      " [0.0 1.0 0.0 75328.87 144135.98 134050.07]\n",
+      " [0.0 0.0 1.0 72107.6 127864.55 353183.81]\n",
+      " [0.0 1.0 0.0 66051.52 182645.56 118148.2]\n",
+      " [0.0 0.0 1.0 65605.48 153032.06 107138.38]\n",
+      " [0.0 1.0 0.0 61994.48 115641.28 91131.24]\n",
+      " [0.0 0.0 1.0 61136.38 152701.92 88218.23]\n",
+      " [1.0 0.0 0.0 63408.86 129219.61 46085.25]\n",
+      " [0.0 1.0 0.0 55493.95 103057.49 214634.81]\n",
+      " [1.0 0.0 0.0 46426.07 157693.92 210797.67]\n",
+      " [0.0 0.0 1.0 46014.02 85047.44 205517.64]\n",
+      " [0.0 1.0 0.0 28663.76 127056.21 201126.82]\n",
+      " [1.0 0.0 0.0 44069.95 51283.14 197029.42]\n",
+      " [0.0 0.0 1.0 20229.59 65947.93 185265.1]\n",
+      " [1.0 0.0 0.0 38558.51 82982.09 174999.3]\n",
+      " [1.0 0.0 0.0 28754.33 118546.05 172795.67]\n",
+      " [0.0 1.0 0.0 27892.92 84710.77 164470.71]\n",
+      " [1.0 0.0 0.0 23640.93 96189.63 148001.11]\n",
+      " [0.0 0.0 1.0 15505.73 127382.3 35534.17]\n",
+      " [1.0 0.0 0.0 22177.74 154806.14 28334.72]\n",
+      " [0.0 0.0 1.0 1000.23 124153.04 1903.93]\n",
+      " [0.0 1.0 0.0 1315.46 115816.21 297114.46]\n",
+      " [1.0 0.0 0.0 0.0 135426.92 0.0]\n",
+      " [0.0 0.0 1.0 542.05 51743.15 0.0]\n",
+      " [1.0 0.0 0.0 0.0 116983.8 45173.06]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(X)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "WemVnqgeA70k"
+   },
+   "source": [
+    "## Splitting the dataset into the Training set and Test set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Kb_v_ae-A-20"
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "k-McZVsQBINc"
+   },
+   "source": [
+    "## Training the Multiple Linear Regression model on the Training set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 34
+    },
+    "id": "ywPjx0L1BMiD",
+    "outputId": "3417c2b0-6871-423c-a81f-643e35ae9f3e"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {
+      "tags": []
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.linear_model import LinearRegression\n",
+    "regressor = LinearRegression()\n",
+    "regressor.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "xNkXL1YQBiBT"
+   },
+   "source": [
+    "## Predicting the Test set results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 188
+    },
+    "id": "TQKmwvtdBkyb",
+    "outputId": "72da0067-f2e3-48d3-fae7-86ddbf597e5e"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[103015.2  103282.38]\n",
+      " [132582.28 144259.4 ]\n",
+      " [132447.74 146121.95]\n",
+      " [ 71976.1   77798.83]\n",
+      " [178537.48 191050.39]\n",
+      " [116161.24 105008.31]\n",
+      " [ 67851.69  81229.06]\n",
+      " [ 98791.73  97483.56]\n",
+      " [113969.44 110352.25]\n",
+      " [167921.07 166187.94]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "y_pred = regressor.predict(X_test)\n",
+    "np.set_printoptions(precision=2)\n",
+    "print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "MC-XRwjE6x6M"
+   },
+   "source": [
+    "# Saving the model\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "HaEuLbtg_76M"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import pickle\n",
+    "model_path = os.path.abspath(\"model.pkl\")\n",
+    "scaler_path = os.path.abspath(\"scaler.pkl\")\n",
+    "\n",
+    "# Save the fitted model (`regressor`) and the preprocessing object (`ct`)\n",
+    "with open(model_path, 'wb') as model_file:\n",
+    "    pickle.dump(regressor, model_file)\n",
+    "with open(scaler_path, 'wb') as scaler_file:\n",
+    "    pickle.dump(ct, scaler_file)\n",
+    "\n",
+    "print(f\"Model saved at: {model_path}\")\n",
+    "print(f\"Preprocessor saved at: {scaler_path}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "UwurUG9r63EK"
+   },
+   "source": [
+    "# Evaluating the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "wmPoacS7eWMt"
+   },
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "from sklearn.metrics import r2_score\n",
+    "\n",
+    "def model_evaluation(train_X, train_Y, test_X, test_Y, output_file=\"evaluation_results.pkl\", plot_file=None):\n",
+    "    \"\"\"Score `regressor` with R^2 on both splits and pickle the results to output_file.\"\"\"\n",
+    "    # Predict from the arguments rather than reusing the global y_pred\n",
+    "    train_r2 = r2_score(train_Y, regressor.predict(train_X))\n",
+    "    test_r2 = r2_score(test_Y, regressor.predict(test_X))\n",
+    "\n",
+    "    results = {\n",
+    "        \"Train_R2\": train_r2,\n",
+    "        \"Test_R2\": test_r2,\n",
+    "        \"plot_file\": plot_file  # Path of a saved plot, or None when no plot was made\n",
+    "    }\n",
+    "\n",
+    "    # Save results to a pickle file\n",
+    "    with open(output_file, \"wb\") as f:\n",
+    "        pickle.dump(results, f)\n",
+    "\n",
+    "    print(f\"Evaluation data saved to {output_file}\")\n",
+    "\n",
+    "# Run this function once to generate the evaluation file\n",
+    "model_evaluation(X_train, y_train, X_test, y_test)"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}