add credit card estimator

yashasvini121 · Oct 27, 2024 · 056ec77 · 056ec77
2 parents f6e7575 + 534c61f
commit 056ec77
Show file tree

Hide file tree

Showing 9 changed files with 3,488 additions and 4 deletions.
diff --git a/App.py b/App.py
@@ -9,7 +9,6 @@
 **Predictive Calc** offers a powerful suite of machine learning models designed to assist you in making informed decisions. Whether it's predicting house prices, determining loan eligibility, or evaluating health risks, we have you covered.
 """)
 
-# Why Choose Calc?
 st.markdown("""
 ## Why Choose Predictive Calc? """)
 features = [
@@ -216,10 +215,7 @@
                         st.write(calc["details"])
                     st.markdown("---")
 
-# Add a "Get Started" section at the bottom
 st.markdown("## Get Started Today!")
 st.markdown("Explore our calculators and take control of your predictive analytics journey!")
 
-
 st.write("Developed with ❤️ by Yashasvini Sharma | [Github](https://www.github.com/yashasvini121) | [LinkedIn](https://www.linkedin.com/in/yashasvini121/)")
-
diff --git a/form_configs/credit_card_fraud.json b/form_configs/credit_card_fraud.json
@@ -0,0 +1,79 @@
+{
+    "Credit Card Fraud Estimator": {
+        "Average Amount per Transaction per Day": {
+            "type": "number",
+            "min_value": 0,
+            "max_value": 100000,
+            "default_value": 100,
+            "step": 100,
+            "field_name": "avg_amount_per_day"
+        },
+        "Transaction Amount": {
+            "type": "number",
+            "min_value": 0,
+            "max_value": 100000,
+            "default_value": 3000,
+            "step": 100,
+            "field_name": "transaction_amount"
+        },
+        "Is Declined": {
+            "type": "dropdown",
+            "options": [
+                "Yes",
+                "No"
+            ],
+            "default_value": "No",
+            "field_name": "Is_declined"
+        },
+        "Total Number of Declines per Day": {
+            "type": "number",
+            "min_value": 0,
+            "max_value": 100,
+            "default_value": 0,
+            "step": 1,
+            "field_name": "no_of_declines_per_day"
+        },
+        "Is Foreign Transaction": {
+            "type": "dropdown",
+            "options": [
+                "Yes",
+                "No"
+            ],
+            "default_value": "No",
+            "field_name": "Is_Foreign_transaction"
+        },
+        "Is High-Risk Country": {
+            "type": "dropdown",
+            "options": [
+                "Yes",
+                "No"
+            ],
+            "default_value": "No",
+            "field_name": "Is_High_Risk_country"
+        },
+        "Daily Chargeback Average Amount": {
+            "type": "number",
+            "min_value": 0,
+            "max_value": 10000,
+            "default_value": 0,
+            "step": 100,
+            "field_name": "Daily_chargeback_avg_amt"
+        },
+        "6-Month Average Chargeback Amount": {
+            "type": "number",
+            "min_value": 0,
+            "max_value": 10000,
+            "default_value": 0,
+            "step": 100,
+            "field_name": "six_month_avg_chbk_amt"
+        },
+        "6-Month Chargeback Frequency": {
+            "type": "number",
+            "min_value": 0,
+            "max_value": 100,
+            "default_value": 0,
+            "step": 1,
+            "field_name": "six_month_chbk_freq"
+        }
+    }
+}
diff --git a/models/credit_card_fraud/data/creditcardcsvpresent.csv b/models/credit_card_fraud/data/creditcardcsvpresent.csv
diff --git a/models/credit_card_fraud/model.py b/models/credit_card_fraud/model.py
@@ -0,0 +1,119 @@
+# importing libraries
+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVC
+import pandas as pd
+import warnings
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+import pandas as pd
+import pickle
+from models.credit_card_fraud.modelEvaluation import ModelEvaluation
+warnings.filterwarnings("ignore")
+
+
+# reading dataset
+data = pd.read_csv("models\credit_card_fraud\data\creditcardcsvpresent.csv")
+df = data.copy(deep=True)
+
+# df.info()
+
+# remove transaction_date all values are null
+# and also remove merchant id
+df = df.drop(columns=['Merchant_id', 'Transaction date'], axis=1)
+
+
+# encoding for qualitative variables
+code = {
+    "N": 0,
+    "Y": 1 }
+
+for obj in df.select_dtypes("object"):
+    df[obj] = df[obj].map(code)
+
+# Target and Feature Identification
+target = "isFradulent"
+features = [col for col in df.columns if col != target]
+
+X = df[features]  # Create a DataFrame for the features
+y = df[target]   # Create a Series for the target
+
+
+# Split the dataset
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Train SVM Classifier
+svm_model = SVC(kernel='rbf', class_weight='balanced', random_state=42)  # RBF kernel (default) is good for non-linear problems
+svm_model.fit(X_train, y_train)
+
+# Make predictions
+y_pred = svm_model.predict(X_test)
+
+# Function to prepare input data into a DataFrame
+def prepare_input_data(
+        avg_amount_per_day,
+        transaction_amount,
+        Is_declined,
+        no_of_declines_per_day,
+        Is_Foreign_transaction,
+        Is_High_Risk_country,
+        Daily_chargeback_avg_amt,
+        six_month_avg_chbk_amt,
+        six_month_chbk_freq,
+):
+    # Create a DataFrame with the input data
+    input_data = {
+        "Average Amount/transaction/day": [avg_amount_per_day],
+        "Transaction_amount": [transaction_amount],
+        "Is declined": [Is_declined],
+        "Total Number of declines/day": [no_of_declines_per_day],
+        "isForeignTransaction": [Is_Foreign_transaction],
+        "isHighRiskCountry": [Is_High_Risk_country],
+        "Daily_chargeback_avg_amt": [Daily_chargeback_avg_amt],
+        "6_month_avg_chbk_amt": [six_month_avg_chbk_amt],
+        "6-month_chbk_freq": [six_month_chbk_freq],
+    }
+
+    return pd.DataFrame(input_data)
+
+def get_prediction(
+        avg_amount_per_day,
+        transaction_amount,
+        Is_declined,
+        no_of_declines_per_day,
+        Is_Foreign_transaction,
+        Is_High_Risk_country,
+        Daily_chargeback_avg_amt,
+        six_month_avg_chbk_amt,
+        six_month_chbk_freq,
+):
+    # Prepare the input data
+    input_df = prepare_input_data(
+        avg_amount_per_day,
+        transaction_amount,
+        Is_declined,
+        no_of_declines_per_day,
+        Is_Foreign_transaction,
+        Is_High_Risk_country,
+        Daily_chargeback_avg_amt,
+        six_month_avg_chbk_amt,
+        six_month_chbk_freq,
+    )
+    # Predict using Random Forest
+    predicted_value = svm_model.predict(input_df)
+
+    # Return "Fraud" if fraud (1), else "Not a Fraud"
+    return "Fraud" if predicted_value[0] == 1 else "Not a Fraud"
+
+
+# Function to save the model
+def save_model():
+    # Save the Random Forest model
+    model_filename = 'creditCardFraud_svc_model.pkl'
+    with open(model_filename, 'wb') as file:
+        pickle.dump(svm_model, file)
+
+# # Function to evaluate accuracy
+def get_evaluator():
+	evaluator = ModelEvaluation(svm_model, X_train, y_train, X_test, y_test)	
+	return evaluator
+
+# save_model()
diff --git a/models/credit_card_fraud/modelEvaluation.py b/models/credit_card_fraud/modelEvaluation.py
@@ -0,0 +1,108 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import accuracy_score, confusion_matrix
+
+
+class ModelEvaluation:
+    def __init__(self, model, train_X, train_Y, test_X, test_Y):
+        self.model = model
+        self.train_X = train_X
+        self.train_Y = train_Y
+        self.test_X = test_X
+        self.test_Y = test_Y
+        self.evaluation_matrix = pd.DataFrame(
+            np.zeros([1, 8]),
+            columns=[
+                "Train-R2",
+                "Test-R2",
+                "Train-RSS",
+                "Test-RSS",
+                "Train-MSE",
+                "Test-MSE",
+                "Train-RMSE",
+                "Test-RMSE",
+            ],
+        )
+        self.random_column = np.random.choice(
+            train_X.columns[train_X.nunique() >= 50], 1, replace=False
+        )[0]
+
+    def evaluate(self):
+        pred_train = self.model.predict(self.train_X)
+        pred_test = self.model.predict(self.test_X)
+
+        self.update_evaluation_matrix(pred_train, pred_test)
+        metrics = self.get_metrics()
+        prediction_plot = self.plot_predictions(pred_train)
+        error_plot = self.plot_error_terms(pred_train)
+
+        # adding performance graph of the model
+        performance_plot = self.plot_performance_graph()
+
+        return metrics, prediction_plot, error_plot, performance_plot
+
+    def get_metrics(self):
+        """Return a dictionary of evaluation metrics for easy integration."""
+        pred_train = self.model.predict(self.train_X)
+        pred_test = self.model.predict(self.test_X)
+
+        metrics = {
+            "Train_R2": accuracy_score(self.train_Y, pred_train),
+            "Test_R2": accuracy_score(self.test_Y, pred_test),
+            "Train_RSS": np.sum(np.square(self.train_Y - pred_train)),
+            "Test_RSS": np.sum(np.square(self.test_Y - pred_test))
+        }
+        return metrics
+
+    def plot_predictions(self, pred_train):
+        # Predict on test data
+        pred_test = self.model.predict(self.test_X)
+
+        # Calculate confusion matrix
+        cm = confusion_matrix(self.test_Y, pred_test)
+
+        # Plot confusion matrix
+        fig, ax = plt.subplots(figsize=(10, 6))
+        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
+
+        ax.set_title("Confusion Matrix")
+        ax.set_xlabel("Predicted Labels")
+        ax.set_ylabel("True Labels")
+
+        plt.tight_layout()
+        return fig
+
+    def update_evaluation_matrix(self, pred_train, pred_test):
+        return
+
+    # making a separate function for plotting error terms
+    def plot_error_terms(self, pred_train):
+        fig, axes = plt.subplots(figsize=(15, 6))
+
+        # Plotting error distribution
+        sns.histplot(self.train_Y - pred_train, bins=30, kde=True, ax=axes)
+        axes.set_title("Error Terms Distribution")
+        axes.set_xlabel("Errors")
+
+        plt.tight_layout()
+        return fig  # returning figure the is created here
+
+    def plot_performance_graph(self):
+        # Predict on test data
+        pred_test = self.model.predict(self.test_X)
+
+        # Calculate confusion matrix
+        cm = confusion_matrix(self.test_Y, pred_test)
+
+        # Plot confusion matrix
+        fig, ax = plt.subplots(figsize=(10, 6))
+        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
+
+        ax.set_title("Confusion Matrix")
+        ax.set_xlabel("Predicted Labels")
+        ax.set_ylabel("True Labels")
+
+        plt.tight_layout()
+        return fig