Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new model named credit card estimator used to detect whether the … #113

Merged
merged 6 commits into from
Oct 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions App.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
**Predictive Calc** offers a powerful suite of machine learning models designed to assist you in making informed decisions. Whether it's predicting house prices, determining loan eligibility, or evaluating health risks, we have you covered.
""")

# Why Choose Calc?
st.markdown("""
## Why Choose Predictive Calc? """)
features = [
Expand Down Expand Up @@ -216,10 +215,7 @@
st.write(calc["details"])
st.markdown("---")

# Add a "Get Started" section at the bottom
st.markdown("## Get Started Today!")
st.markdown("Explore our calculators and take control of your predictive analytics journey!")


st.write("Developed with ❤️ by Yashasvini Sharma | [Github](https://www.github.com/yashasvini121) | [LinkedIn](https://www.linkedin.com/in/yashasvini121/)")

79 changes: 79 additions & 0 deletions form_configs/credit_card_fraud.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{
"Credit Card Fraud Estimator": {
"Average Amount per Transaction per Day": {
"type": "number",
"min_value": 0,
"max_value": 100000,
"default_value": 100,
"step": 100,
"field_name": "avg_amount_per_day"
},
"Transaction Amount": {
"type": "number",
"min_value": 0,
"max_value": 100000,
"default_value": 3000,
"step": 100,
"field_name": "transaction_amount"
},
"Is Declined": {
"type": "dropdown",
"options": [
"Yes",
"No"
],
"default_value": "No",
"field_name": "Is_declined"
},
"Total Number of Declines per Day": {
"type": "number",
"min_value": 0,
"max_value": 100,
"default_value": 0,
"step": 1,
"field_name": "no_of_declines_per_day"
},
"Is Foreign Transaction": {
"type": "dropdown",
"options": [
"Yes",
"No"
],
"default_value": "No",
"field_name": "Is_Foreign_transaction"
},
"Is High-Risk Country": {
"type": "dropdown",
"options": [
"Yes",
"No"
],
"default_value": "No",
"field_name": "Is_High_Risk_country"
},
"Daily Chargeback Average Amount": {
"type": "number",
"min_value": 0,
"max_value": 10000,
"default_value": 0,
"step": 100,
"field_name": "Daily_chargeback_avg_amt"
},
"6-Month Average Chargeback Amount": {
"type": "number",
"min_value": 0,
"max_value": 10000,
"default_value": 0,
"step": 100,
"field_name": "six_month_avg_chbk_amt"
},
"6-Month Chargeback Frequency": {
"type": "number",
"min_value": 0,
"max_value": 100,
"default_value": 0,
"step": 1,
"field_name": "six_month_chbk_freq"
}
}
}
3,076 changes: 3,076 additions & 0 deletions models/credit_card_fraud/data/creditcardcsvpresent.csv

Large diffs are not rendered by default.

119 changes: 119 additions & 0 deletions models/credit_card_fraud/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# importing libraries
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import pandas as pd
import warnings
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
import pickle
from models.credit_card_fraud.modelEvaluation import ModelEvaluation
warnings.filterwarnings("ignore")


# Load the raw dataset.
# Forward slashes keep this relative path valid on every OS; the previous
# backslash form only resolved on Windows and relied on invalid string escape
# sequences ("\c", "\d"). The path is relative to the working directory.
data = pd.read_csv("models/credit_card_fraud/data/creditcardcsvpresent.csv")
df = data.copy(deep=True)

# df.info()

# Drop the merchant identifier and the transaction date column (per the
# original author's note, every value in "Transaction date" is null).
# `columns=` already implies the column axis, so no `axis` argument is needed.
df = df.drop(columns=['Merchant_id', 'Transaction date'])


# Encode the qualitative Y/N columns as 1/0 so the classifier receives numbers.
# Values outside this mapping become NaN (pandas `Series.map` semantics).
code = {
    "N": 0,
    "Y": 1,
}

for obj in df.select_dtypes("object"):
    df[obj] = df[obj].map(code)

# Target and feature identification: every column except the target is a feature
target = "isFradulent"
features = [col for col in df.columns if col != target]

X = df[features]  # DataFrame holding the feature columns
y = df[target]  # Series holding the target labels


# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the SVM classifier (RBF kernel, balanced class weights)
svm_model = SVC(kernel='rbf', class_weight='balanced', random_state=42)
svm_model.fit(X_train, y_train)

# Predictions on the held-out split, kept as a module-level name
y_pred = svm_model.predict(X_test)

# Spellings accepted for the yes/no flags, mapped to the 0/1 encoding that the
# training data uses for its Y/N columns.
_FLAG_ENCODING = {"yes": 1, "y": 1, "no": 0, "n": 0}


def _encode_flag(value):
    """Return 1/0 for a yes/no style string; return any other value unchanged."""
    if isinstance(value, str):
        return _FLAG_ENCODING.get(value.strip().lower(), value)
    return value


def prepare_input_data(
    avg_amount_per_day,
    transaction_amount,
    Is_declined,
    no_of_declines_per_day,
    Is_Foreign_transaction,
    Is_High_Risk_country,
    Daily_chargeback_avg_amt,
    six_month_avg_chbk_amt,
    six_month_chbk_freq,
):
    """Build a single-row DataFrame of model features from raw input values.

    The three flag arguments (``Is_declined``, ``Is_Foreign_transaction``,
    ``Is_High_Risk_country``) may be given as "Yes"/"No" or "Y"/"N" strings
    (case-insensitive); they are encoded as 1/0. Values that are already
    numeric, or strings that are not a recognised yes/no spelling, are passed
    through unchanged.

    Returns:
        A ``pandas.DataFrame`` with one row and nine feature columns named
        after the dataset's column headers.
    """
    input_data = {
        "Average Amount/transaction/day": [avg_amount_per_day],
        "Transaction_amount": [transaction_amount],
        "Is declined": [_encode_flag(Is_declined)],
        "Total Number of declines/day": [no_of_declines_per_day],
        "isForeignTransaction": [_encode_flag(Is_Foreign_transaction)],
        "isHighRiskCountry": [_encode_flag(Is_High_Risk_country)],
        "Daily_chargeback_avg_amt": [Daily_chargeback_avg_amt],
        "6_month_avg_chbk_amt": [six_month_avg_chbk_amt],
        "6-month_chbk_freq": [six_month_chbk_freq],
    }

    return pd.DataFrame(input_data)

def get_prediction(
    avg_amount_per_day,
    transaction_amount,
    Is_declined,
    no_of_declines_per_day,
    Is_Foreign_transaction,
    Is_High_Risk_country,
    Daily_chargeback_avg_amt,
    six_month_avg_chbk_amt,
    six_month_chbk_freq,
):
    """Classify one transaction with the module-level SVM classifier.

    The arguments are forwarded to ``prepare_input_data`` to build a
    single-row feature frame, which is then passed to ``svm_model.predict``.

    Returns:
        ``"Fraud"`` when the predicted label equals 1, otherwise
        ``"Not a Fraud"``.
    """
    feature_frame = prepare_input_data(
        avg_amount_per_day=avg_amount_per_day,
        transaction_amount=transaction_amount,
        Is_declined=Is_declined,
        no_of_declines_per_day=no_of_declines_per_day,
        Is_Foreign_transaction=Is_Foreign_transaction,
        Is_High_Risk_country=Is_High_Risk_country,
        Daily_chargeback_avg_amt=Daily_chargeback_avg_amt,
        six_month_avg_chbk_amt=six_month_avg_chbk_amt,
        six_month_chbk_freq=six_month_chbk_freq,
    )

    # Only one row was submitted, so only the first predicted label matters
    predicted_label = svm_model.predict(feature_frame)[0]

    if predicted_label == 1:
        return "Fraud"
    return "Not a Fraud"


# Persist the trained classifier to disk
def save_model():
    """Pickle the module-level SVM classifier to ``creditCardFraud_svc_model.pkl``.

    The file name is relative, so the file is written to the current working
    directory.
    """
    with open('creditCardFraud_svc_model.pkl', 'wb') as out_file:
        pickle.dump(svm_model, out_file)

# Build the evaluation helper for the trained classifier
def get_evaluator():
    """Return a ``ModelEvaluation`` wrapping the SVM model and its train/test splits."""
    return ModelEvaluation(svm_model, X_train, y_train, X_test, y_test)

# save_model()
108 changes: 108 additions & 0 deletions models/credit_card_fraud/modelEvaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix


class ModelEvaluation:
    """Compute metrics and diagnostic plots for a fitted classifier.

    The instance stores the model together with its train and test splits.
    ``evaluate`` is the main entry point; the remaining public methods can be
    called individually.
    """

    def __init__(self, model, train_X, train_Y, test_X, test_Y):
        """Store the model and data splits.

        Args:
            model: Fitted estimator exposing ``predict``.
            train_X: Training features (a DataFrame; ``columns`` and
                ``nunique`` are used here).
            train_Y: Training labels.
            test_X: Test features.
            test_Y: Test labels.
        """
        self.model = model
        self.train_X = train_X
        self.train_Y = train_Y
        self.test_X = test_X
        self.test_Y = test_Y
        # One-row table of zeros; nothing in this class fills it in
        # (``update_evaluation_matrix`` is a no-op).
        self.evaluation_matrix = pd.DataFrame(
            np.zeros([1, 8]),
            columns=[
                "Train-R2",
                "Test-R2",
                "Train-RSS",
                "Test-RSS",
                "Train-MSE",
                "Test-MSE",
                "Train-RMSE",
                "Test-RMSE",
            ],
        )
        # Pick one column with at least 50 distinct values at random.
        # np.random.choice raises ValueError on an empty candidate list, so
        # fall back to None when no column qualifies (e.g. small datasets).
        candidates = train_X.columns[train_X.nunique() >= 50]
        if len(candidates) > 0:
            self.random_column = np.random.choice(candidates, 1, replace=False)[0]
        else:
            self.random_column = None

    def evaluate(self):
        """Run the full evaluation.

        Predictions are computed once per split and shared with the helpers
        instead of being recomputed by each of them.

        Returns:
            Tuple ``(metrics, prediction_plot, error_plot, performance_plot)``
            where ``metrics`` is the dict from ``get_metrics`` and the other
            three items are matplotlib figures.
        """
        pred_train = self.model.predict(self.train_X)
        pred_test = self.model.predict(self.test_X)

        self.update_evaluation_matrix(pred_train, pred_test)
        metrics = self.get_metrics(pred_train, pred_test)
        prediction_plot = self.plot_predictions(pred_train, pred_test)
        error_plot = self.plot_error_terms(pred_train)

        # Performance graph of the model (a second confusion-matrix figure)
        performance_plot = self.plot_performance_graph(pred_test)

        return metrics, prediction_plot, error_plot, performance_plot

    def get_metrics(self, pred_train=None, pred_test=None):
        """Return a dictionary of evaluation metrics for easy integration.

        Args:
            pred_train: Optional precomputed predictions for the training
                split; computed from the model when omitted.
            pred_test: Optional precomputed predictions for the test split;
                computed from the model when omitted.

        Returns:
            Dict with keys ``Train_R2``, ``Test_R2``, ``Train_RSS`` and
            ``Test_RSS``. Note that the ``*_R2`` entries hold classification
            accuracy (``accuracy_score``), not a coefficient of determination;
            the key names are kept for compatibility with callers.
        """
        if pred_train is None:
            pred_train = self.model.predict(self.train_X)
        if pred_test is None:
            pred_test = self.model.predict(self.test_X)

        metrics = {
            "Train_R2": accuracy_score(self.train_Y, pred_train),
            "Test_R2": accuracy_score(self.test_Y, pred_test),
            "Train_RSS": np.sum(np.square(self.train_Y - pred_train)),
            "Test_RSS": np.sum(np.square(self.test_Y - pred_test)),
        }
        return metrics

    def _confusion_matrix_figure(self, pred_test):
        """Draw the test-split confusion matrix as a heatmap and return the figure."""
        cm = confusion_matrix(self.test_Y, pred_test)

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)

        ax.set_title("Confusion Matrix")
        ax.set_xlabel("Predicted Labels")
        ax.set_ylabel("True Labels")

        plt.tight_layout()
        return fig

    def plot_predictions(self, pred_train, pred_test=None):
        """Return a confusion-matrix figure for the test split.

        Args:
            pred_train: Accepted for interface compatibility; not used.
            pred_test: Optional precomputed test predictions; computed from
                the model when omitted.
        """
        if pred_test is None:
            pred_test = self.model.predict(self.test_X)
        return self._confusion_matrix_figure(pred_test)

    def update_evaluation_matrix(self, pred_train, pred_test):
        """Placeholder hook; currently does nothing and returns None."""
        return

    def plot_error_terms(self, pred_train):
        """Return a histogram (with KDE) of ``train_Y - pred_train``."""
        fig, axes = plt.subplots(figsize=(15, 6))

        # Distribution of the differences between true and predicted labels
        sns.histplot(self.train_Y - pred_train, bins=30, kde=True, ax=axes)
        axes.set_title("Error Terms Distribution")
        axes.set_xlabel("Errors")

        plt.tight_layout()
        return fig  # the figure created above

    def plot_performance_graph(self, pred_test=None):
        """Return a confusion-matrix figure for the test split.

        Args:
            pred_test: Optional precomputed test predictions; computed from
                the model when omitted.
        """
        if pred_test is None:
            pred_test = self.model.predict(self.test_X)
        return self._confusion_matrix_figure(pred_test)
Loading
Loading