Skip to content

Commit

Permalink
add credit card estimator
Browse files Browse the repository at this point in the history
  • Loading branch information
yashasvini121 authored Oct 27, 2024
2 parents f6e7575 + 534c61f commit 056ec77
Show file tree
Hide file tree
Showing 9 changed files with 3,488 additions and 4 deletions.
4 changes: 0 additions & 4 deletions App.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
**Predictive Calc** offers a powerful suite of machine learning models designed to assist you in making informed decisions. Whether it's predicting house prices, determining loan eligibility, or evaluating health risks, we have you covered.
""")

# Why Choose Calc?
st.markdown("""
## Why Choose Predictive Calc? """)
features = [
Expand Down Expand Up @@ -216,10 +215,7 @@
st.write(calc["details"])
st.markdown("---")

# Add a "Get Started" section at the bottom
st.markdown("## Get Started Today!")
st.markdown("Explore our calculators and take control of your predictive analytics journey!")


st.write("Developed with ❤️ by Yashasvini Sharma | [Github](https://www.github.com/yashasvini121) | [LinkedIn](https://www.linkedin.com/in/yashasvini121/)")

79 changes: 79 additions & 0 deletions form_configs/credit_card_fraud.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{
"Credit Card Fraud Estimator": {
"Average Amount per Transaction per Day": {
"type": "number",
"min_value": 0,
"max_value": 100000,
"default_value": 100,
"step": 100,
"field_name": "avg_amount_per_day"
},
"Transaction Amount": {
"type": "number",
"min_value": 0,
"max_value": 100000,
"default_value": 3000,
"step": 100,
"field_name": "transaction_amount"
},
"Is Declined": {
"type": "dropdown",
"options": [
"Yes",
"No"
],
"default_value": "No",
"field_name": "Is_declined"
},
"Total Number of Declines per Day": {
"type": "number",
"min_value": 0,
"max_value": 100,
"default_value": 0,
"step": 1,
"field_name": "no_of_declines_per_day"
},
"Is Foreign Transaction": {
"type": "dropdown",
"options": [
"Yes",
"No"
],
"default_value": "No",
"field_name": "Is_Foreign_transaction"
},
"Is High-Risk Country": {
"type": "dropdown",
"options": [
"Yes",
"No"
],
"default_value": "No",
"field_name": "Is_High_Risk_country"
},
"Daily Chargeback Average Amount": {
"type": "number",
"min_value": 0,
"max_value": 10000,
"default_value": 0,
"step": 100,
"field_name": "Daily_chargeback_avg_amt"
},
"6-Month Average Chargeback Amount": {
"type": "number",
"min_value": 0,
"max_value": 10000,
"default_value": 0,
"step": 100,
"field_name": "six_month_avg_chbk_amt"
},
"6-Month Chargeback Frequency": {
"type": "number",
"min_value": 0,
"max_value": 100,
"default_value": 0,
"step": 1,
"field_name": "six_month_chbk_freq"
}
}
}
3,076 changes: 3,076 additions & 0 deletions models/credit_card_fraud/data/creditcardcsvpresent.csv

Large diffs are not rendered by default.

119 changes: 119 additions & 0 deletions models/credit_card_fraud/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# importing libraries
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import pandas as pd
import warnings
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
import pickle
from models.credit_card_fraud.modelEvaluation import ModelEvaluation
warnings.filterwarnings("ignore")


# Load the labelled transactions.
# The path is relative to the current working directory. Forward slashes are
# used because they resolve on every operating system, whereas backslashes in
# a normal string literal are (invalid) escape sequences and only ever worked
# as a path separator on Windows.
data = pd.read_csv("models/credit_card_fraud/data/creditcardcsvpresent.csv")
df = data.copy(deep=True)

# df.info()

# Drop the columns that are not used as features: the merchant identifier and
# 'Transaction date' (noted by the original author as containing only nulls).
df = df.drop(columns=['Merchant_id', 'Transaction date'])


# Encode the qualitative Y/N columns as 1/0.
# Any value other than "Y" or "N" in an object column becomes NaN via .map().
code = {
    "N": 0,
    "Y": 1,
}

for obj in df.select_dtypes("object"):
    df[obj] = df[obj].map(code)

# Target and feature identification: every remaining column except the target
# is used as a feature, in the column order of the CSV.
target = "isFradulent"
features = [col for col in df.columns if col != target]

X = df[features]  # DataFrame holding the feature columns
y = df[target]  # Series holding the target labels


# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the SVM classifier (RBF kernel). class_weight='balanced' re-weights the
# classes inversely to their frequency in the training labels.
svm_model = SVC(kernel='rbf', class_weight='balanced', random_state=42)
svm_model.fit(X_train, y_train)

# Predictions on the held-out split, kept as a module-level name.
y_pred = svm_model.predict(X_test)

# Textual yes/no answers and the 0/1 codes they correspond to. Keys are
# compared after stripping whitespace and lower-casing.
_YES_NO_CODES = {"y": 1, "yes": 1, "n": 0, "no": 0}


def _encode_flag(value):
    """Return 1/0 for a yes/no style string; return any other value unchanged."""
    if isinstance(value, str):
        return _YES_NO_CODES.get(value.strip().lower(), value)
    return value


# Function to prepare input data into a DataFrame
def prepare_input_data(
    avg_amount_per_day,
    transaction_amount,
    Is_declined,
    no_of_declines_per_day,
    Is_Foreign_transaction,
    Is_High_Risk_country,
    Daily_chargeback_avg_amt,
    six_month_avg_chbk_amt,
    six_month_chbk_freq,
):
    """Build a single-row DataFrame from one transaction's attributes.

    The three flag arguments (``Is_declined``, ``Is_Foreign_transaction``,
    ``Is_High_Risk_country``) may be given as "Yes"/"No" or "Y"/"N" strings
    (any letter case); those are converted to 1/0. Values that are not one of
    these strings, such as numbers that are already 0 or 1, are passed through
    untouched. All other arguments are stored as given.

    Returns:
        pandas.DataFrame: one row with nine columns, named and ordered as in
        the dictionary below.
    """
    input_data = {
        "Average Amount/transaction/day": [avg_amount_per_day],
        "Transaction_amount": [transaction_amount],
        "Is declined": [_encode_flag(Is_declined)],
        "Total Number of declines/day": [no_of_declines_per_day],
        "isForeignTransaction": [_encode_flag(Is_Foreign_transaction)],
        "isHighRiskCountry": [_encode_flag(Is_High_Risk_country)],
        "Daily_chargeback_avg_amt": [Daily_chargeback_avg_amt],
        "6_month_avg_chbk_amt": [six_month_avg_chbk_amt],
        "6-month_chbk_freq": [six_month_chbk_freq],
    }

    return pd.DataFrame(input_data)

def get_prediction(
    avg_amount_per_day,
    transaction_amount,
    Is_declined,
    no_of_declines_per_day,
    Is_Foreign_transaction,
    Is_High_Risk_country,
    Daily_chargeback_avg_amt,
    six_month_avg_chbk_amt,
    six_month_chbk_freq,
):
    """Classify a single transaction with the module-level SVM model.

    The arguments are forwarded unchanged to ``prepare_input_data`` to build
    a one-row frame, which is then passed to ``svm_model.predict``.

    Returns:
        str: "Fraud" when the predicted label equals 1, otherwise
        "Not a Fraud".
    """
    frame = prepare_input_data(
        avg_amount_per_day=avg_amount_per_day,
        transaction_amount=transaction_amount,
        Is_declined=Is_declined,
        no_of_declines_per_day=no_of_declines_per_day,
        Is_Foreign_transaction=Is_Foreign_transaction,
        Is_High_Risk_country=Is_High_Risk_country,
        Daily_chargeback_avg_amt=Daily_chargeback_avg_amt,
        six_month_avg_chbk_amt=six_month_avg_chbk_amt,
        six_month_chbk_freq=six_month_chbk_freq,
    )

    # Only one row is submitted, so only the first label is relevant.
    label = svm_model.predict(frame)[0]
    if label == 1:
        return "Fraud"
    return "Not a Fraud"


def save_model():
    """Pickle the trained SVM classifier to disk.

    The file ``creditCardFraud_svc_model.pkl`` is written relative to the
    current working directory and is overwritten if it already exists.
    """
    target_path = 'creditCardFraud_svc_model.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(svm_model, handle)

def get_evaluator():
    """Return a ``ModelEvaluation`` wrapping the SVM model and its train/test splits."""
    return ModelEvaluation(
        model=svm_model,
        train_X=X_train,
        train_Y=y_train,
        test_X=X_test,
        test_Y=y_test,
    )

# save_model()
108 changes: 108 additions & 0 deletions models/credit_card_fraud/modelEvaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix


class ModelEvaluation:
    """Evaluate a fitted classifier on its training and test splits.

    The class produces a small metrics dictionary and matplotlib figures
    (confusion matrix and error distribution). Figures are created through
    ``pyplot`` and returned to the caller without being closed.
    """

    def __init__(self, model, train_X, train_Y, test_X, test_Y):
        """Store the model and the data splits.

        Args:
            model: object exposing ``predict(X)``.
            train_X: training features (a DataFrame; ``columns`` and
                ``nunique()`` are used).
            train_Y: training labels.
            test_X: test features.
            test_Y: test labels.
        """
        self.model = model
        self.train_X = train_X
        self.train_Y = train_Y
        self.test_X = test_X
        self.test_Y = test_Y
        # One-row table of zeros with regression-style column names. Nothing
        # in this class fills it in; it is kept for compatibility with code
        # that reads the attribute.
        self.evaluation_matrix = pd.DataFrame(
            np.zeros([1, 8]),
            columns=[
                "Train-R2",
                "Test-R2",
                "Train-RSS",
                "Test-RSS",
                "Train-MSE",
                "Test-MSE",
                "Train-RMSE",
                "Test-RMSE",
            ],
        )
        # Randomly pick one training column that has at least 50 distinct
        # values. np.random.choice raises ValueError on an empty candidate
        # list, so fall back to None when no column qualifies.
        candidates = train_X.columns[train_X.nunique() >= 50]
        if len(candidates) > 0:
            self.random_column = np.random.choice(candidates, 1, replace=False)[0]
        else:
            self.random_column = None

    def evaluate(self):
        """Run the full evaluation.

        Each split is predicted once and the predictions are shared by the
        metrics and the plots.

        Returns:
            tuple: ``(metrics, prediction_plot, error_plot, performance_plot)``
            where ``metrics`` is the dictionary described in ``get_metrics``
            and the other three items are matplotlib figures. The prediction
            and performance plots are two separate confusion-matrix figures.
        """
        pred_train = self.model.predict(self.train_X)
        pred_test = self.model.predict(self.test_X)

        self.update_evaluation_matrix(pred_train, pred_test)
        metrics = self._compute_metrics(pred_train, pred_test)
        prediction_plot = self._confusion_matrix_figure(pred_test)
        error_plot = self.plot_error_terms(pred_train)
        performance_plot = self._confusion_matrix_figure(pred_test)

        return metrics, prediction_plot, error_plot, performance_plot

    def get_metrics(self):
        """Return a dictionary of evaluation metrics for easy integration.

        Keys:
            "Train_R2" / "Test_R2": classification accuracy on each split
                (the key names say R2, but the values come from
                ``accuracy_score``).
            "Train_RSS" / "Test_RSS": sum of squared differences between the
                true and predicted labels; for 0/1 labels this is the number
                of misclassified samples.
        """
        pred_train = self.model.predict(self.train_X)
        pred_test = self.model.predict(self.test_X)
        return self._compute_metrics(pred_train, pred_test)

    def _compute_metrics(self, pred_train, pred_test):
        """Build the metrics dictionary from already-computed predictions."""
        return {
            "Train_R2": accuracy_score(self.train_Y, pred_train),
            "Test_R2": accuracy_score(self.test_Y, pred_test),
            "Train_RSS": np.sum(np.square(self.train_Y - pred_train)),
            "Test_RSS": np.sum(np.square(self.test_Y - pred_test)),
        }

    def plot_predictions(self, pred_train):
        """Return a confusion-matrix figure for the test split.

        ``pred_train`` is accepted for interface compatibility but not used;
        the test split is predicted inside this method.
        """
        return self._confusion_matrix_figure(self.model.predict(self.test_X))

    def update_evaluation_matrix(self, pred_train, pred_test):
        """Do nothing; kept as a hook so existing callers keep working."""
        return

    def plot_error_terms(self, pred_train):
        """Return a histogram (with KDE) of ``train_Y - pred_train``."""
        fig, axes = plt.subplots(figsize=(15, 6))

        # Distribution of the training errors
        sns.histplot(self.train_Y - pred_train, bins=30, kde=True, ax=axes)
        axes.set_title("Error Terms Distribution")
        axes.set_xlabel("Errors")

        plt.tight_layout()
        return fig

    def plot_performance_graph(self):
        """Return a confusion-matrix figure for the test split."""
        return self._confusion_matrix_figure(self.model.predict(self.test_X))

    def _confusion_matrix_figure(self, pred_test):
        """Draw the test-split confusion matrix as a heatmap on a new figure."""
        cm = confusion_matrix(self.test_Y, pred_test)

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)

        ax.set_title("Confusion Matrix")
        ax.set_xlabel("Predicted Labels")
        ax.set_ylabel("True Labels")

        plt.tight_layout()
        return fig
Loading

0 comments on commit 056ec77

Please sign in to comment.