diff --git a/online_payment_fraud_detection/app.py b/online_payment_fraud_detection/app.py
new file mode 100644
index 0000000..c6b13e4
--- /dev/null
+++ b/online_payment_fraud_detection/app.py
@@ -0,0 +1,68 @@
+"""Streamlit front end for the online payment fraud detection model."""
+import pickle
+from pathlib import Path
+
+import numpy as np
+import streamlit as st
+
+# Resolve the model next to this script so the app works from any working directory.
+MODEL_PATH = Path(__file__).resolve().parent / "fraud_detection_model.pkl"
+
+
+@st.cache_resource
+def load_model():
+    """Load the pickled classifier once and reuse it across reruns.
+
+    Streamlit re-executes this script on every widget interaction, so the
+    result is cached instead of unpickling the file each time.
+    """
+    # NOTE: pickle can execute arbitrary code on load; only use trusted model files.
+    with open(MODEL_PATH, "rb") as file:
+        return pickle.load(file)
+
+
+# Load the trained model, stopping with a readable message if it is missing
+try:
+    model = load_model()
+except FileNotFoundError:
+    st.error(f"Model file not found: {MODEL_PATH}")
+    st.stop()
+
+# Streamlit app title and description
+st.title("Online Payment Fraud Detection System")
+st.markdown("""
+This application predicts whether an online payment transaction is fraudulent based on transaction details.
+Enter the transaction information below and click **Predict** to check for fraud.
+""")
+
+# Map transaction types to numeric values (same codes hello.py uses for training)
+transaction_map = {"CASH_OUT": 1, "PAYMENT": 2, "CASH_IN": 3, "TRANSFER": 4, "DEBIT": 5}
+
+# Input section for transaction details
+st.subheader("Enter Transaction Details")
+# Options come from transaction_map so the widget and the encoding cannot drift apart
+transaction_type = st.selectbox("Transaction Type", list(transaction_map), help="Select the type of transaction.")
+amount = st.number_input("Transaction Amount", min_value=0.0, format="%.2f", help="Enter the transaction amount.")
+oldbalanceOrg = st.number_input("Original Balance (Before Transaction)", min_value=0.0, format="%.2f", help="Enter the account balance before the transaction.")
+newbalanceOrig = st.number_input("New Balance (After Transaction)", min_value=0.0, format="%.2f", help="Enter the account balance after the transaction.")
+transaction_type_num = transaction_map[transaction_type]
+
+# Predict fraud when button is clicked
+if st.button("Predict"):
+    # Prepare one row of features in the training order: type, amount, old balance, new balance
+    input_features = np.array([[transaction_type_num, amount, oldbalanceOrg, newbalanceOrig]])
+
+    # Perform prediction
+    prediction = model.predict(input_features)
+
+    # Display the result; the model was trained on the labels "Fraud" / "No Fraud"
+    if prediction[0] == "Fraud":
+        st.error("⚠️ This transaction is predicted as **Fraudulent**!")
+    else:
+        st.success("✅ This transaction is predicted as **Not Fraudulent**.")
+
+# Footer
+st.markdown("""
+---
+**Note:** This prediction is based on the trained model and may not be 100% accurate. Use this information as a guide, not a decision-making tool.
+""")
diff --git a/online_payment_fraud_detection/demo.png.png b/online_payment_fraud_detection/demo.png.png new file mode 100644 index 0000000..aeade65 Binary files /dev/null and b/online_payment_fraud_detection/demo.png.png differ diff --git a/online_payment_fraud_detection/fraud_detection_model.pkl b/online_payment_fraud_detection/fraud_detection_model.pkl new file mode 100644 index 0000000..b089c61 Binary files /dev/null and b/online_payment_fraud_detection/fraud_detection_model.pkl differ diff --git a/online_payment_fraud_detection/hello.ipynb b/online_payment_fraud_detection/hello.ipynb new file mode 100644 index 0000000..b682420 --- /dev/null +++ b/online_payment_fraud_detection/hello.ipynb @@ -0,0 +1,1467 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "ecbcb425-4ef2-4dc7-872c-e6d19510463f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steptypeamountnameOrigoldbalanceOrgnewbalanceOrignameDestoldbalanceDestnewbalanceDestisFraudisFlaggedFraud
01PAYMENT9839.64C1231006815170136.0160296.36M19797871550.00.000
11PAYMENT1864.28C166654429521249.019384.72M20442822250.00.000
21TRANSFER181.00C1305486145181.00.00C5532640650.00.010
31CASH_OUT181.00C840083671181.00.00C3899701021182.00.010
41PAYMENT11668.14C204853772041554.029885.86M12307017030.00.000
\n", + "
" + ], + "text/plain": [ + " step type amount nameOrig oldbalanceOrg newbalanceOrig \\\n", + "0 1 PAYMENT 9839.64 C1231006815 170136.0 160296.36 \n", + "1 1 PAYMENT 1864.28 C1666544295 21249.0 19384.72 \n", + "2 1 TRANSFER 181.00 C1305486145 181.0 0.00 \n", + "3 1 CASH_OUT 181.00 C840083671 181.0 0.00 \n", + "4 1 PAYMENT 11668.14 C2048537720 41554.0 29885.86 \n", + "\n", + " nameDest oldbalanceDest newbalanceDest isFraud isFlaggedFraud \n", + "0 M1979787155 0.0 0.0 0 0 \n", + "1 M2044282225 0.0 0.0 0 0 \n", + "2 C553264065 0.0 0.0 1 0 \n", + "3 C38997010 21182.0 0.0 1 0 \n", + "4 M1230701703 0.0 0.0 0 0 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#%%\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "data = pd.read_csv(\"onlinefraud.csv\")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e09f1bfb-d882-4fa9-a2d4-7bafb79a1f66", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "step 0\n", + "type 0\n", + "amount 0\n", + "nameOrig 0\n", + "oldbalanceOrg 0\n", + "newbalanceOrig 0\n", + "nameDest 0\n", + "oldbalanceDest 0\n", + "newbalanceDest 0\n", + "isFraud 0\n", + "isFlaggedFraud 0\n", + "dtype: int64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# %%\n", + "\n", + "data.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18a97419-8866-4c6c-9bea-c0fed660b76e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "type\n", + "CASH_OUT 2237500\n", + "PAYMENT 2151495\n", + "CASH_IN 1399284\n", + "TRANSFER 532909\n", + "DEBIT 41432\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# %%\n", + "\n", + "# Exploring transaction type\n", + "data.type.value_counts()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "a87881f2-11b7-46af-931d-909a3a6d19a1", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "domain": { + "x": [ + 0, + 1 + ], + "y": [ + 0, + 1 + ] + }, + "hole": 0.5, + "hovertemplate": "label=%{label}
value=%{value}", + "labels": [ + "CASH_OUT", + "PAYMENT", + "CASH_IN", + "TRANSFER", + "DEBIT" + ], + "legendgroup": "", + "name": "", + "showlegend": true, + "type": "pie", + "values": [ + 2237500, + 2151495, + 1399284, + 532909, + 41432 + ] + } + ], + "layout": { + "legend": { + "tracegroupgap": 0 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ 
+ [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 
0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 
0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + 
"#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Distribution of Transaction 
Type" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# %%\n", + "\n", + "type = data[\"type\"].value_counts()\n", + "transactions = type.index\n", + "quantity = type.values\n", + "\n", + "import plotly.express as px\n", + "figure = px.pie(data, \n", + " values=quantity, \n", + " names=transactions,hole = 0.5, \n", + " title=\"Distribution of Transaction Type\")\n", + "figure.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a2f8c81-f73a-449f-ae0a-ee60adf436a9", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "could not convert string to float: 'PAYMENT'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mc:\\Users\\91929\\Downloads\\online_payment_fraud_detection\\hello.py:2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m# %%\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m correlation \u001b[39m=\u001b[39m data\u001b[39m.\u001b[39;49mcorr()\n\u001b[0;32m 3\u001b[0m \u001b[39mprint\u001b[39m(correlation[\u001b[39m\"\u001b[39m\u001b[39misFraud\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39msort_values(ascending\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m))\n", + "File \u001b[1;32mc:\\Users\\91929\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\core\\frame.py:11049\u001b[0m, in \u001b[0;36mDataFrame.corr\u001b[1;34m(self, method, min_periods, numeric_only)\u001b[0m\n\u001b[0;32m 11047\u001b[0m cols \u001b[39m=\u001b[39m data\u001b[39m.\u001b[39mcolumns\n\u001b[0;32m 11048\u001b[0m idx \u001b[39m=\u001b[39m cols\u001b[39m.\u001b[39mcopy()\n\u001b[1;32m> 11049\u001b[0m mat \u001b[39m=\u001b[39m data\u001b[39m.\u001b[39;49mto_numpy(dtype\u001b[39m=\u001b[39;49m\u001b[39mfloat\u001b[39;49m, na_value\u001b[39m=\u001b[39;49mnp\u001b[39m.\u001b[39;49mnan, 
copy\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\n\u001b[0;32m 11051\u001b[0m \u001b[39mif\u001b[39;00m method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mpearson\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m 11052\u001b[0m correl \u001b[39m=\u001b[39m libalgos\u001b[39m.\u001b[39mnancorr(mat, minp\u001b[39m=\u001b[39mmin_periods)\n", + "File \u001b[1;32mc:\\Users\\91929\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\core\\frame.py:1993\u001b[0m, in \u001b[0;36mDataFrame.to_numpy\u001b[1;34m(self, dtype, copy, na_value)\u001b[0m\n\u001b[0;32m 1991\u001b[0m \u001b[39mif\u001b[39;00m dtype \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 1992\u001b[0m dtype \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mdtype(dtype)\n\u001b[1;32m-> 1993\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_mgr\u001b[39m.\u001b[39;49mas_array(dtype\u001b[39m=\u001b[39;49mdtype, copy\u001b[39m=\u001b[39;49mcopy, na_value\u001b[39m=\u001b[39;49mna_value)\n\u001b[0;32m 1994\u001b[0m \u001b[39mif\u001b[39;00m result\u001b[39m.\u001b[39mdtype \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m dtype:\n\u001b[0;32m 1995\u001b[0m result \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39masarray(result, dtype\u001b[39m=\u001b[39mdtype)\n", + "File \u001b[1;32mc:\\Users\\91929\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:1694\u001b[0m, in \u001b[0;36mBlockManager.as_array\u001b[1;34m(self, dtype, copy, na_value)\u001b[0m\n\u001b[0;32m 1692\u001b[0m arr\u001b[39m.\u001b[39mflags\u001b[39m.\u001b[39mwriteable \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 1693\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1694\u001b[0m arr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_interleave(dtype\u001b[39m=\u001b[39;49mdtype, 
na_value\u001b[39m=\u001b[39;49mna_value)\n\u001b[0;32m 1695\u001b[0m \u001b[39m# The underlying data was copied within _interleave, so no need\u001b[39;00m\n\u001b[0;32m 1696\u001b[0m \u001b[39m# to further copy if copy=True or setting na_value\u001b[39;00m\n\u001b[0;32m 1698\u001b[0m \u001b[39mif\u001b[39;00m na_value \u001b[39mis\u001b[39;00m lib\u001b[39m.\u001b[39mno_default:\n", + "File \u001b[1;32mc:\\Users\\91929\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:1753\u001b[0m, in \u001b[0;36mBlockManager._interleave\u001b[1;34m(self, dtype, na_value)\u001b[0m\n\u001b[0;32m 1751\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 1752\u001b[0m arr \u001b[39m=\u001b[39m blk\u001b[39m.\u001b[39mget_values(dtype)\n\u001b[1;32m-> 1753\u001b[0m result[rl\u001b[39m.\u001b[39;49mindexer] \u001b[39m=\u001b[39m arr\n\u001b[0;32m 1754\u001b[0m itemmask[rl\u001b[39m.\u001b[39mindexer] \u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m 1756\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m itemmask\u001b[39m.\u001b[39mall():\n", + "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'PAYMENT'" + ] + } + ], + "source": [ + "# %%\n", + "\n", + "correlation = data.corr()\n", + "print(correlation[\"isFraud\"].sort_values(ascending=False))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07637929-1d1c-422b-bc9f-057b3092fbdb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "step int64\n", + "type object\n", + "amount float64\n", + "nameOrig object\n", + "oldbalanceOrg float64\n", + "newbalanceOrig float64\n", + "nameDest object\n", + "oldbalanceDest float64\n", + "newbalanceDest float64\n", + "isFraud int64\n", + "isFlaggedFraud int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# %%\n", + "\n", + "print(data.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"20fe83f7-62cf-4f03-b7e8-3309cf056b57", + "metadata": {}, + "outputs": [], + "source": [ + "# %%\n", + "\n", + "data_numeric = data.select_dtypes(include=[float, int])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91e9dae5-9e8c-4c43-8e4c-321eb1da4082", + "metadata": {}, + "outputs": [], + "source": [ + "# %%\n", + "\n", + "data_numeric = data_numeric.dropna() # Drop rows with missing values\n", + "# or\n", + "data_numeric = data_numeric.fillna(0) # Replace missing values with 0 or another value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f6e22ec-1441-41d4-b87b-881dbac3b0db", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "isFraud 1.000000\n", + "amount 0.076688\n", + "isFlaggedFraud 0.044109\n", + "step 0.031578\n", + "oldbalanceOrg 0.010154\n", + "newbalanceDest 0.000535\n", + "oldbalanceDest -0.005885\n", + "newbalanceOrig -0.008148\n", + "Name: isFraud, dtype: float64\n" + ] + } + ], + "source": [ + "# %%\n", + "\n", + "correlation = data_numeric.corr()\n", + "print(correlation[\"isFraud\"].sort_values(ascending=False))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "327e419e-5c5b-434a-9526-ba6e9b8ca910", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steptypeamountnameOrigoldbalanceOrgnewbalanceOrignameDestoldbalanceDestnewbalanceDestisFraudisFlaggedFraud
0129839.64C1231006815170136.0160296.36M19797871550.00.0No Fraud0
1121864.28C166654429521249.019384.72M20442822250.00.0No Fraud0
214181.00C1305486145181.00.00C5532640650.00.0Fraud0
311181.00C840083671181.00.00C3899701021182.00.0Fraud0
41211668.14C204853772041554.029885.86M12307017030.00.0No Fraud0
\n", + "
" + ], + "text/plain": [ + " step type amount nameOrig oldbalanceOrg newbalanceOrig \\\n", + "0 1 2 9839.64 C1231006815 170136.0 160296.36 \n", + "1 1 2 1864.28 C1666544295 21249.0 19384.72 \n", + "2 1 4 181.00 C1305486145 181.0 0.00 \n", + "3 1 1 181.00 C840083671 181.0 0.00 \n", + "4 1 2 11668.14 C2048537720 41554.0 29885.86 \n", + "\n", + " nameDest oldbalanceDest newbalanceDest isFraud isFlaggedFraud \n", + "0 M1979787155 0.0 0.0 No Fraud 0 \n", + "1 M2044282225 0.0 0.0 No Fraud 0 \n", + "2 C553264065 0.0 0.0 Fraud 0 \n", + "3 C38997010 21182.0 0.0 Fraud 0 \n", + "4 M1230701703 0.0 0.0 No Fraud 0 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# %%\n", + "\n", + "data[\"type\"] = data[\"type\"].map({\"CASH_OUT\": 1, \"PAYMENT\": 2, \n", + " \"CASH_IN\": 3, \"TRANSFER\": 4,\n", + " \"DEBIT\": 5})\n", + "data[\"isFraud\"] = data[\"isFraud\"].map({0: \"No Fraud\", 1: \"Fraud\"})\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "905a0b7f-3f12-4260-b4fd-fae6a0ea9a83", + "metadata": {}, + "outputs": [], + "source": [ + "# %%\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "x = np.array(data[[\"type\", \"amount\", \"oldbalanceOrg\", \"newbalanceOrig\"]])\n", + "y = np.array(data[[\"isFraud\"]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2d0461c-13fb-4904-aa9f-dbed0d38f0c6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9996982375185065" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# %%\n", + "\n", + "# training a machine learning model\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.20, random_state=42)\n", + "model = DecisionTreeClassifier()\n", + "model.fit(xtrain, ytrain)\n", + "model.score(xtest, ytest)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "fad573f5-eb07-45b6-a4c1-a721bb747c15", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Fraud']\n" + ] + } + ], + "source": [ + "# %%\n", + "\n", + "# prediction\n", + "#features = [type, amount, oldbalanceOrg, newbalanceOrig]\n", + "features = np.array([[4, 9000.60, 9000.60, 0.0]])\n", + "print(model.predict(features))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f6b3090-b656-4afc-b114-3bf1d63eac0d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['No Fraud']\n" + ] + } + ], + "source": [ + "# %%\n", + "\n", + "features = np.array([[4, 9000.60, 9000.60, 50000.0]])\n", + "print(model.predict(features))" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}
diff --git a/online_payment_fraud_detection/hello.py b/online_payment_fraud_detection/hello.py
new file mode 100644
index 0000000..46fe615
--- /dev/null
+++ b/online_payment_fraud_detection/hello.py
@@ -0,0 +1,79 @@
+#%%
+"""Explore the online payment fraud dataset, train a decision tree, and save it for app.py."""
+import pickle
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeClassifier
+
+data = pd.read_csv("onlinefraud.csv")
+data.head()
+# %%
+# Count missing values per column
+data.isnull().sum()
+# %%
+# Exploring transaction type
+data.type.value_counts()
+# %%
+# Named type_counts (not `type`) so the built-in type() is not shadowed
+type_counts = data["type"].value_counts()
+transactions = type_counts.index
+quantity = type_counts.values
+
+figure = px.pie(data,
+                values=quantity,
+                names=transactions, hole=0.5,
+                title="Distribution of Transaction Type")
+figure.show()
+# %%
+print(data.dtypes)
+
+# %%
+# corr() fails on string columns, so keep numeric ones; "number" selects every numeric dtype
+data_numeric = data.select_dtypes(include="number")
+
+# %%
+# Drop rows with missing values before computing correlations
+data_numeric = data_numeric.dropna()
+
+# %%
+correlation = data_numeric.corr()
+print(correlation["isFraud"].sort_values(ascending=False))
+
+# %%
+# Encode the transaction type and label; app.py relies on these exact codes and labels
+data["type"] = data["type"].map({"CASH_OUT": 1, "PAYMENT": 2,
+                                 "CASH_IN": 3, "TRANSFER": 4,
+                                 "DEBIT": 5})
+data["isFraud"] = data["isFraud"].map({0: "No Fraud", 1: "Fraud"})
+data.head()
+# %%
+# Feature matrix and 1-D label vector
+x = np.array(data[["type", "amount", "oldbalanceOrg", "newbalanceOrig"]])
+y = np.array(data["isFraud"])
+# %%
+# training a machine learning model
+xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.20, random_state=42)
+# Fixed seed so retraining reproduces the same tree
+model = DecisionTreeClassifier(random_state=42)
+model.fit(xtrain, ytrain)
+print(model.score(xtest, ytest))
+# %%
+# prediction
+# features = [type, amount, oldbalanceOrg, newbalanceOrig]
+features = np.array([[4, 9000.60, 9000.60, 0.0]])
+print(model.predict(features))
+# %%
+features = np.array([[4, 9000.60, 9000.60, 50000.0]])
+print(model.predict(features))
+
+# %%
+# Save the model trained above using pickle (no second fit needed)
+with open("fraud_detection_model.pkl", "wb") as file:
+    pickle.dump(model, file)
+
+print("Model saved as fraud_detection_model.pkl")
+
+# %%