diff --git a/AMLsim/Dockerfile b/AMLsim/Dockerfile index 5fa4a809..77f05cad 100644 --- a/AMLsim/Dockerfile +++ b/AMLsim/Dockerfile @@ -37,4 +37,4 @@ COPY index.html index.html COPY pom.xml pom.xml COPY target target -ENTRYPOINT ["sh", "scripts/run.sh"] \ No newline at end of file +ENTRYPOINT ["sh", "scripts/run.sh"] diff --git a/AMLsim/paramFiles/10K_accts.zip b/AMLsim/paramFiles/10K_accts.zip new file mode 100644 index 00000000..9b5a9d4a Binary files /dev/null and b/AMLsim/paramFiles/10K_accts.zip differ diff --git a/AMLsim/paramFiles/10K_accts/accounts.csv b/AMLsim/paramFiles/10K_accts/accounts.csv index 13f17163..30f2ae9b 100644 --- a/AMLsim/paramFiles/10K_accts/accounts.csv +++ b/AMLsim/paramFiles/10K_accts/accounts.csv @@ -1,13 +1,13 @@ count,min_balance,max_balance,country,business_type,bank_id -2768,10000,50000,SWE,I,swedbank -1347,10000,50000,SWE,I,handelsbanken -371,10000,50000,SWE,I,seb -2694,10000,50000,SWE,I,nordea -104,10000,50000,SWE,I,danske -458,10000,50000,SWE,I,länsförsäkringar -748,10000,50000,SWE,I,ica -362,10000,50000,SWE,I,sparbanken -23,10000,50000,SWE,I,ålandsbanken -224,10000,50000,SWE,I,marginalen -625,10000,50000,SWE,I,svea -276,10000,50000,SWE,I,skandia +2768,10000,20000,SWE,I,swedbank +1347,5000,10000,SWE,I,handelsbanken +371,5000,10000,SWE,I,seb +2694,5000,10000,SWE,I,nordea +104,5000,10000,SWE,I,danske +458,5000,10000,SWE,I,länsförsäkringar +748,5000,10000,SWE,I,ica +362,5000,10000,SWE,I,sparbanken +23,5000,10000,SWE,I,ålandsbanken +224,5000,10000,SWE,I,marginalen +625,5000,10000,SWE,I,svea +276,5000,10000,SWE,I,skandia \ No newline at end of file diff --git a/AMLsim/paramFiles/10K_accts/alertPatterns.csv b/AMLsim/paramFiles/10K_accts/alertPatterns.csv index 6ef8ebfb..bf6832fa 100755 --- a/AMLsim/paramFiles/10K_accts/alertPatterns.csv +++ b/AMLsim/paramFiles/10K_accts/alertPatterns.csv @@ -1,20 +1,20 @@ count,type,schedule_id,min_accounts,max_accounts,min_amount,max_amount,min_period,max_period,bank_id,is_sar,source_type 
-1,fan_out,2,7,7,100,1000,1,365,,True,CASH -1,fan_in,2,7,7,100,1000,1,365,,True,CASH -1,cycle,2,7,7,100,1000,1,365,,True,CASH -1,bipartite,2,7,7,100,1000,1,365,,True,CASH -1,stack,2,7,7,100,1000,1,365,,True,CASH -1,scatter_gather,2,7,7,100,1000,1,365,,True,CASH -1,gather_scatter,2,7,7,100,1000,1,365,,True,CASH -1,fan_in,2,4,4,100,1000,1,365,swedbank,True,CASH -1,fan_in,2,4,4,100,1000,1,365,handelsbanken,True,CASH -1,fan_in,2,4,4,100,1000,1,365,seb,True,CASH -1,fan_in,2,4,4,100,1000,1,365,nordea,True,CASH -1,fan_in,2,4,4,100,1000,1,365,danske,True,CASH -1,fan_in,2,4,4,100,1000,1,365,länsförsäkringar,True,CASH -1,fan_in,2,4,4,100,1000,1,365,ica,True,CASH -1,fan_in,2,4,4,100,1000,1,365,sparbanken,True,CASH -1,fan_in,2,4,4,100,1000,1,365,ålandsbanken,True,CASH -1,fan_in,2,4,4,100,1000,1,365,marginalen,True,CASH -1,fan_in,2,4,4,100,1000,1,365,svea,True,CASH -1,fan_in,2,4,4,100,1000,1,365,skandia,True,CASH +1,fan_out,2,7,7,100,1000,1,168,,True,CASH +1,fan_in,2,7,7,100,1000,1,168,,True,CASH +1,cycle,2,7,7,100,1000,1,168,,True,CASH +1,bipartite,2,7,7,100,1000,1,168,,True,CASH +1,stack,2,7,7,100,1000,1,168,,True,CASH +1,scatter_gather,2,7,7,100,1000,1,168,,True,CASH +1,gather_scatter,2,7,7,100,1000,1,168,,True,CASH +1,fan_in,2,4,4,100,1000,1,168,swedbank,True,CASH +1,fan_in,2,4,4,100,1000,1,168,handelsbanken,True,CASH +1,fan_in,2,4,4,100,1000,1,168,seb,True,CASH +1,fan_in,2,4,4,100,1000,1,168,nordea,True,CASH +1,fan_in,2,4,4,100,1000,1,168,danske,True,CASH +1,fan_in,2,4,4,100,1000,1,168,länsförsäkringar,True,CASH +1,fan_in,2,4,4,100,1000,1,168,ica,True,CASH +1,fan_in,2,4,4,100,1000,1,168,sparbanken,True,CASH +1,fan_in,2,4,4,100,1000,1,168,ålandsbanken,True,CASH +1,fan_in,2,4,4,100,1000,1,168,marginalen,True,CASH +1,fan_in,2,4,4,100,1000,1,168,svea,True,CASH +1,fan_in,2,4,4,100,1000,1,168,skandia,True,CASH diff --git a/AMLsim/paramFiles/10K_accts/conf.json b/AMLsim/paramFiles/10K_accts/conf.json index e6e5d409..e04d019e 100644 --- a/AMLsim/paramFiles/10K_accts/conf.json +++ 
b/AMLsim/paramFiles/10K_accts/conf.json @@ -2,26 +2,25 @@ "general": { "random_seed": 0, "simulation_name": "10K_accts", - "total_steps": 367, - "base_date": "2023-01-01" + "total_steps": 170 }, "default": { "min_amount": 1, "max_amount": 150000, "mean_amount": 637, - "std_amount": 300, - "mean_amount_sar": 1000, - "std_amount_sar": 300, + "std_amount": 3000, + "mean_amount_sar": 2000, + "std_amount_sar": 3000, "prob_income": 0.0, "mean_income": 0.0, "std_income": 0.0, "prob_income_sar": 0.0, "mean_income_sar": 0.0, "std_income_sar": 0.0, - "mean_outcome": 1000, - "std_outcome": 500, - "mean_outcome_sar": 1000, - "std_outcome_sar": 500, + "mean_outcome": 200.0, + "std_outcome": 500.0, + "mean_outcome_sar": 0.0, + "std_outcome_sar": 0.0, "mean_phone_change_frequency": 1460, "std_phone_change_frequency": 365, "mean_phone_change_frequency_sar": 365, @@ -30,31 +29,7 @@ "std_bank_change_frequency": 1, "mean_bank_change_frequency_sar": 1460, "std_bank_change_frequency_sar": 1, - "min_balance": 100000, - "max_balance": 200000, - "start_step": -1, - "end_step": -1, - "start_range": -1, - "end_range": -1, - "transaction_model": 1, - "margin_ratio": 0.1, - "bank_id": "default", - "cash_in": { - "normal_interval": 100, - "fraud_interval": 50, - "normal_min_amount": 50, - "normal_max_amount": 100, - "fraud_min_amount": 500, - "fraud_max_amount": 1000 - }, - "cash_out": { - "normal_interval": 10, - "fraud_interval": 100, - "normal_min_amount": 10, - "normal_max_amount": 100, - "fraud_min_amount": 1000, - "fraud_max_amount": 2000 - } + "margin_ratio": 0.1 }, "input": { "directory": "paramFiles/10K_accts", @@ -75,39 +50,14 @@ }, "output": { "directory": "outputs", - "accounts": "accounts.csv", - "transactions": "transactions.csv", - "cash_transactions": "cash_tx.csv", - "alert_members": "alert_accounts.csv", - "alert_transactions": "alert_transactions.csv", - "sar_accounts": "sar_accounts.csv", - "party_individuals": "individuals-bulkload.csv", - "party_organizations": 
"organizations-bulkload.csv", - "account_mapping": "accountMapping.csv", - "resolved_entities": "resolvedentities.csv", - "transaction_log": "tx_log.csv", - "counter_log": "tx_count.csv", - "diameter_log": "diameter.csv" + "transaction_log": "tx_log.csv" }, "graph_generator": { - "degree_threshold": 1, - "high_risk_countries": "", - "high_risk_business": "" + "degree_threshold": 1 }, "simulator": { - "compute_diameter": false, "transaction_limit": 100000, "transaction_interval": 7, - "sar_interval": 7, - "sar_balance_ratio": 1.0, - "numBranches": 1000 - }, - "visualizer": { - "degree": "deg.png", - "wcc": "wcc.png", - "alert": "alert.png", - "count": "count.png", - "clustering": "cc.png", - "diameter": "diameter.png" + "sar_interval": 7 } } \ No newline at end of file diff --git a/AMLsim/paramFiles/10K_accts/conf_old.json b/AMLsim/paramFiles/10K_accts/conf_old.json new file mode 100644 index 00000000..af68193d --- /dev/null +++ b/AMLsim/paramFiles/10K_accts/conf_old.json @@ -0,0 +1,113 @@ +{ + "general": { + "random_seed": 0, + "simulation_name": "10K_accts", + "total_steps": 732, + "base_date": "2023-01-01" + }, + "default": { + "min_amount": 1, + "max_amount": 150000, + "mean_amount": 637, + "std_amount": 1800, + "mean_amount_sar": 2000, + "std_amount_sar": 1800, + "prob_income": 0.0, + "mean_income": 0.0, + "std_income": 0.0, + "prob_income_sar": 0.0, + "mean_income_sar": 0.0, + "std_income_sar": 0.0, + "mean_outcome": 1000, + "std_outcome": 500, + "mean_outcome_sar": 1000, + "std_outcome_sar": 500, + "mean_phone_change_frequency": 1460, + "std_phone_change_frequency": 365, + "mean_phone_change_frequency_sar": 365, + "std_phone_change_frequency_sar": 182, + "mean_bank_change_frequency": 1460, + "std_bank_change_frequency": 1, + "mean_bank_change_frequency_sar": 1460, + "std_bank_change_frequency_sar": 1, + "min_balance": 100000, + "max_balance": 200000, + "start_step": -1, + "end_step": -1, + "start_range": -1, + "end_range": -1, + "transaction_model": 1, + 
"margin_ratio": 0.1, + "bank_id": "default", + "cash_in": { + "normal_interval": 100, + "fraud_interval": 50, + "normal_min_amount": 50, + "normal_max_amount": 100, + "fraud_min_amount": 500, + "fraud_max_amount": 1000 + }, + "cash_out": { + "normal_interval": 10, + "fraud_interval": 100, + "normal_min_amount": 10, + "normal_max_amount": 100, + "fraud_min_amount": 1000, + "fraud_max_amount": 2000 + } + }, + "input": { + "directory": "paramFiles/10K_accts", + "schema": "schema.json", + "accounts": "accounts.csv", + "alert_patterns": "alertPatterns.csv", + "normal_models": "normalModels.csv", + "degree": "degree.csv", + "transaction_type": "transactionType.csv", + "is_aggregated_accounts": true + }, + "temporal": { + "directory": "tmp", + "transactions": "transactions.csv", + "accounts": "accounts.csv", + "alert_members": "alert_members.csv", + "normal_models": "normal_models.csv" + }, + "output": { + "directory": "outputs", + "accounts": "accounts.csv", + "transactions": "transactions.csv", + "cash_transactions": "cash_tx.csv", + "alert_members": "alert_accounts.csv", + "alert_transactions": "alert_transactions.csv", + "sar_accounts": "sar_accounts.csv", + "party_individuals": "individuals-bulkload.csv", + "party_organizations": "organizations-bulkload.csv", + "account_mapping": "accountMapping.csv", + "resolved_entities": "resolvedentities.csv", + "transaction_log": "tx_log.csv", + "counter_log": "tx_count.csv", + "diameter_log": "diameter.csv" + }, + "graph_generator": { + "degree_threshold": 1, + "high_risk_countries": "", + "high_risk_business": "" + }, + "simulator": { + "compute_diameter": false, + "transaction_limit": 100000, + "transaction_interval": 7, + "sar_interval": 7, + "sar_balance_ratio": 1.0, + "numBranches": 1000 + }, + "visualizer": { + "degree": "deg.png", + "wcc": "wcc.png", + "alert": "alert.png", + "count": "count.png", + "clustering": "cc.png", + "diameter": "diameter.png" + } +} \ No newline at end of file diff --git 
a/AMLsim/paramFiles/10K_accts/normalModels.csv b/AMLsim/paramFiles/10K_accts/normalModels.csv index 1b25c615..7bd85ad2 100644 --- a/AMLsim/paramFiles/10K_accts/normalModels.csv +++ b/AMLsim/paramFiles/10K_accts/normalModels.csv @@ -1,7 +1,15 @@ count,type,schedule_id,min_accounts,max_accounts,min_period,max_period,bank_id -5000,single,2,1,1,1,365, -5000,fan_out,2,4,4,1,365, -5000,fan_in,2,4,4,1,365, -5000,forward,2,3,3,1,365, -5000,mutual,2,2,2,1,365, -5000,periodical,2,2,2,1,365, \ No newline at end of file +3000,single,2,1,1,1,168, +3000,fan_out,2,4,4,1,168, +3000,fan_in,2,4,4,1,168, +3000,forward,2,3,3,1,168, +3000,mutual,2,2,2,1,168, +3000,periodical,2,2,2,1,168, +1000,single,0,1,1,1,168,handelsbanken +3000,fan_out,1,4,4,1,168,handelsbanken +3000,fan_in,2,4,4,1,168,handelsbanken +3000,forward,3,3,3,1,168,handelsbanken +3000,mutual,2,2,2,1,168,handelsbanken +3000,periodical,2,2,2,1,168,handelsbanken +30,fan_out,3,40,40,1,168, +30,fan_in,3,40,40,1,168, diff --git a/AMLsim/scripts/generate_scalefree.py b/AMLsim/scripts/generate_scalefree.py index 6c3fdc46..811c9344 100644 --- a/AMLsim/scripts/generate_scalefree.py +++ b/AMLsim/scripts/generate_scalefree.py @@ -92,7 +92,7 @@ def powerlaw_cluster_generator(_n, _edge_factor): print("Number of vertices: %d" % g.number_of_nodes()) # Number of vertices (accounts) print("Number of edges: %d" % g.number_of_edges()) # Number of edges (transactions) - #out_deg = Counter(g.out_degree().values()) + #out_deg = Counter(g.out_degree().values()) # TODO: fix so degree.csv has aggregates #in_deg = Counter(g.in_degree().values()) #keys = set(sorted(list(in_deg.keys()) + list(out_deg.keys()))) diff --git a/AMLsim/src/main/java/amlsim/AMLSim.java b/AMLsim/src/main/java/amlsim/AMLSim.java index b103d6e8..7043377c 100755 --- a/AMLsim/src/main/java/amlsim/AMLSim.java +++ b/AMLsim/src/main/java/amlsim/AMLSim.java @@ -173,6 +173,7 @@ public void loadParametersFromFile() { } // Parameters of Cash Transactions + /* TODO: remove? 
int norm_in_int = simProp.getCashTxInterval(true, false); // Interval of cash-in transactions for normal account int suspicious_in_int = simProp.getCashTxInterval(true, true); // Interval of cash-in transactions for // suspicious account @@ -201,7 +202,7 @@ public void loadParametersFromFile() { // for suspicious account CashOutModel.setParam(norm_out_int, suspicious_out_int, norm_out_min, norm_out_max, suspicious_out_min, suspicious_out_max); - + // Create branches (for cash transactions) this.numBranches = simProp.getNumBranches(); if (this.numBranches <= 0) { @@ -210,15 +211,16 @@ public void loadParametersFromFile() { for (int i = 0; i < this.numBranches; i++) { this.branches.add(new Branch(i)); } + */ this.accountFile = simProp.getInputAcctFile(); this.transactionFile = simProp.getInputTxFile(); this.normalModelsFile = simProp.getNormalModelsFile(); this.alertMemberFile = simProp.getInputAlertMemberFile(); - this.counterFile = simProp.getCounterLogFile(); - this.diameterFile = simProp.getDiameterLogFile(); - this.computeDiameter = simProp.isComputeDiameter(); - + //this.counterFile = simProp.getCounterLogFile(); + //this.diameterFile = simProp.getDiameterLogFile(); + //this.computeDiameter = simProp.isComputeDiameter(); + /* if (computeDiameter && diameterFile != null) { try { BufferedWriter writer = new BufferedWriter(new FileWriter(diameterFile)); @@ -233,6 +235,7 @@ public void loadParametersFromFile() { logger.info("Transaction graph diameter computation is disabled"); } } + */ } private static Map getColumnIndices(String header) { @@ -272,7 +275,7 @@ private void loadAccountFile(String accountFile) throws IOException { simProp.getMeanOutcomeSar(), simProp.getStdOutcomeSar()); int index = this.getAccounts().size(); - account.setBranch(this.branches.get(index % this.numBranches)); + //account.setBranch(this.branches.get(index % this.numBranches)); this.getAccounts().add(account); this.idMap.put(accountID, index); this.schedule.scheduleRepeating(account); @@ 
-509,7 +512,7 @@ public void executeSimulation() { } } txs.flushLog(); - txs.writeCounterLog(numOfSteps, counterFile); + //txs.writeCounterLog(numOfSteps, counterFile); System.out.println(" - Finished running " + step + " steps "); // Finishing the simulation @@ -631,7 +634,7 @@ public static void main(String[] args) { */ // Loading configuration JSON file instead of parsing command line arguments - //String confFile = args[0]; + // String confFile = args[0]; String paramFiles = "10K_accts"; String confFile = "paramFiles/" + paramFiles + "/conf.json"; // debug diff --git a/AMLsim/src/main/java/amlsim/Account.java b/AMLsim/src/main/java/amlsim/Account.java index 19b52eeb..9f916315 100755 --- a/AMLsim/src/main/java/amlsim/Account.java +++ b/AMLsim/src/main/java/amlsim/Account.java @@ -356,7 +356,7 @@ public void step(SimState state) { meanBalance += balance / 28; } meanBalance = meanBalance <= 100.0 ? 1000.0 : meanBalance; - double x = (this.balance - meanBalance) / meanBalance; + double x = (this.balance + cashBalance - meanBalance) / meanBalance; double sigmoid = 1 / (1 + Math.exp(-x)); if (this.random.nextDouble() < sigmoid) { double probSpendCash = -1.0; diff --git a/AMLsim/src/main/java/amlsim/AccountGroup.java b/AMLsim/src/main/java/amlsim/AccountGroup.java index 761bd7b1..a0315e8f 100644 --- a/AMLsim/src/main/java/amlsim/AccountGroup.java +++ b/AMLsim/src/main/java/amlsim/AccountGroup.java @@ -33,6 +33,11 @@ public class AccountGroup { this.endStep = endStep; } + if (scheduleID == 2) { + this.startStep = startStep; + this.endStep = endStep; + } + this.scheduleID = scheduleID; this.interval = interval; this.members = new ArrayList<>(); diff --git a/AMLsim/src/main/java/amlsim/SimProperties.java b/AMLsim/src/main/java/amlsim/SimProperties.java index a363df06..21565ebe 100644 --- a/AMLsim/src/main/java/amlsim/SimProperties.java +++ b/AMLsim/src/main/java/amlsim/SimProperties.java @@ -86,9 +86,9 @@ public class SimProperties { probIncome = 
defaultProp.getDouble("prob_income"); meanIncome = defaultProp.getDouble("mean_income"); stdIncome = defaultProp.getDouble("std_income"); - probIncome = defaultProp.getDouble("prob_income_sar"); - meanIncome = defaultProp.getDouble("mean_income_sar"); - stdIncome = defaultProp.getDouble("std_income_sar"); + probIncomeSAR = defaultProp.getDouble("prob_income_sar"); + meanIncomeSAR = defaultProp.getDouble("mean_income_sar"); + stdIncomeSAR = defaultProp.getDouble("std_income_sar"); meanOutcome = defaultProp.getDouble("mean_outcome"); stdOutcome = defaultProp.getDouble("std_outcome"); meanOutcomeSar = defaultProp.getDouble("mean_outcome_sar"); @@ -97,8 +97,8 @@ public class SimProperties { System.out.printf("General transaction interval: %d\n", normalTxInterval); System.out.printf("Base transaction amount: Normal = %f, Suspicious= %f\n", minTxAmount, maxTxAmount); - cashInProp = defaultProp.getJSONObject("cash_in"); - cashOutProp = defaultProp.getJSONObject("cash_out"); + //cashInProp = defaultProp.getJSONObject("cash_in"); // TODO: remove? + //cashOutProp = defaultProp.getJSONObject("cash_out"); // TODO: remove? 
marginRatio = defaultProp.getDouble("margin_ratio"); String envSeed = System.getenv("RANDOM_SEED"); diff --git a/AMLsim/src/main/java/amlsim/model/normal/MutualTransactionModel.java b/AMLsim/src/main/java/amlsim/model/normal/MutualTransactionModel.java index 0e267a9d..a98c6d02 100755 --- a/AMLsim/src/main/java/amlsim/model/normal/MutualTransactionModel.java +++ b/AMLsim/src/main/java/amlsim/model/normal/MutualTransactionModel.java @@ -46,6 +46,7 @@ public void setParameters() { if (scheduleID == FIXED_INTERVAL) { ; } else if (scheduleID == RANDOM_INTERVAL || scheduleID == UNORDERED) { + this.startStep = generateFromInterval(range) + (int) this.startStep; this.interval = generateFromInterval(range) + (int) this.startStep; } else if (scheduleID == SIMULTANEOUS || range < 2) { this.interval = 1; diff --git a/AMLsim/target/classes/amlsim/SimProperties.class b/AMLsim/target/classes/amlsim/SimProperties.class index 75e9cbb8..e0fdd486 100644 Binary files a/AMLsim/target/classes/amlsim/SimProperties.class and b/AMLsim/target/classes/amlsim/SimProperties.class differ diff --git a/AMLsim/target/classes/amlsim/TargetedTransactionAmount.class b/AMLsim/target/classes/amlsim/TargetedTransactionAmount.class index ca972ff2..c5ce45d4 100644 Binary files a/AMLsim/target/classes/amlsim/TargetedTransactionAmount.class and b/AMLsim/target/classes/amlsim/TargetedTransactionAmount.class differ diff --git a/AMLsim/target/classes/amlsim/dists/NormalDist.class b/AMLsim/target/classes/amlsim/dists/NormalDist.class index ef211de3..35bbe3f9 100644 Binary files a/AMLsim/target/classes/amlsim/dists/NormalDist.class and b/AMLsim/target/classes/amlsim/dists/NormalDist.class differ diff --git a/AMLsim/target/classes/amlsim/dists/NormalDistQuick.class b/AMLsim/target/classes/amlsim/dists/NormalDistQuick.class index a36d8dab..e8d6a4af 100644 Binary files a/AMLsim/target/classes/amlsim/dists/NormalDistQuick.class and b/AMLsim/target/classes/amlsim/dists/NormalDistQuick.class differ diff --git 
a/AMLsim/target/classes/amlsim/dists/Num.class b/AMLsim/target/classes/amlsim/dists/Num.class index 6a1c92f0..7b7f9352 100644 Binary files a/AMLsim/target/classes/amlsim/dists/Num.class and b/AMLsim/target/classes/amlsim/dists/Num.class differ diff --git a/AMLsim/target/classes/amlsim/dists/TruncatedNormal.class b/AMLsim/target/classes/amlsim/dists/TruncatedNormal.class index 1fac8837..be504d7e 100644 Binary files a/AMLsim/target/classes/amlsim/dists/TruncatedNormal.class and b/AMLsim/target/classes/amlsim/dists/TruncatedNormal.class differ diff --git a/AMLsim/target/classes/amlsim/dists/TruncatedNormalQuick.class b/AMLsim/target/classes/amlsim/dists/TruncatedNormalQuick.class index 68f4a71e..febc18dc 100644 Binary files a/AMLsim/target/classes/amlsim/dists/TruncatedNormalQuick.class and b/AMLsim/target/classes/amlsim/dists/TruncatedNormalQuick.class differ diff --git a/AMLsim/target/classes/amlsim/model/aml/BipartiteTypology.class b/AMLsim/target/classes/amlsim/model/aml/BipartiteTypology.class index 52fdad1a..fecb754d 100644 Binary files a/AMLsim/target/classes/amlsim/model/aml/BipartiteTypology.class and b/AMLsim/target/classes/amlsim/model/aml/BipartiteTypology.class differ diff --git a/AMLsim/target/classes/amlsim/model/aml/CycleTypology.class b/AMLsim/target/classes/amlsim/model/aml/CycleTypology.class index 659d404c..4200b356 100644 Binary files a/AMLsim/target/classes/amlsim/model/aml/CycleTypology.class and b/AMLsim/target/classes/amlsim/model/aml/CycleTypology.class differ diff --git a/AMLsim/target/classes/amlsim/model/aml/FanInTypology.class b/AMLsim/target/classes/amlsim/model/aml/FanInTypology.class index e9cf1b16..6874f4b5 100644 Binary files a/AMLsim/target/classes/amlsim/model/aml/FanInTypology.class and b/AMLsim/target/classes/amlsim/model/aml/FanInTypology.class differ diff --git a/AMLsim/target/classes/amlsim/model/aml/FanOutTypology.class b/AMLsim/target/classes/amlsim/model/aml/FanOutTypology.class index bf28e151..b71e4247 100644 Binary files 
a/AMLsim/target/classes/amlsim/model/aml/FanOutTypology.class and b/AMLsim/target/classes/amlsim/model/aml/FanOutTypology.class differ diff --git a/AMLsim/target/classes/amlsim/model/aml/GatherScatterTypology.class b/AMLsim/target/classes/amlsim/model/aml/GatherScatterTypology.class index a9446767..f3fd1ee1 100644 Binary files a/AMLsim/target/classes/amlsim/model/aml/GatherScatterTypology.class and b/AMLsim/target/classes/amlsim/model/aml/GatherScatterTypology.class differ diff --git a/AMLsim/target/classes/amlsim/model/aml/RandomTypology.class b/AMLsim/target/classes/amlsim/model/aml/RandomTypology.class index 62a3ef48..abafc397 100644 Binary files a/AMLsim/target/classes/amlsim/model/aml/RandomTypology.class and b/AMLsim/target/classes/amlsim/model/aml/RandomTypology.class differ diff --git a/AMLsim/target/classes/amlsim/model/aml/ScatterGatherTypology.class b/AMLsim/target/classes/amlsim/model/aml/ScatterGatherTypology.class index eb10bd10..8e188f49 100644 Binary files a/AMLsim/target/classes/amlsim/model/aml/ScatterGatherTypology.class and b/AMLsim/target/classes/amlsim/model/aml/ScatterGatherTypology.class differ diff --git a/AMLsim/target/classes/amlsim/model/aml/StackTypology.class b/AMLsim/target/classes/amlsim/model/aml/StackTypology.class index b93d8110..488a5a1b 100644 Binary files a/AMLsim/target/classes/amlsim/model/aml/StackTypology.class and b/AMLsim/target/classes/amlsim/model/aml/StackTypology.class differ diff --git a/AMLsim/target/classes/amlsim/model/normal/FanInTransactionModel.class b/AMLsim/target/classes/amlsim/model/normal/FanInTransactionModel.class index 38f2b74e..4b398e8b 100644 Binary files a/AMLsim/target/classes/amlsim/model/normal/FanInTransactionModel.class and b/AMLsim/target/classes/amlsim/model/normal/FanInTransactionModel.class differ diff --git a/AMLsim/target/classes/amlsim/model/normal/FanOutTransactionModel.class b/AMLsim/target/classes/amlsim/model/normal/FanOutTransactionModel.class index 756adc68..2693e88f 100644 Binary 
files a/AMLsim/target/classes/amlsim/model/normal/FanOutTransactionModel.class and b/AMLsim/target/classes/amlsim/model/normal/FanOutTransactionModel.class differ diff --git a/AMLsim/target/classes/amlsim/model/normal/ForwardTransactionModel.class b/AMLsim/target/classes/amlsim/model/normal/ForwardTransactionModel.class index 4c5cd56b..b789f2dc 100644 Binary files a/AMLsim/target/classes/amlsim/model/normal/ForwardTransactionModel.class and b/AMLsim/target/classes/amlsim/model/normal/ForwardTransactionModel.class differ diff --git a/AMLsim/target/classes/amlsim/model/normal/MutualTransactionModel.class b/AMLsim/target/classes/amlsim/model/normal/MutualTransactionModel.class index 5a761b85..3d6358f9 100644 Binary files a/AMLsim/target/classes/amlsim/model/normal/MutualTransactionModel.class and b/AMLsim/target/classes/amlsim/model/normal/MutualTransactionModel.class differ diff --git a/AMLsim/target/classes/amlsim/model/normal/PeriodicalTransactionModel.class b/AMLsim/target/classes/amlsim/model/normal/PeriodicalTransactionModel.class index 779386cb..be2fa2b3 100644 Binary files a/AMLsim/target/classes/amlsim/model/normal/PeriodicalTransactionModel.class and b/AMLsim/target/classes/amlsim/model/normal/PeriodicalTransactionModel.class differ diff --git a/AMLsim/target/classes/amlsim/model/normal/SingleTransactionModel.class b/AMLsim/target/classes/amlsim/model/normal/SingleTransactionModel.class index 3b5eb202..71244162 100644 Binary files a/AMLsim/target/classes/amlsim/model/normal/SingleTransactionModel.class and b/AMLsim/target/classes/amlsim/model/normal/SingleTransactionModel.class differ diff --git a/AMLsim/target/test-classes/amlsim/TargetedTransactionAmountTests.class b/AMLsim/target/test-classes/amlsim/TargetedTransactionAmountTests.class index 972464e7..8db1c51b 100644 Binary files a/AMLsim/target/test-classes/amlsim/TargetedTransactionAmountTests.class and b/AMLsim/target/test-classes/amlsim/TargetedTransactionAmountTests.class differ diff --git 
a/gnn/.gitignore b/gnn/.gitignore new file mode 100644 index 00000000..285877ca --- /dev/null +++ b/gnn/.gitignore @@ -0,0 +1,2 @@ +data +models \ No newline at end of file diff --git a/gnn/README.md b/gnn/README.md new file mode 100644 index 00000000..8210f404 --- /dev/null +++ b/gnn/README.md @@ -0,0 +1,9 @@ +## installation + +# anaconda +1. conda create -n gnn python==3.11.5 pandas==2.1.1 numpy==1.26 pytorch==2.1 pytorch-cuda=12.1 pyg -c pytorch -c nvidia -c pyg +2. conda activate gnn + +# pip +to be added + diff --git a/gnn/__pycache__/data.cpython-311.pyc b/gnn/__pycache__/data.cpython-311.pyc new file mode 100644 index 00000000..07819fef Binary files /dev/null and b/gnn/__pycache__/data.cpython-311.pyc differ diff --git a/gnn/__pycache__/modules.cpython-311.pyc b/gnn/__pycache__/modules.cpython-311.pyc new file mode 100644 index 00000000..0784eeb3 Binary files /dev/null and b/gnn/__pycache__/modules.cpython-311.pyc differ diff --git a/gnn/data.py b/gnn/data.py new file mode 100644 index 00000000..b9547eca --- /dev/null +++ b/gnn/data.py @@ -0,0 +1,63 @@ +import pandas as pd +import torch +from torch_geometric.data import Data +import numpy as np +from sklearn.model_selection import train_test_split + +class EllipticDataset(): + def __init__(self, data_folder, val_size=0.2, test_size=0.2, seed=42): + # read in labels + classes = pd.read_csv(data_folder + "/elliptic_txs_classes.csv") + # read in edge pairs + edges = pd.read_csv(data_folder + "/elliptic_txs_edgelist.csv") + # read in features + features = pd.read_csv(data_folder + "/elliptic_txs_features.csv", header=None) + + # remap class, licit: 0, illicit: 1, unknown: -1 + classes["class"] = classes["class"].map({"1": 1, "2": 0, "unknown": -1}) + + # merge features and labels + df = features.merge(classes, how="left", left_on=0, right_on="txId") + df = df.sort_values(0).reset_index(drop=True) + assert len(df) == len(classes) + + # drop unclassified and isolated nodes + classified_nodes = 
set(classes[classes["class"] != -1]["txId"].values) + assert len(classified_nodes) == 46564 + classified_edges = edges[(edges["txId1"].isin(classified_nodes)) & (edges["txId2"].isin(classified_nodes))].copy() + non_isolated_nodes = set(classified_edges["txId1"].values).union(classified_edges["txId2"].values) + classified_df = df[df[0].isin(non_isolated_nodes)].copy() + + # reindex nodes + classified_df = classified_df.sort_values(1).reset_index(drop=True) + old2new = {old:new for new, old in enumerate(classified_df[0].values)} + classified_edges["txId1"] = classified_edges["txId1"].map(old2new) + classified_edges["txId2"] = classified_edges["txId2"].map(old2new) + classified_df[0] = classified_df[0].map(old2new) + + # edges + edge_index = torch.tensor(classified_edges.values, dtype=torch.long) + edge_index = edge_index.t().contiguous() + + # labels + labels = classified_df["class"].values + labels = torch.tensor(labels, dtype=torch.float) + + # timestamps + timestamps = set(classified_df[1].values) + + # features + features = torch.tensor(classified_df.drop([0, 1, "class", "txId"], axis=1).values, dtype=torch.float) + + # construct torch_geometric.data.Data + self.data = Data(x=features, edge_index=edge_index, y=labels) + + # generate array of indices + indices = np.arange(len(labels)) + + # split indices into train, val, and test sets + self.train_indices, test_indices, self.train_labels, test_labels = train_test_split(indices, labels, test_size=val_size+test_size, stratify=labels, random_state=42) + self.val_indices, self.test_indices, self.val_labels, self.test_labels = train_test_split(test_indices, test_labels, test_size=test_size/(val_size+test_size), stratify=test_labels, random_state=42) + + def get_data(self): + return self.data, self.train_indices, self.val_indices, self.test_indices, self.train_labels, self.val_labels, self.test_labels diff --git a/gnn/main.py b/gnn/main.py new file mode 100644 index 00000000..32e9a9d8 --- /dev/null +++ b/gnn/main.py @@ 
-0,0 +1,109 @@ +import torch +import torch.optim as optim +import optuna +from optuna.trial import TrialState +from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score, confusion_matrix +import networkx as nx +import matplotlib.pyplot as plt + +from modules import GCN +from data import EllipticDataset + +def define_gcn(trial): + n_layers = trial.suggest_int("n_layers", 2, 5) + hidden_dim = trial.suggest_int("hidden_dim", 2**5, 2**8, log=True) + dropout = trial.suggest_float("dropout", 0.3, 0.7) + return GCN(165,hidden_dim,2,n_layers,dropout) + +def objective_gcn(trial, data, train_indices, val_indices, device): + model = define_gcn(trial).to(device) + + optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"]) + lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True) + optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr) + criterion = torch.nn.BCELoss() + t = trial.suggest_float("t", 0.2, 0.6) + for epoch in range(100): + + model.train() + data = data.to(device) + optimizer.zero_grad() + out = model(data) + + tmp = torch.nn.functional.one_hot(data.y.type(torch.long)).type(torch.float) + loss = criterion(out[train_indices], tmp[train_indices]) + y = out.detach()[:, 1] + y = (y > t).type(torch.long) + f1 = f1_score(data.y.cpu()[train_indices], y.cpu()[train_indices]) + + loss.backward() + optimizer.step() + + model.eval() + with torch.no_grad(): + valf1 = f1_score(data.y.cpu()[val_indices], y.cpu()[val_indices]) + trial.report(valf1, epoch) + + if trial.should_prune(): + raise optuna.exceptions.TrialPruned() + + torch.save(model.state_dict(), "models/gcn-" + str(trial.number) + ".pth") + return valf1 + +def eval_gcn(device): + # load data + elliptic_data = EllipticDataset("data/elliptic_bitcoin_dataset", val_size=0.15, test_size=0.15, seed=42) + data, train_indices, val_indices, test_indices, train_labels, val_labels, test_labels = elliptic_data.get_data() + + # train and optimize 
hyperparameters + study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=42)) + study.optimize( + lambda trial: objective_gcn(trial, data, train_indices, val_indices, device), n_trials=100, timeout=10000, + ) + + # result of hyperparameter optimization + pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED]) + complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE]) + print("Study statistics: ") + print(" Number of finished trials: ", len(study.trials)) + print(" Number of pruned trials: ", len(pruned_trials)) + print(" Number of complete trials: ", len(complete_trials)) + + # retrieve best trial from hyperparameter optimization + print("Best trial:") + trial = study.best_trial + print(" Value: ", trial.value) + print(" Params: ") + for key, value in trial.params.items(): + print(" {}: {}".format(key, value)) + print("\t Trial number: ", trial.number) + + # reconstruct best trained model + state_dict = torch.load("models/gcn-" + str(trial.number) + ".pth") + #files.download("gcn-" + str(trial.number) + ".pth") + model = GCN(165,trial.params["hidden_dim"],2,trial.params["n_layers"],trial.params["dropout"]) + model.load_state_dict(state_dict) + + # evaluate best trained model using test set + model.to(device) + model.eval() + out = model(data) + tmp = torch.nn.functional.one_hot(data.y.type(torch.long)).type(torch.float) + y = out.detach()[:, 1] + y = (y > trial.params["t"]).type(torch.long) + f1 = f1_score(data.y.cpu()[test_indices], y.cpu()[test_indices]) + acc = accuracy_score(data.y.cpu()[test_indices], y.cpu()[test_indices]) + pre = precision_score(data.y.cpu()[test_indices], y.cpu()[test_indices]) + rec = recall_score(data.y.cpu()[test_indices], y.cpu()[test_indices]) + print("test performance:") + print(f"\t f1: {f1}") + print(f"\t acc: {acc}") + print(f"\t pre: {pre}") + print(f"\t rec: {rec}") + +def main(): + device = torch.device("cuda:0" if torch.cuda.is_available() else 
"cpu") + eval_gcn(device) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/gnn/modules.py b/gnn/modules.py new file mode 100644 index 00000000..a4211009 --- /dev/null +++ b/gnn/modules.py @@ -0,0 +1,74 @@ +import torch +from torch.nn import functional as F +import torch_geometric +from torch_geometric.nn import GCNConv +from torch_geometric.data import Data + +class GCNLPA(torch.nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim, + num_layers, dropout, edge_dim, k, device): + super(GCNLPA, self).__init__() + self.device = device + convs = [GCNConv(input_dim, hidden_dim)] + convs += [GCNConv(hidden_dim, hidden_dim) for _ in range(num_layers-2)] + convs += [GCNConv(hidden_dim, output_dim)] + self.convs = torch.nn.ModuleList(convs) + self.bns = torch.nn.ModuleList( + [torch.nn.BatchNorm1d(hidden_dim) for _ in range(num_layers-1)] + ) + self.softmax = torch.nn.Softmax(dim=1) + self.dropout = dropout + self.edge_weight = torch.nn.Parameter(torch.ones(edge_dim)) + self.k = k + + + def forward(self, data, adj_t=None): + x, edge_index = data.x, data.edge_index + for i, layer in enumerate(self.convs): + x = layer(x, edge_index, self.edge_weight.sigmoid()) + if i < len(self.convs)-1: + x = self.bns[i](x) + x = F.relu(x) + x = F.dropout(x, p=self.dropout, training=self.training) + out = self.softmax(x) + # LPA implementation with dense format + labels = torch.nn.functional.one_hot(data.y.type(torch.long)).type(torch.float) + matrix = torch_geometric.utils.to_dense_adj( + data.edge_index, + edge_attr=self.edge_weight.sigmoid(), + max_num_nodes=data.num_nodes + ) + matrix = matrix.squeeze(0) + selfloop = torch.diag(torch.ones(matrix.shape[0])).to(self.device) + matrix += selfloop + for _ in range(self.k): + y = torch.matmul(matrix, labels) + labels = y + return out, torch.nn.functional.normalize(labels, dim=1) + +class GCN(torch.nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout): + super(GCN, 
class GCN(torch.nn.Module):
    """Stack of ``GCNConv`` layers ending in a row-wise softmax.

    One input convolution, ``num_layers - 2`` hidden convolutions and one
    output convolution are created, plus ``num_layers - 1`` batch-norm
    layers.  Every convolution except the last is followed by batch norm,
    ReLU and dropout.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout):
        super().__init__()

        # Layers are instantiated in input -> hidden -> output order.
        stack = [GCNConv(input_dim, hidden_dim)]
        stack.extend(GCNConv(hidden_dim, hidden_dim) for _ in range(num_layers - 2))
        stack.append(GCNConv(hidden_dim, output_dim))
        self.convs = torch.nn.ModuleList(stack)

        norms = [torch.nn.BatchNorm1d(hidden_dim) for _ in range(num_layers - 1)]
        self.bns = torch.nn.ModuleList(norms)

        self.dropout = dropout
        self.softmax = torch.nn.Softmax(dim=1)

    def reset_parameters(self):
        """Re-initialise every convolution, then every batch-norm layer."""
        for module in [*self.convs, *self.bns]:
            module.reset_parameters()

    def forward(self, data, adj_t=None):
        """Return the softmax over dim 1 of the final convolution's output.

        ``data`` must expose ``x`` and ``edge_index``; ``adj_t`` is accepted
        but not used.
        """
        hidden = data.x
        edges = data.edge_index
        final_idx = len(self.convs) - 1
        for idx, conv in enumerate(self.convs):
            hidden = conv(hidden, edges)
            if idx == final_idx:
                # The output layer gets no norm / activation / dropout.
                break
            hidden = self.bns[idx](hidden)
            hidden = F.relu(hidden)
            hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.softmax(hidden)
a/transaction-network-explorer/tne-cpu.ipynb +++ b/transaction-network-explorer/tne-cpu.ipynb @@ -206,12 +206,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "41c6ba5b34814ee9bb620e77e1b8fba4", + "model_id": "8a3a157f3f69441ca45bf2d1675e0140", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "BokehModel(combine_events=True, render_bundle={'docs_json': {'5e6a3ea1-acc1-41d7-9d10-1b09c5dadf6f': {'version…" + "BokehModel(combine_events=True, render_bundle={'docs_json': {'ab8aa9ae-33f6-463a-9e0d-dcdc6da8101c': {'version…" ] }, "execution_count": 2,