Skip to content

Commit

Permalink
restructured files and add function to set same temporal parameters i…
Browse files Browse the repository at this point in the history
…n conf.json
  • Loading branch information
TheColdIce committed Aug 26, 2024
1 parent da8103b commit d25344a
Show file tree
Hide file tree
Showing 23 changed files with 363,707 additions and 66 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
363,552 changes: 363,552 additions & 0 deletions auto-aml-data-gen/data/tmp/tmp/tx_log.csv

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
69 changes: 49 additions & 20 deletions auto-aml-data-gen/main.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,63 @@
from simulate import init_params, create_param_files, run_simulation
import simulate as sim
from preprocess import preprocess
from classifier import Classifier
from optimizer import Optimizer
import utils
import optuna
import time
import sys


def main(config_path: str, n_trials: int = 10, ratio=0.05, operating_recall: float = 0.8, fpr_target: float = 0.95):
    """Prepare the AMLsim config at ``config_path`` and run one simulation.

    The run first rewrites the config's output directory, then makes the SAR
    parameters equal to the normal ones, and finally launches the simulator.
    Both preparation steps modify the file at ``config_path`` in place.

    Args:
        config_path: Path to the simulator ``conf.json``.
        n_trials: Number of optimizer trials. Currently only echoed, because
            the optimizer stage below is commented out.
        ratio: Currently only echoed (see ``n_trials``).
        operating_recall: Currently only echoed (see ``n_trials``).
        fpr_target: Currently only echoed (see ``n_trials``).

    Returns:
        None.
    """
    print('\n##======== Automatic tuner for AMLsim parameters ========##\n')
    print(f'config_path: {config_path}')
    print(f'n_trials: {n_trials}')
    print(f'ratio: {ratio}')
    print(f'operating_recall: {operating_recall}')
    print(f'target: {fpr_target}')

    # set output dir
    utils.set_output_path(config_path)

    # find max fpr
    utils.set_same_temp_params(config_path)
    sim.run_simulation(config_path)

    # NOTE(review): the optimizer stage is disabled; kept for reference until
    # it is re-enabled on top of the config-path based workflow.
    #optimizer = Optimizer(target=fpr_target, max=0.4244, operating_recall=operating_recall, ratio=ratio)
    #best_trials = optimizer.optimize(n_trials=n_trials)
    #for trial in best_trials:
    #    print(f'\ntrial: {trial.number}')
    #    print(f'values: {trial.values}')
    #    with open('log.txt', 'a') as f:
    #        f.write(f'\ntrial: {trial.number}\n')
    #        f.write(f'values: {trial.values}\n')
    #    for param in trial.params:
    #        print(f'{param}: {trial.params[param]}')
    #        with open('log.txt', 'a') as f:
    #            f.write(f'{param}: {trial.params[param]}\n')
    return

if __name__ == '__main__':
    # Local import: only the command-line entry point needs argparse.
    import argparse

    # Default values
    parser = argparse.ArgumentParser(
        description='Automatic tuner for AMLsim parameters',
        allow_abbrev=False,  # flags must match exactly, as with the manual argv scan
    )
    parser.add_argument(
        '--config',
        dest='config_path',
        default='/home/edvin/Desktop/flib/auto-aml-data-gen/param_files/tmp/conf.json',
        help='path to the simulator conf.json',
    )
    parser.add_argument('--n_trials', type=int, default=1)
    # OBS: approximate ratio of SARs in the dataset, error of about 0.02 percentage points
    parser.add_argument('--ratio', type=float, default=0.01)
    parser.add_argument('--operating_recall', type=float, default=0.9)
    parser.add_argument('--fpr_target', type=float, default=0.95)

    # Unknown arguments are ignored rather than rejected, matching the
    # previous hand-written parser.
    args, _unknown = parser.parse_known_args()

    t = time.time()
    main(args.config_path, args.n_trials, args.ratio, args.operating_recall, args.fpr_target)
    print(f'\nTime elapsed: {time.time()-t:.2f} seconds')

42 changes: 0 additions & 42 deletions auto-aml-data-gen/param_files/test/conf.json

This file was deleted.

71 changes: 71 additions & 0 deletions auto-aml-data-gen/param_files/tmp/conf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"general": {
"random_seed": 0,
"simulation_name": "tmp",
"total_steps": 30
},
"default": {
"min_amount": 1,
"max_amount": 150000,
"mean_amount": 637,
"std_amount": 300,
"mean_amount_sar": 637,
"std_amount_sar": 300,
"prob_income": 0.0,
"mean_income": 0.0,
"std_income": 0.0,
"prob_income_sar": 0.0,
"mean_income_sar": 0.0,
"std_income_sar": 0.0,
"mean_outcome": 500.0,
"std_outcome": 100.0,
"mean_outcome_sar": 500.0,
"std_outcome_sar": 100.0,
"prob_spend_cash": 0.0,
"n_steps_balance_history": 7,
"mean_phone_change_frequency": 1460,
"std_phone_change_frequency": 365,
"mean_phone_change_frequency_sar": 1460,
"std_phone_change_frequency_sar": 365,
"mean_bank_change_frequency": 1460,
"std_bank_change_frequency": 365,
"mean_bank_change_frequency_sar": 1460,
"std_bank_change_frequency_sar": 365,
"margin_ratio": 0.1,
"prob_participate_in_multiple_sars": 0.0
},
"input": {
"directory": "/home/edvin/Desktop/flib/auto-aml-data-gen/param_files/tmp",
"schema": "schema.json",
"accounts": "accounts.csv",
"alert_patterns": "alertPatterns.csv",
"normal_models": "normalModels.csv",
"degree": "degree.csv",
"transaction_type": "transactionType.csv",
"is_aggregated_accounts": true
},
"temporal": {
"directory": "tmp",
"transactions": "transactions.csv",
"accounts": "accounts.csv",
"alert_members": "alert_members.csv",
"normal_models": "normal_models.csv"
},
"output": {
"directory": "/home/edvin/Desktop/flib/auto-aml-data-gen/data/tmp",
"transaction_log": "tx_log.csv"
},
"graph_generator": {
"degree_threshold": 1
},
"simulator": {
"transaction_limit": 1000000,
"transaction_interval": 7,
"sar_interval": 7
},
"scale-free": {
"gamma": 2.0,
"loc": 1.0,
"scale": 1.0
}
}
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
6 changes: 3 additions & 3 deletions auto-aml-data-gen/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ def create_param_files(params:dict, param_files_folder:str):
f.write('Transfer,1\n')


def run_simulation(config_path: str):
    """Generate the transaction graph and run AMLsim for ``config_path``.

    Both steps are executed with ``../AMLsim`` (relative to the current
    working directory) as their working directory, so a relative
    ``config_path`` is resolved from there, as before.

    Args:
        config_path: Path to the simulator ``conf.json`` passed to both steps.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: If either step exits with a non-zero
            status. The Maven step is not started when graph generation fails.
        FileNotFoundError: If ``../AMLsim`` or an executable cannot be found.
    """
    # Local import: the module's import block is not modified by this change.
    import subprocess

    amlsim_dir = '../AMLsim'

    # Argument lists (no shell) so that quotes or shell metacharacters in
    # config_path cannot alter the command being executed.
    subprocess.run(
        ['python3', 'scripts/transaction_graph_generator.py', config_path],
        cwd=amlsim_dir,
        check=True,
    )
    subprocess.run(
        ['mvn', 'exec:java', '-Dexec.mainClass=amlsim.AMLSim', f'-Dexec.args={config_path}'],
        cwd=amlsim_dir,
        check=True,
    )
    return


Expand Down
33 changes: 32 additions & 1 deletion auto-aml-data-gen/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,35 @@ def read_bounds(folder:str):
return conf_bounds


# NOTE(review): appears to be a leftover module-level call that would run on
# import and read 'param_files/test' — confirm it is still wanted.
read_bounds('param_files/test')
def set_output_path(config_path):
    """Point the config's output directory at ``<cwd>/data/<simulation_name>``.

    Reads the JSON file at ``config_path``, sets ``output.directory`` from the
    current working directory and ``general.simulation_name``, and writes the
    file back in place with two-space indentation.

    Args:
        config_path: Path to the JSON config file to update.

    Returns:
        None.

    Raises:
        KeyError: If the ``general.simulation_name`` key or the ``output``
            section is missing from the config.
    """
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    sim_name = config['general']['simulation_name']
    # os.path.join instead of manual '/' concatenation keeps the path valid
    # on any platform; str() keeps non-string names working as before.
    config['output']['directory'] = os.path.join(os.getcwd(), 'data', str(sim_name))

    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(config, f, indent=2)
    return


def set_same_temp_params(config_path: str):
    """Make the SAR parameters in the config equal to the normal ones.

    Reads the JSON file at ``config_path``, copies each listed ``default``
    parameter onto its ``*_sar`` counterpart, sets ``prob_spend_cash`` and
    ``prob_participate_in_multiple_sars`` to 0.0, and writes the file back in
    place with two-space indentation.

    Args:
        config_path: Path to the JSON config file to update.

    Returns:
        None.

    Raises:
        KeyError: If the ``default`` section or one of the source parameters
            is missing from the config.
    """
    # Parameters whose value is copied onto the matching '<name>_sar' key.
    # NOTE(review): 'prob_income' is not mirrored onto 'prob_income_sar' —
    # confirm whether that omission is intentional.
    mirrored = (
        'mean_amount',
        'std_amount',
        'mean_income',
        'std_income',
        'mean_outcome',
        'std_outcome',
        'mean_phone_change_frequency',
        'std_phone_change_frequency',
        'mean_bank_change_frequency',
        'std_bank_change_frequency',
    )

    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    default = config["default"]
    for name in mirrored:
        default[f'{name}_sar'] = default[name]
    default["prob_spend_cash"] = 0.0
    default["prob_participate_in_multiple_sars"] = 0.0

    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(config, f, indent=2)

    return

0 comments on commit d25344a

Please sign in to comment.