restructured files and add function to set same temporal parameters in conf.json
aidotse · Aug 26, 2024 · d25344a · d25344a
1 parent da8103b
commit d25344a
Show file tree

Hide file tree

Showing 23 changed files with 363,707 additions and 66 deletions.
diff --git a/auto-aml-data-gen/__pycache__/classifier.cpython-310.pyc b/auto-aml-data-gen/__pycache__/classifier.cpython-310.pyc
diff --git a/auto-aml-data-gen/__pycache__/optimizer.cpython-310.pyc b/auto-aml-data-gen/__pycache__/optimizer.cpython-310.pyc
diff --git a/auto-aml-data-gen/__pycache__/preprocess.cpython-310.pyc b/auto-aml-data-gen/__pycache__/preprocess.cpython-310.pyc
diff --git a/auto-aml-data-gen/__pycache__/simulate.cpython-310.pyc b/auto-aml-data-gen/__pycache__/simulate.cpython-310.pyc
diff --git a/auto-aml-data-gen/__pycache__/utils.cpython-310.pyc b/auto-aml-data-gen/__pycache__/utils.cpython-310.pyc
diff --git a/auto-aml-data-gen/data/tmp/tmp/tx_log.csv b/auto-aml-data-gen/data/tmp/tmp/tx_log.csv
diff --git a/auto-aml-data-gen/best_params.txt → auto-aml-data-gen/logs/best_params.txt b/auto-aml-data-gen/best_params.txt → auto-aml-data-gen/logs/best_params.txt
diff --git a/auto-aml-data-gen/log.txt → auto-aml-data-gen/logs/log.txt b/auto-aml-data-gen/log.txt → auto-aml-data-gen/logs/log.txt
diff --git a/auto-aml-data-gen/main.py b/auto-aml-data-gen/main.py
@@ -1,34 +1,63 @@
-from simulate import init_params, create_param_files, run_simulation
+import simulate as sim 
 from preprocess import preprocess
 from classifier import Classifier
 from optimizer import Optimizer
+import utils
 import optuna
 import time
+import sys
 
 
-def main(n_trials:int=10, ratio=0.05, operating_recall:float=0.8, target:float=0.95):
-    optimizer = Optimizer(target=target, max=0.4244, operating_recall=operating_recall, ratio=ratio)
-    best_trials = optimizer.optimize(n_trials=n_trials)
-    for trial in best_trials:
-        print(f'\ntrial: {trial.number}')
-        print(f'values: {trial.values}')
-        with open('log.txt', 'a') as f:
-            f.write(f'\ntrial: {trial.number}\n')
-            f.write(f'values: {trial.values}\n')
-        for param in trial.params:
-            print(f'{param}: {trial.params[param]}')
-            with open('log.txt', 'a') as f:
-                f.write(f'{param}: {trial.params[param]}\n')
+def main(config_path:str, n_trials:int=10, ratio=0.05, operating_recall:float=0.8, fpr_target:float=0.95):
+    print('\n##======== Automatic tuner for AMLsim parameters ========##\n')
+    print(f'config_path: {config_path}')
+    print(f'n_trials: {n_trials}')
+    print(f'ratio: {ratio}')
+    print(f'operating_recall: {operating_recall}')
+    print(f'target: {fpr_target}')
+
+    # set output dir
+    utils.set_output_path(config_path)
+
+    # find max fpr
+    utils.set_same_temp_params(config_path)
+    sim.run_simulation(config_path)
+
+    #optimizer = Optimizer(target=fpr_target, max=0.4244, operating_recall=operating_recall, ratio=ratio)
+    #best_trials = optimizer.optimize(n_trials=n_trials)
+    #for trial in best_trials:
+    #    print(f'\ntrial: {trial.number}')
+    #    print(f'values: {trial.values}')
+    #    with open('log.txt', 'a') as f:
+    #        f.write(f'\ntrial: {trial.number}\n')
+    #        f.write(f'values: {trial.values}\n')
+    #    for param in trial.params:
+    #        print(f'{param}: {trial.params[param]}')
+    #        with open('log.txt', 'a') as f:
+    #            f.write(f'{param}: {trial.params[param]}\n')
     return
 
 if __name__ == '__main__':
+
+    # Default values
+    config_path = '/home/edvin/Desktop/flib/auto-aml-data-gen/param_files/tmp/conf.json'
     n_trials = 1
-    ratio = 0.05 # OBS: approximate ratio of SARs in the dataset, error of about 0.02 percentage points
+    ratio = 0.01
     operating_recall = 0.9
-    target = 0.95
-
-    t = time.time()
+    fpr_target = 0.95
 
-    main(n_trials, ratio, operating_recall, target)
+    argv = sys.argv
+    for i, arg in enumerate(argv):
+        if '--config' == arg:
+            config_path = argv[i+1]
+        if '--n_trials' == arg:
+            n_trials = int(argv[i+1])
+        if '--ratio' == arg:
+            ratio = float(argv[i+1])
+        if '--operating_recall' == arg:
+            operating_recall = float(argv[i+1])
+        if '--fpr_target' == arg:
+            fpr_target = float(argv[i+1])
 
-    print(f'\nTime elapsed: {time.time()-t:.2f} seconds')
+    main(config_path, n_trials, ratio, operating_recall, fpr_target)
+
diff --git a/auto-aml-data-gen/param_files/test/conf.json b/auto-aml-data-gen/param_files/test/conf.json
diff --git a/...ml-data-gen/param_files/test/accounts.csv → ...aml-data-gen/param_files/tmp/accounts.csv b/...ml-data-gen/param_files/test/accounts.csv → ...aml-data-gen/param_files/tmp/accounts.csv
diff --git a/...a-gen/param_files/test/alert_patterns.csv → ...ta-gen/param_files/tmp/alert_patterns.csv b/...a-gen/param_files/test/alert_patterns.csv → ...ta-gen/param_files/tmp/alert_patterns.csv
diff --git a/auto-aml-data-gen/param_files/tmp/conf.json b/auto-aml-data-gen/param_files/tmp/conf.json
@@ -0,0 +1,71 @@
+{
+  "general": {
+    "random_seed": 0,
+    "simulation_name": "tmp",
+    "total_steps": 30
+  },
+  "default": {
+    "min_amount": 1,
+    "max_amount": 150000,
+    "mean_amount": 637,
+    "std_amount": 300,
+    "mean_amount_sar": 637,
+    "std_amount_sar": 300,
+    "prob_income": 0.0,
+    "mean_income": 0.0,
+    "std_income": 0.0,
+    "prob_income_sar": 0.0,
+    "mean_income_sar": 0.0,
+    "std_income_sar": 0.0,
+    "mean_outcome": 500.0,
+    "std_outcome": 100.0,
+    "mean_outcome_sar": 500.0,
+    "std_outcome_sar": 100.0,
+    "prob_spend_cash": 0.0,
+    "n_steps_balance_history": 7,
+    "mean_phone_change_frequency": 1460,
+    "std_phone_change_frequency": 365,
+    "mean_phone_change_frequency_sar": 1460,
+    "std_phone_change_frequency_sar": 365,
+    "mean_bank_change_frequency": 1460,
+    "std_bank_change_frequency": 365,
+    "mean_bank_change_frequency_sar": 1460,
+    "std_bank_change_frequency_sar": 365,
+    "margin_ratio": 0.1,
+    "prob_participate_in_multiple_sars": 0.0
+  },
+  "input": {
+    "directory": "/home/edvin/Desktop/flib/auto-aml-data-gen/param_files/tmp",
+    "schema": "schema.json",
+    "accounts": "accounts.csv",
+    "alert_patterns": "alertPatterns.csv",
+    "normal_models": "normalModels.csv",
+    "degree": "degree.csv",
+    "transaction_type": "transactionType.csv",
+    "is_aggregated_accounts": true
+  },
+  "temporal": {
+    "directory": "tmp",
+    "transactions": "transactions.csv",
+    "accounts": "accounts.csv",
+    "alert_members": "alert_members.csv",
+    "normal_models": "normal_models.csv"
+  },
+  "output": {
+    "directory": "/home/edvin/Desktop/flib/auto-aml-data-gen/data/tmp",
+    "transaction_log": "tx_log.csv"
+  },
+  "graph_generator": {
+    "degree_threshold": 1
+  },
+  "simulator": {
+    "transaction_limit": 1000000,
+    "transaction_interval": 7,
+    "sar_interval": 7
+  },
+  "scale-free": {
+    "gamma": 2.0,
+    "loc": 1.0,
+    "scale": 1.0
+  }
+}
diff --git a/...-aml-data-gen/param_files/test/degree.csv → auto-aml-data-gen/param_files/tmp/degree.csv b/...-aml-data-gen/param_files/test/degree.csv → auto-aml-data-gen/param_files/tmp/degree.csv
diff --git a/...ta-gen/param_files/test/normal_models.csv → ...ata-gen/param_files/tmp/normal_models.csv b/...ta-gen/param_files/test/normal_models.csv → ...ata-gen/param_files/tmp/normal_models.csv
diff --git a/...-gen/param_files/test/transactionType.csv → ...a-gen/param_files/tmp/transactionType.csv b/...-gen/param_files/test/transactionType.csv → ...a-gen/param_files/tmp/transactionType.csv
diff --git a/auto-aml-data-gen/pareto_front.png → auto-aml-data-gen/plots/pareto_front.png b/auto-aml-data-gen/pareto_front.png → auto-aml-data-gen/plots/pareto_front.png
diff --git a/auto-aml-data-gen/pareto_front_1.png → auto-aml-data-gen/plots/pareto_front_1.png b/auto-aml-data-gen/pareto_front_1.png → auto-aml-data-gen/plots/pareto_front_1.png
diff --git a/auto-aml-data-gen/pareto_front_NSGAII.png → ...ml-data-gen/plots/pareto_front_NSGAII.png b/auto-aml-data-gen/pareto_front_NSGAII.png → ...ml-data-gen/plots/pareto_front_NSGAII.png
diff --git a/auto-aml-data-gen/pareto_front_TPE.png → auto-aml-data-gen/plots/pareto_front_TPE.png b/auto-aml-data-gen/pareto_front_TPE.png → auto-aml-data-gen/plots/pareto_front_TPE.png
diff --git a/auto-aml-data-gen/pareto_front_tmp.png → auto-aml-data-gen/plots/pareto_front_tmp.png b/auto-aml-data-gen/pareto_front_tmp.png → auto-aml-data-gen/plots/pareto_front_tmp.png
diff --git a/auto-aml-data-gen/simulate.py b/auto-aml-data-gen/simulate.py
@@ -148,9 +148,9 @@ def create_param_files(params:dict, param_files_folder:str):
         f.write('Transfer,1\n')
 
 
-def run_simulation(param_files_folder:str):
-    os.system(f'cd /home/edvin/Desktop/flib/AMLsim && python3 scripts/transaction_graph_generator.py "{param_files_folder}/conf.json"')
-    os.system(f'cd /home/edvin/Desktop/flib/AMLsim && mvn exec:java -Dexec.mainClass=amlsim.AMLSim -Dexec.args="{param_files_folder}/conf.json"')
+def run_simulation(config_path:str):
+    os.system(f'cd ../AMLsim && python3 scripts/transaction_graph_generator.py "{config_path}"')
+    os.system(f'cd ../AMLsim && mvn exec:java -Dexec.mainClass=amlsim.AMLSim -Dexec.args="{config_path}"')
     return
 
 

diff --git a/auto-aml-data-gen/utils.py b/auto-aml-data-gen/utils.py
@@ -62,4 +62,35 @@ def read_bounds(folder:str):
     return conf_bounds
 
 
-read_bounds('param_files/test')
+def set_output_path(config_path):
+    with open(config_path, 'r') as f:
+        config = json.load(f)
+    sim_name = config['general']['simulation_name'] 
+    pwd = os.getcwd()
+    config['output']['directory'] = f'{pwd}/data/{sim_name}'
+    with open(config_path, 'w') as f:
+        json.dump(config, f, indent=2)
+    return
+
+
+def set_same_temp_params(config_path:str):
+    with open(config_path, 'r') as f:
+        config = json.load(f)
+
+        config["default"]["mean_amount_sar"] = config["default"]["mean_amount"]
+        config["default"]["std_amount_sar"] = config["default"]["std_amount"]
+        config["default"]["mean_income_sar"] = config["default"]["mean_income"]
+        config["default"]["std_income_sar"] = config["default"]["std_income"]
+        config["default"]["mean_outcome_sar"] = config["default"]["mean_outcome"]
+        config["default"]["std_outcome_sar"] = config["default"]["std_outcome"]
+        config["default"]["mean_phone_change_frequency_sar"] = config["default"]["mean_phone_change_frequency"]
+        config["default"]["std_phone_change_frequency_sar"] = config["default"]["std_phone_change_frequency"]
+        config["default"]["mean_bank_change_frequency_sar"] = config["default"]["mean_bank_change_frequency"]
+        config["default"]["std_bank_change_frequency_sar"] = config["default"]["std_bank_change_frequency"]
+        config["default"]["prob_spend_cash"] = 0.0
+        config["default"]["prob_participate_in_multiple_sars"] = 0.0
+
+    with open(config_path, 'w') as f:
+        json.dump(config, f, indent=2)
+
+    return