"""
Deployment script used for the development of Exareme2.
A basic knowledge of the system is required in order to understand this script; it does
not contain the documentation of the engine. The documentation of the tasks in this
script is targeted at the specifics of the development deployment process.
This script deploys all the containers and services natively on your machine.
It deploys the containers on different ports and then configures the services to use the appropriate ports.
A worker service uses a configuration file, either at the default location './exareme2/worker/config.toml'
or at the location given by the env variable 'EXAREME2_WORKER_CONFIG_FILE', if that variable is set.
Since this script is used for development, it relies on the env variable logic: before deploying each
worker service, the env variable is changed to point to that worker service's config file.
In order for this script's tasks to work, the './configs/workers' folder should contain all the workers' config files,
following './exareme2/worker/config.toml' as a template.
You can either create the files manually or use a '.deployment.toml' file with the following template
```
ip = "172.17.0.1"
log_level = "INFO"
framework_log_level = "INFO"
monetdb_image = "madgik/exareme2_db:dev1.3"
[controller]
port = 5000
[[workers]]
id = "globalworker"
monetdb_port=50000
rabbitmq_port=5670
[[workers]]
id = "localworker1"
monetdb_port=50001
rabbitmq_port=5671
[[workers]]
id = "localworker2"
monetdb_port=50002
rabbitmq_port=5672
```
and then run the command 'inv create-configs'.
The worker services are named after their config files. If a config file is named './configs/workers/localworker1.toml',
the worker service will be called 'localworker1' and should be referenced by that name in the tasks that follow.
Paths are subject to change, so the documentation below refers to them through the global variables.
"""
import copy
import glob
import itertools
import json
import os
import pathlib
import shutil
import sys
import time
from enum import Enum
from itertools import cycle
from os import listdir
from os import path
from pathlib import Path
from textwrap import indent
from time import sleep
import requests
import toml
from invoke import UnexpectedExit
from invoke import task
from termcolor import colored
from exareme2.algorithms.exareme2.udfgen import udfio
PROJECT_ROOT = Path(__file__).parent
DEPLOYMENT_CONFIG_FILE = PROJECT_ROOT / ".deployment.toml"
WORKERS_CONFIG_DIR = PROJECT_ROOT / "configs" / "workers"
WORKER_CONFIG_TEMPLATE_FILE = PROJECT_ROOT / "exareme2" / "worker" / "config.toml"
CONTROLLER_CONFIG_DIR = PROJECT_ROOT / "configs" / "controller"
CONTROLLER_LOCALWORKERS_CONFIG_FILE = (
PROJECT_ROOT / "configs" / "controller" / "localworkers_config.json"
)
CONTROLLER_CONFIG_TEMPLATE_FILE = (
PROJECT_ROOT / "exareme2" / "controller" / "config.toml"
)
OUTDIR = Path("/tmp/exareme2/")
if not OUTDIR.exists():
OUTDIR.mkdir()
CLEANUP_DIR = Path("/tmp/cleanup_entries/")
if not CLEANUP_DIR.exists():
CLEANUP_DIR.mkdir()
TEST_DATA_FOLDER = PROJECT_ROOT / "tests" / "test_data"
EXAREME2_ALGORITHM_FOLDERS_ENV_VARIABLE = "EXAREME2_ALGORITHM_FOLDERS"
FLOWER_ALGORITHM_FOLDERS_ENV_VARIABLE = "FLOWER_ALGORITHM_FOLDERS"
EXAREME2_WORKER_CONFIG_FILE = "EXAREME2_WORKER_CONFIG_FILE"
SMPC_COORDINATOR_PORT = 12314
SMPC_COORDINATOR_DB_PORT = 27017
SMPC_COORDINATOR_QUEUE_PORT = 6379
SMPC_PLAYER_BASE_PORT = 7000
SMPC_CLIENT_BASE_PORT = 9000
SMPC_COORDINATOR_NAME = "smpc_coordinator"
SMPC_COORDINATOR_DB_NAME = "smpc_coordinator_db"
SMPC_COORDINATOR_QUEUE_NAME = "smpc_coordinator_queue"
SMPC_PLAYER_BASE_NAME = "smpc_player"
SMPC_CLIENT_BASE_NAME = "smpc_client"
# TODO Add pre-tasks when this is implemented https://github.com/pyinvoke/invoke/issues/170
# Right now if we call a task from another task, the "pre"-task is not executed
@task
def create_configs(c):
"""
Create the worker and controller services' config files, using the 'DEPLOYMENT_CONFIG_FILE'.
"""
if path.exists(WORKERS_CONFIG_DIR) and path.isdir(WORKERS_CONFIG_DIR):
shutil.rmtree(WORKERS_CONFIG_DIR)
WORKERS_CONFIG_DIR.mkdir(parents=True)
if not Path(DEPLOYMENT_CONFIG_FILE).is_file():
raise FileNotFoundError(
f"Deployment config file '{DEPLOYMENT_CONFIG_FILE}' not found."
)
with open(DEPLOYMENT_CONFIG_FILE) as fp:
deployment_config = toml.load(fp)
with open(WORKER_CONFIG_TEMPLATE_FILE) as fp:
template_worker_config = toml.load(fp)
for worker in deployment_config["workers"]:
worker_config = copy.deepcopy(template_worker_config)
worker_config["identifier"] = worker["id"]
worker_config["role"] = worker["role"]
worker_config["federation"] = deployment_config["federation"]
worker_config["log_level"] = deployment_config["log_level"]
worker_config["framework_log_level"] = deployment_config["framework_log_level"]
worker_config["controller"]["ip"] = deployment_config["ip"]
worker_config["controller"]["port"] = deployment_config["controller"]["port"]
worker_config["sqlite"]["db_name"] = worker["id"]
worker_config["monetdb"]["ip"] = deployment_config["ip"]
worker_config["monetdb"]["port"] = worker["monetdb_port"]
worker_config["monetdb"]["local_username"] = worker["local_monetdb_username"]
worker_config["monetdb"]["local_password"] = worker["local_monetdb_password"]
worker_config["monetdb"]["public_username"] = worker["public_monetdb_username"]
worker_config["monetdb"]["public_password"] = worker["public_monetdb_password"]
worker_config["monetdb"]["public_password"] = worker["public_monetdb_password"]
worker_config["rabbitmq"]["ip"] = deployment_config["ip"]
worker_config["rabbitmq"]["port"] = worker["rabbitmq_port"]
worker_config["celery"]["tasks_timeout"] = deployment_config[
"celery_tasks_timeout"
]
worker_config["celery"]["run_udf_task_timeout"] = deployment_config[
"celery_run_udf_task_timeout"
]
worker_config["privacy"]["minimum_row_count"] = deployment_config["privacy"][
"minimum_row_count"
]
if worker["role"] == "GLOBALWORKER":
worker_config["privacy"]["protect_local_data"] = False
else:
worker_config["privacy"]["protect_local_data"] = deployment_config[
"privacy"
]["protect_local_data"]
worker_config["smpc"]["enabled"] = deployment_config["smpc"]["enabled"]
if worker_config["smpc"]["enabled"]:
worker_config["smpc"]["optional"] = deployment_config["smpc"]["optional"]
if coordinator_ip := deployment_config["smpc"].get("coordinator_ip"):
if worker["role"] == "GLOBALWORKER":
worker_config["smpc"][
"coordinator_address"
] = f"http://{coordinator_ip}:{SMPC_COORDINATOR_PORT}"
else:
worker_config["smpc"]["client_id"] = worker["smpc_client_id"]
worker_config["smpc"][
"client_address"
] = f"http://{coordinator_ip}:{worker['smpc_client_port']}"
else:
if worker["role"] == "GLOBALWORKER":
worker_config["smpc"][
"coordinator_address"
] = f"http://{deployment_config['ip']}:{SMPC_COORDINATOR_PORT}"
else:
worker_config["smpc"]["client_id"] = worker["id"]
worker_config["smpc"][
"client_address"
] = f"http://{deployment_config['ip']}:{worker['smpc_client_port']}"
worker_config_file = WORKERS_CONFIG_DIR / f"{worker['id']}.toml"
with open(worker_config_file, "w+") as fp:
toml.dump(worker_config, fp)
# Create the controller config file
with open(CONTROLLER_CONFIG_TEMPLATE_FILE) as fp:
template_controller_config = toml.load(fp)
controller_config = copy.deepcopy(template_controller_config)
controller_config["node_identifier"] = "controller"
controller_config["federation"] = deployment_config["federation"]
controller_config["log_level"] = deployment_config["log_level"]
controller_config["framework_log_level"] = deployment_config["framework_log_level"]
controller_config[
"worker_landscape_aggregator_update_interval"
] = deployment_config["worker_landscape_aggregator_update_interval"]
controller_config["flower_execution_timeout"] = deployment_config[
"flower_execution_timeout"
]
controller_config["rabbitmq"]["celery_tasks_timeout"] = deployment_config[
"celery_tasks_timeout"
]
controller_config["rabbitmq"]["celery_cleanup_task_timeout"] = deployment_config[
"celery_cleanup_task_timeout"
]
controller_config["rabbitmq"]["celery_run_udf_task_timeout"] = deployment_config[
"celery_run_udf_task_timeout"
]
controller_config["deployment_type"] = "LOCAL"
controller_config["localworkers"]["config_file"] = str(
CONTROLLER_LOCALWORKERS_CONFIG_FILE
)
controller_config["localworkers"]["dns"] = ""
controller_config["localworkers"]["port"] = ""
controller_config["cleanup"]["contextids_cleanup_folder"] = str(CLEANUP_DIR)
controller_config["cleanup"]["workers_cleanup_interval"] = deployment_config[
"cleanup"
]["workers_cleanup_interval"]
controller_config["cleanup"]["contextid_release_timelimit"] = deployment_config[
"cleanup"
]["contextid_release_timelimit"]
controller_config["smpc"]["enabled"] = deployment_config["smpc"]["enabled"]
if controller_config["smpc"]["enabled"]:
controller_config["smpc"]["optional"] = deployment_config["smpc"]["optional"]
if coordinator_ip := deployment_config["smpc"].get("coordinator_ip"):
controller_config["smpc"][
"coordinator_address"
] = f"http://{coordinator_ip}:{SMPC_COORDINATOR_PORT}"
else:
controller_config["smpc"][
"coordinator_address"
] = f"http://{deployment_config['ip']}:{SMPC_COORDINATOR_PORT}"
controller_config["smpc"]["get_result_interval"] = deployment_config["smpc"][
"get_result_interval"
]
controller_config["smpc"]["get_result_max_retries"] = deployment_config["smpc"][
"get_result_max_retries"
]
controller_config["smpc"]["dp"]["enabled"] = deployment_config["smpc"]["dp"][
"enabled"
]
if controller_config["smpc"]["dp"]["enabled"]:
controller_config["smpc"]["dp"]["sensitivity"] = deployment_config["smpc"][
"dp"
]["sensitivity"]
controller_config["smpc"]["dp"]["privacy_budget"] = deployment_config[
"smpc"
]["dp"]["privacy_budget"]
CONTROLLER_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
controller_config_file = CONTROLLER_CONFIG_DIR / "controller.toml"
with open(controller_config_file, "w+") as fp:
toml.dump(controller_config, fp)
# Create the controller localworkers config file
localworkers_addresses = [
f"{deployment_config['ip']}:{worker['rabbitmq_port']}"
for worker in deployment_config["workers"]
]
with open(CONTROLLER_LOCALWORKERS_CONFIG_FILE, "w+") as fp:
json.dump(localworkers_addresses, fp)
@task
def install_dependencies(c):
"""Install project dependencies using poetry."""
message("Installing dependencies...", Level.HEADER)
cmd = "poetry install"
run(c, cmd)
@task
def rm_containers(c, container_name=None, monetdb=False, rabbitmq=False, smpc=False):
"""
Remove the specified docker containers, either by container or relative name.
:param container_name: If set, removes the container with the specified name.
:param monetdb: If True, it will remove all monetdb containers.
:param rabbitmq: If True, it will remove all rabbitmq containers.
:param smpc: If True, it will remove all smpc related containers.
If nothing is set, nothing is removed.
"""
names = []
if monetdb:
names.append("monetdb")
if rabbitmq:
names.append("rabbitmq")
if smpc:
names.append("smpc")
if container_name:
names.append(container_name)
if not names:
message(
"You must specify at least one container to remove (container_name, monetdb, rabbitmq and/or smpc).",
level=Level.WARNING,
)
for name in names:
container_ids = run(c, f"docker ps -qa --filter name={name}", show_ok=False)
if container_ids.stdout:
message(f"Removing {name} container(s)...", Level.HEADER)
cmd = f"docker rm -vf $(docker ps -qa --filter name={name})"
run(c, cmd)
else:
message(f"No {name} container to remove.", level=Level.HEADER)
@task(iterable=["worker"])
def create_monetdb(
c, worker, image=None, log_level=None, nclients=None, monetdb_memory_limit=None
):
"""
(Re)Create MonetDB container(s) for given worker(s). If the container exists, it will remove it and create it again.
:param worker: A list of workers for which it will create the monetdb containers.
:param image: The image to deploy. If not set, it will read it from the `DEPLOYMENT_CONFIG_FILE`.
:param log_level: If not set, it will read it from the `DEPLOYMENT_CONFIG_FILE`.
:param nclients: If not set, it will read it from the `DEPLOYMENT_CONFIG_FILE`.
If an image is not provided, the 'monetdb_image' field from the
'DEPLOYMENT_CONFIG_FILE' is used, e.g. monetdb_image = "madgik/exareme2_db:dev1.2".
The data of the monetdb container are not persisted. If the container is recreated, all data will be lost.
"""
if not worker:
message("Please specify a worker using --worker <worker>", Level.WARNING)
sys.exit(1)
if not image:
image = get_deployment_config("monetdb_image")
if not log_level:
log_level = get_deployment_config("log_level")
if not nclients:
nclients = get_deployment_config("monetdb_nclients")
if not monetdb_memory_limit:
monetdb_memory_limit = get_deployment_config("monetdb_memory_limit")
get_docker_image(c, image)
udfio_full_path = path.abspath(udfio.__file__)
worker_ids = worker
for worker_id in worker_ids:
container_name = f"monetdb-{worker_id}"
rm_containers(c, container_name=container_name)
worker_config_file = WORKERS_CONFIG_DIR / f"{worker_id}.toml"
with open(worker_config_file) as fp:
worker_config = toml.load(fp)
monetdb_nclient_env_var = ""
if worker_config["role"] == "GLOBALWORKER":
monetdb_nclient_env_var = f"-e MONETDB_NCLIENTS={nclients}"
container_ports = f"{worker_config['monetdb']['port']}:50000"
message(
f"Starting container {container_name} on ports {container_ports}...",
Level.HEADER,
)
cmd = f"""docker run -d -P -p {container_ports} -e SOFT_RESTART_MEMORY_LIMIT={monetdb_memory_limit * 0.7} -e HARD_RESTART_MEMORY_LIMIT={monetdb_memory_limit * 0.85} -e LOG_LEVEL={log_level} {monetdb_nclient_env_var} -e MAX_MEMORY={monetdb_memory_limit*1048576} {monetdb_nclient_env_var} -v {udfio_full_path}:/home/udflib/udfio.py -v {TEST_DATA_FOLDER}:{TEST_DATA_FOLDER} --name {container_name} --memory={monetdb_memory_limit}m {image}"""
run(c, cmd)
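# Example usage (a sketch; the worker ids must match config files in 'WORKERS_CONFIG_DIR'):
#   inv create-monetdb --worker localworker1 --worker localworker2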
@task(iterable=["worker"])
def init_system_tables(c, worker):
"""
Initialize Sqlite with the system tables using mipdb.
:param worker: A list of workers that will be initialized.
"""
workers = worker
for worker in workers:
sqlite_path = f"{TEST_DATA_FOLDER}/{worker}.db"
clean_sqlite(sqlite_path)
message(
f"Initializing system tables on sqlite with mipdb on worker: {worker}...",
Level.HEADER,
)
cmd = f"""poetry run mipdb init {get_sqlite_path(worker)}"""
run(c, cmd)
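# Example usage (a sketch): 'inv init-system-tables --worker localworker1' recreates the
# sqlite file of the given worker and initializes the mipdb system tables in it.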
@task
def update_wla(c):
"""Trigger the controller to update its worker landscape aggregator (WLA)."""
url = "http://localhost:5000/wla"
response = requests.post(url)
if response.status_code != 200:
raise Exception("Failed to update the wla")
print("Successfully updated wla.")
@task(iterable=["port"])
def load_data(c, use_sockets=False, worker=None):
"""
Load data into the specified DB from the 'TEST_DATA_FOLDER'.
:param port: A list of ports, in which it will load the data. If not set, it will use the `WORKERS_CONFIG_DIR` files.
:param use_sockets: Flag that determines if the data will be loaded via sockets or not.
"""
def get_worker_configs():
"""
Retrieve the configuration files of all workers.
:return: A list of worker configurations.
"""
config_files = [
WORKERS_CONFIG_DIR / file for file in listdir(WORKERS_CONFIG_DIR)
]
if not config_files:
message(
f"There are no worker config files to be used for data import. Folder: {WORKERS_CONFIG_DIR}",
Level.WARNING,
)
sys.exit(1)
worker_configs = []
for worker_config_file in config_files:
with open(worker_config_file) as fp:
worker_config = toml.load(fp)
worker_configs.append(worker_config)
return worker_configs
def filter_worker_configs(worker_configs, worker, node_type):
"""
Filter worker configurations based on a specific worker identifier and node type.
:param worker_configs: A list of all worker configurations.
:param worker: The identifier of the worker to filter for.
:param node_type: The worker role to filter for ("LOCALWORKER" or "GLOBALWORKER").
:return: A list of tuples containing worker identifiers and ports.
"""
return [
(config["identifier"], config["monetdb"]["port"])
for config in worker_configs
if (not worker or config["identifier"] == worker)
and config["role"] == node_type
]
def load_data_model_metadata(c, cdes_file, worker_id_and_ports):
"""
Load the data model metadata into MonetDB for each worker.
:param c: The context object.
:param cdes_file: Path to the CDEsMetadata.json file.
:param worker_id_and_ports: A list of tuples containing worker identifiers and ports.
:return: The data model code and version.
"""
with open(cdes_file) as data_model_metadata_file:
data_model_metadata = json.load(data_model_metadata_file)
data_model_code = data_model_metadata["code"]
data_model_version = data_model_metadata["version"]
def run_with_retries(c, cmd, retries=5, wait_seconds=1):
"""Attempts to run a command, retrying in case of failure."""
attempt = 0
while attempt < retries:
try:
run(c, cmd) # Try to run the command
return # Exit if successful
except Exception as e:
attempt += 1
if attempt < retries:
message(
f"Attempt {attempt} failed. Retrying in {wait_seconds} seconds...",
Level.WARNING,
)
time.sleep(wait_seconds) # Wait before retrying
else:
message(
f"All {retries} attempts failed. Error: {str(e)}",
Level.ERROR,
)
raise e # Re-raise the last exception after all retries
# Main loop for loading data models with retries
for worker_id, port in worker_id_and_ports:
message(
f"Loading data model '{data_model_code}:{data_model_version}' metadata in MonetDB at port {port}...",
Level.HEADER,
)
cmd = f"poetry run mipdb add-data-model {cdes_file} {get_monetdb_configs_in_mipdb_format(port)} {get_sqlite_path(worker_id)}"
# Try running the command with retries
run_with_retries(c, cmd)
return data_model_code, data_model_version
def load_datasets(
c,
dirpath,
filenames,
data_model_code,
data_model_version,
worker_id_and_ports,
use_sockets,
):
"""
Load datasets into MonetDB for each worker in a round-robin fashion.
:param c: The context object.
:param dirpath: Directory path of the current dataset.
:param filenames: List of filenames in the current directory.
:param data_model_code: The data model code.
:param data_model_version: The data model version.
:param worker_id_and_ports: A list of tuples containing worker identifiers and ports.
:param use_sockets: Flag to determine if data will be loaded via sockets.
"""
if len(worker_id_and_ports) == 1:
worker_id, port = worker_id_and_ports[0]
for file in filenames:
if file.endswith(".csv") and not file.endswith("test.csv"):
csv = os.path.join(dirpath, file)
message(
f"Loading dataset {pathlib.PurePath(csv).name} in MonetDB at port {port}...",
Level.HEADER,
)
cmd = f"poetry run mipdb add-dataset {csv} -d {data_model_code} -v {data_model_version} --copy_from_file {not use_sockets} {get_monetdb_configs_in_mipdb_format(port)} {get_sqlite_path(worker_id)}"
run(c, cmd)
return
# Load the first set of CSVs into the first worker
first_worker_csvs = sorted(
[
f"{dirpath}/{file}"
for file in filenames
if file.endswith("0.csv") and not file.endswith("10.csv")
]
)
for csv in first_worker_csvs:
worker_id, port = worker_id_and_ports[0]
message(
f"Loading dataset {pathlib.PurePath(csv).name} in MonetDB at port {port}...",
Level.HEADER,
)
cmd = f"poetry run mipdb add-dataset {csv} -d {data_model_code} -v {data_model_version} --copy_from_file {not use_sockets} {get_monetdb_configs_in_mipdb_format(port)} {get_sqlite_path(worker_id)}"
run(c, cmd)
# Load the remaining CSVs into the remaining workers in a round-robin fashion
remaining_csvs = sorted(
[
f"{dirpath}/{file}"
for file in filenames
if file.endswith(".csv")
and not file.endswith("0.csv")
and not file.endswith("test.csv")
]
)
worker_id_and_ports_cycle = itertools.cycle(worker_id_and_ports[1:])
for csv in remaining_csvs:
worker_id, port = next(worker_id_and_ports_cycle)
message(
f"Loading dataset {pathlib.PurePath(csv).name} in MonetDB at port {port}...",
Level.HEADER,
)
cmd = f"poetry run mipdb add-dataset {csv} -d {data_model_code} -v {data_model_version} --copy_from_file {not use_sockets} {get_monetdb_configs_in_mipdb_format(port)} {get_sqlite_path(worker_id)}"
run(c, cmd)
def load_test_datasets(
c,
dirpath,
filenames,
data_model_code,
data_model_version,
worker_id_and_ports,
use_sockets,
):
"""
Load datasets ending with 'test' into the global worker.
:param c: The context object.
:param dirpath: Directory path of the current dataset.
:param filenames: List of filenames in the current directory.
:param data_model_code: The data model code.
:param data_model_version: The data model version.
:param worker_id_and_ports: A list of tuples containing worker identifiers and ports.
:param use_sockets: Flag to determine if data will be loaded via sockets.
"""
test_csvs = sorted(
[f"{dirpath}/{file}" for file in filenames if file.endswith("test.csv")]
)
for csv in test_csvs:
worker_id, port = worker_id_and_ports[0]
message(
f"Loading test dataset {pathlib.PurePath(csv).name} in MonetDB at port {port}...",
Level.HEADER,
)
cmd = f"poetry run mipdb add-dataset {csv} -d {data_model_code} -v {data_model_version} --copy_from_file {not use_sockets} {get_monetdb_configs_in_mipdb_format(port)} {get_sqlite_path(worker_id)}"
run(c, cmd)
# Retrieve and filter worker configurations for local workers
worker_configs = get_worker_configs()
local_worker_id_and_ports = filter_worker_configs(
worker_configs, worker, "LOCALWORKER"
)
if not local_worker_id_and_ports:
raise Exception("Local worker config files cannot be loaded.")
# Process each dataset in the TEST_DATA_FOLDER for local workers
for dirpath, dirnames, filenames in os.walk(TEST_DATA_FOLDER):
if "CDEsMetadata.json" not in filenames:
continue
cdes_file = os.path.join(dirpath, "CDEsMetadata.json")
# Load data model metadata
data_model_code, data_model_version = load_data_model_metadata(
c, cdes_file, local_worker_id_and_ports
)
# Load datasets
load_datasets(
c,
dirpath,
filenames,
data_model_code,
data_model_version,
local_worker_id_and_ports,
use_sockets,
)
# Retrieve and filter worker configurations for global worker
global_worker_id_and_ports = filter_worker_configs(
worker_configs, worker, "GLOBALWORKER"
)
if not global_worker_id_and_ports:
raise Exception("Global worker config files cannot be loaded.")
# Process each dataset in the TEST_DATA_FOLDER for global worker
for dirpath, dirnames, filenames in os.walk(TEST_DATA_FOLDER):
if "CDEsMetadata.json" not in filenames:
continue
cdes_file = os.path.join(dirpath, "CDEsMetadata.json")
# Load data model metadata
data_model_code, data_model_version = load_data_model_metadata(
c, cdes_file, global_worker_id_and_ports
)
load_test_datasets(
c,
dirpath,
filenames,
data_model_code,
data_model_version,
global_worker_id_and_ports,
use_sockets,
)
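# Example usage (a sketch): 'inv load-data' imports the test data into all workers found
# in 'WORKERS_CONFIG_DIR', while 'inv load-data --worker localworker1' restricts the
# import to a single worker.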
def get_sqlite_path(worker_id):
return f"--sqlite_db_path {TEST_DATA_FOLDER}/{worker_id}.db"
def get_monetdb_configs_in_mipdb_format(port):
return (
f"--ip 127.0.0.1 "
f"--port {port} "
f"--username admin "
f"--password executor "
f"--db_name db"
)
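# For example, get_monetdb_configs_in_mipdb_format(50001) produces:
# --ip 127.0.0.1 --port 50001 --username admin --password executor --db_name db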
@task(iterable=["worker"])
def create_rabbitmq(c, worker, rabbitmq_image=None):
"""
(Re)Create RabbitMQ container(s) for the given worker(s). If a container exists, remove it and create it again.
:param worker: A list of workers for which to (re)create the rabbitmq containers.
:param rabbitmq_image: The image to deploy. If not set, it will read it from the `DEPLOYMENT_CONFIG_FILE`.
"""
if not worker:
message("Please specify a worker using --worker <worker>", Level.WARNING)
sys.exit(1)
if not rabbitmq_image:
rabbitmq_image = get_deployment_config("rabbitmq_image")
get_docker_image(c, rabbitmq_image)
worker_ids = worker
for worker_id in worker_ids:
container_name = f"rabbitmq-{worker_id}"
rm_containers(c, container_name=container_name)
worker_config_file = WORKERS_CONFIG_DIR / f"{worker_id}.toml"
with open(worker_config_file) as fp:
worker_config = toml.load(fp)
queue_port = f"{worker_config['rabbitmq']['port']}:5672"
api_port = f"{worker_config['rabbitmq']['port']+10000}:15672"
message(
f"Starting container {container_name} on ports {queue_port}...",
Level.HEADER,
)
cmd = f"docker run -d -p {queue_port} -p {api_port} --name {container_name} {rabbitmq_image}"
run(c, cmd)
for worker_id in worker_ids:
container_name = f"rabbitmq-{worker_id}"
cmd = f"docker inspect --format='{{{{json .State.Health}}}}' {container_name}"
# Wait until rabbitmq is healthy
message(
f"Waiting for container {container_name} to become healthy...",
Level.HEADER,
)
for _ in range(100):
status = run(c, cmd, raise_error=True, wait=True, show_ok=False)
if '"Status":"healthy"' not in status.stdout:
spin_wheel(time=2)
else:
message("Ok", Level.SUCCESS)
break
else:
message("Cannot configure RabbitMQ", Level.ERROR)
sys.exit(1)
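# Example usage (a sketch): 'inv create-rabbitmq --worker localworker1' recreates the
# rabbitmq container of 'localworker1' and waits until docker reports it healthy.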
@task
def kill_worker(c, worker=None, all_=False):
"""
Kill the worker(s) service(s).
:param worker: The worker service to kill.
:param all_: If set, all worker services will be killed.
"""
if all_:
worker_pattern = ""
elif worker:
worker_pattern = worker
else:
message(
"Please specify a worker using --worker <worker> or use --all",
Level.WARNING,
)
sys.exit(1)
res_bin = run(
c,
f"ps aux | grep '[c]elery' | grep 'worker' | grep '{worker_pattern}' ",
warn=True,
show_ok=False,
)
if res_bin.ok:
message(
f"Killing previous celery instance(s) with pattern '{worker_pattern}' ...",
Level.HEADER,
)
# We need to kill the celery worker processes matching the "worker_pattern", if provided.
# First we kill the parent process (the celery workers' parent), if there is one, when a "worker_pattern" is provided,
# and then we kill all the celery worker processes, with or without a pattern.
cmd = (
f"pid=$(ps aux | grep '[c]elery' | grep 'worker' | grep '{worker_pattern}' | awk '{{print $2}}') "
f"&& pgrep -P $pid | xargs kill -9 "
)
run(c, cmd, warn=True, show_ok=False)
cmd = (
f"pid=$(ps aux | grep '[c]elery' | grep 'worker' | grep '{worker_pattern}' | awk '{{print $2}}') "
f"&& kill -9 $pid "
)
run(c, cmd, warn=True)
else:
message("No celery instances found", Level.HEADER)
def validate_algorithm_folders(folders, name):
"""Validates and retrieves the algorithm folder configuration."""
if not folders:
folders = get_deployment_config(name)
if not isinstance(folders, str):
raise ValueError(f"The {name} configuration must be a comma-separated string.")
return folders
@task
def start_worker(
c,
worker=None,
all_=False,
framework_log_level=None,
detached=False,
exareme2_algorithm_folders=None,
flower_algorithm_folders=None,
):
"""
(Re)Start the worker(s) service(s). If a worker service is running, stop and start it again.
:param worker: The worker to start, using the proper file in the `WORKERS_CONFIG_DIR`.
:param all_: If set, all the workers for which a configuration file exists will be started.
:param framework_log_level: If not provided, it will look into the `DEPLOYMENT_CONFIG_FILE`.
:param detached: If set to True, it will start the service in the background.
:param exareme2_algorithm_folders: Used by the services. If not provided, it looks in the `DEPLOYMENT_CONFIG_FILE`.
:param flower_algorithm_folders: Used by the services. If not provided, it looks in the `DEPLOYMENT_CONFIG_FILE`.
The containers related to the services remain unchanged.
"""
if not framework_log_level:
framework_log_level = get_deployment_config("framework_log_level")
# Validate algorithm folders
exareme2_algorithm_folders = validate_algorithm_folders(
exareme2_algorithm_folders, "exareme2_algorithm_folders"
)
flower_algorithm_folders = validate_algorithm_folders(
flower_algorithm_folders, "flower_algorithm_folders"
)
worker_ids = get_worker_ids(all_, worker)
worker_ids.sort()  # Sorting the ids protects against killing a similarly named worker; 'localworker1' would otherwise also match 'localworker10'.
for worker_id in worker_ids:
kill_worker(c, worker_id)
message(f"Starting Worker {worker_id}...", Level.HEADER)
worker_config_file = WORKERS_CONFIG_DIR / f"{worker_id}.toml"
with c.prefix(
f"export {EXAREME2_ALGORITHM_FOLDERS_ENV_VARIABLE}={exareme2_algorithm_folders}"
):
with c.prefix(
f"export {FLOWER_ALGORITHM_FOLDERS_ENV_VARIABLE}={flower_algorithm_folders}"
):
with c.prefix(
f"export {EXAREME2_WORKER_CONFIG_FILE}={worker_config_file}"
):
outpath = OUTDIR / (worker_id + ".out")
if detached or all_:
cmd = (
f"PYTHONPATH={PROJECT_ROOT}: poetry run celery "
f"-A exareme2.worker.utils.celery_app worker -l {framework_log_level} > {outpath} "
f"--pool=eventlet --purge 2>&1"
)
run(c, cmd, wait=False)
else:
cmd = (
f"PYTHONPATH={PROJECT_ROOT} poetry run celery -A "
f"exareme2.worker.utils.celery_app worker -l {framework_log_level} --pool=eventlet --purge"
)
run(c, cmd, attach_=True)
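# Example usage (a sketch): 'inv start-worker --worker localworker1 --detached' starts a
# single worker in the background, logging to '/tmp/exareme2/localworker1.out'.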
@task
def kill_controller(c):
"""Kill the controller service."""
HYPERCORN_PROCESS_NAME = "[f]rom multiprocessing.spawn import spawn_main;"
res = run(c, f"ps aux | grep '{HYPERCORN_PROCESS_NAME}'", warn=True, show_ok=False)
if res.ok:
message("Killing previous Hypercorn instances...", Level.HEADER)
cmd = f"ps aux | grep '{HYPERCORN_PROCESS_NAME}' | awk '{{ print $2}}' | xargs kill -9 && sleep 5"
run(c, cmd)
else:
message("No hypercorn instance found", Level.HEADER)
@task
def start_controller(
c, detached=False, exareme2_algorithm_folders=None, flower_algorithm_folders=None
):
"""
(Re)Start the controller service. If the service is already running, stop and start it again.
"""
# Validate algorithm folders
exareme2_algorithm_folders = validate_algorithm_folders(
exareme2_algorithm_folders, "exareme2_algorithm_folders"
)
flower_algorithm_folders = validate_algorithm_folders(
flower_algorithm_folders, "flower_algorithm_folders"
)
kill_controller(c)
message("Starting Controller...", Level.HEADER)
controller_config_file = CONTROLLER_CONFIG_DIR / "controller.toml"
with c.prefix(
f"export {EXAREME2_ALGORITHM_FOLDERS_ENV_VARIABLE}={exareme2_algorithm_folders}"
):
with c.prefix(
f"export {FLOWER_ALGORITHM_FOLDERS_ENV_VARIABLE}={flower_algorithm_folders}"
):
with c.prefix(
f"export EXAREME2_CONTROLLER_CONFIG_FILE={controller_config_file}"
):
outpath = OUTDIR / "controller.out"
if detached:
cmd = f"PYTHONPATH={PROJECT_ROOT} poetry run hypercorn --config python:exareme2.controller.quart.hypercorn_config -b 0.0.0.0:5000 exareme2/controller/quart/app:app>> {outpath} 2>&1"
run(c, cmd, wait=False)
else:
cmd = f"PYTHONPATH={PROJECT_ROOT} poetry run hypercorn --config python:exareme2.controller.quart.hypercorn_config -b 0.0.0.0:5000 exareme2/controller/quart/app:app"
run(c, cmd, attach_=True)
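# Example usage (a sketch): 'inv start-controller --detached' starts the controller in the
# background on port 5000, logging to '/tmp/exareme2/controller.out'.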
@task
def deploy(
c,
install_dep=True,
start_all=True,
start_controller_=False,
start_workers=False,
log_level=None,
framework_log_level=None,
monetdb_image=None,
monetdb_nclients=None,
exareme2_algorithm_folders=None,
flower_algorithm_folders=None,
smpc=None,
):
"""
Install dependencies, (re)create all the containers and (re)start all the services.
:param install_dep: Install dependencies or not.
:param start_all: Flag to start all the worker and controller services.
:param start_controller_: Flag to start the controller service.
:param start_workers: Flag to start all the worker services.
:param log_level: Used for the dev logs. If not provided, it looks in the `DEPLOYMENT_CONFIG_FILE`.
:param framework_log_level: Used for the engine services. If not provided, it looks in the `DEPLOYMENT_CONFIG_FILE`.
:param monetdb_image: Used for the db containers. If not provided, it looks in the `DEPLOYMENT_CONFIG_FILE`.
:param monetdb_nclients: Used for the db containers. If not provided, it looks in the `DEPLOYMENT_CONFIG_FILE`.
:param exareme2_algorithm_folders: Used by the services. If not provided, it looks in the `DEPLOYMENT_CONFIG_FILE`.
:param flower_algorithm_folders: Used by the services. If not provided, it looks in the `DEPLOYMENT_CONFIG_FILE`.
:param smpc: Deploy the SMPC cluster as well. If not provided, it looks in the `DEPLOYMENT_CONFIG_FILE`.
"""
if not log_level:
log_level = get_deployment_config("log_level")
if not framework_log_level:
framework_log_level = get_deployment_config("framework_log_level")
if not monetdb_image:
monetdb_image = get_deployment_config("monetdb_image")
if not monetdb_nclients:
monetdb_nclients = get_deployment_config("monetdb_nclients")
if not exareme2_algorithm_folders:
exareme2_algorithm_folders = get_deployment_config("exareme2_algorithm_folders")
if not flower_algorithm_folders:
flower_algorithm_folders = get_deployment_config("flower_algorithm_folders")
if smpc is None:
smpc = get_deployment_config("smpc", subconfig="enabled")
if install_dep:
install_dependencies(c)
# Start WORKER services
config_files = [WORKERS_CONFIG_DIR / file for file in listdir(WORKERS_CONFIG_DIR)]
if not config_files:
message(
f"There are no worker config files to be used for deployment. Folder: {WORKERS_CONFIG_DIR}",
Level.WARNING,
)
sys.exit(1)
worker_ids = []
for worker_config_file in config_files:
with open(worker_config_file) as fp:
worker_config = toml.load(fp)
worker_ids.append(worker_config["identifier"])
worker_ids.sort()  # Sorting the ids protects against removing a similarly named id; 'localworker1' would otherwise also remove 'localworker10'.
create_monetdb(
c,
worker=worker_ids,
image=monetdb_image,
log_level=log_level,