Refactored Algorithm specifications. (#480)

- The specs are now loaded from the JSON files. - Added a type to the algorithm specifications (necessary before integrating Flower into the repo)
madgik · Apr 15, 2024 · d350381 · d350381
1 parent 148ddec
commit d350381
Show file tree

Hide file tree

Showing 74 changed files with 695 additions and 491 deletions.
diff --git a/exareme2/algorithms/exareme2/algorithm.py b/exareme2/algorithms/exareme2/algorithm.py
@@ -1,6 +1,5 @@
 from abc import ABC
 from abc import abstractmethod
-from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Dict
@@ -9,8 +8,6 @@
 
 from pydantic import BaseModel
 
-from exareme2.algorithms.specifications import AlgorithmSpecification
-
 if TYPE_CHECKING:
     from exareme2.controller.services.exareme2 import AlgorithmExecutionEngine
     from exareme2.controller.services.exareme2 import LocalWorkersTable
@@ -162,18 +159,6 @@ def algorithm_parameters(self) -> Dict[str, Any]:
     def datasets(self) -> List[str]:
         return self._initialization_params.datasets
 
-    @classmethod
-    def get_specification(cls) -> AlgorithmSpecification:
-        """Returns the algorithm specs object
-
-        Algorithm specs are read from a json file placed in the same folder as
-        the algorithm implementation file, i.e. the file where `Algorithm` is
-        subclassed. The json file contents must map to the
-        `AlgorithmSpecification` structure.
-        """
-        file = Path(__file__).parent / f"{cls.algname}.json"
-        return AlgorithmSpecification.parse_file(file)
-
     @abstractmethod
     def run(self, data: "LocalWorkersTable", metadata: dict):
         """

diff --git a/exareme2/algorithms/exareme2/anova.json b/exareme2/algorithms/exareme2/anova.json
@@ -3,6 +3,7 @@
     "desc": "Test the difference in the means of the dependent variable between two or more groups, when there are two independent covariates.",
     "label": "Two-way ANOVA",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable (dependent)",

diff --git a/exareme2/algorithms/exareme2/anova_oneway.json b/exareme2/algorithms/exareme2/anova_oneway.json
@@ -3,6 +3,7 @@
     "desc": "Test the difference in the means of the dependent variable between two or more groups, when there is a single independent covariate.",
     "label": "One-way ANOVA",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable (dependent)",

diff --git a/exareme2/algorithms/exareme2/descriptive_stats.json b/exareme2/algorithms/exareme2/descriptive_stats.json
@@ -3,6 +3,7 @@
     "desc": "Descriptive statistics",
     "label": "Descriptive statistics",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "y",

diff --git a/exareme2/algorithms/exareme2/kmeans.json b/exareme2/algorithms/exareme2/kmeans.json
@@ -3,6 +3,7 @@
     "desc": "K-Means",
     "label": "K-Means",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "y",

diff --git a/exareme2/algorithms/exareme2/linear_regression.json b/exareme2/algorithms/exareme2/linear_regression.json
@@ -3,6 +3,7 @@
     "desc": "Statistical method that models the relationship between a dependent variable and one or more independent variables by fitting a linear model to the observed data by ordinary least squares (OLS).",
     "label": "Linear Regression",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable (dependent)",

diff --git a/exareme2/algorithms/exareme2/linear_regression_cv.json b/exareme2/algorithms/exareme2/linear_regression_cv.json
@@ -3,6 +3,7 @@
     "desc": "Method used to evaluate the performance of a linear regression model. It involves splitting the data into training and validation sets and testing the model's ability to generalize to new data by using the validation set.",
     "label": "Linear Regression Cross-validation",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable (dependent)",

diff --git a/exareme2/algorithms/exareme2/logistic_regression.json b/exareme2/algorithms/exareme2/logistic_regression.json
@@ -3,6 +3,7 @@
     "desc": "Statistical method. that models the relationship between a dependent binary variable and one or more independent variables by fitting a binary logistic curve to the observed data.",
     "label": "Logistic Regression",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable (dependent)",

diff --git a/exareme2/algorithms/exareme2/logistic_regression_cv.json b/exareme2/algorithms/exareme2/logistic_regression_cv.json
@@ -3,6 +3,7 @@
     "desc": "Method used to evaluate the performance of a logistic regression model. It involves splitting the data into training and validation sets and testing the model's ability to generalize to new data by using the validation set.",
     "label": "Logistic Regression Cross-validation",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable (dependent)",

diff --git a/exareme2/algorithms/exareme2/logistic_regression_fedaverage.json b/exareme2/algorithms/exareme2/logistic_regression_fedaverage.json
@@ -0,0 +1,67 @@
+{
+    "name": "logistic_regression_cv_fedaverage",
+    "desc": "Method used to evaluate the performance of a logistic regression model. It involves splitting the data into training and validation sets and testing the model's ability to generalize to new data by using the validation set.",
+    "label": "Logistic Regression Cross-validation",
+    "enabled": true,
+    "type": "exareme2",
+    "inputdata": {
+        "y": {
+            "label": "Variable (dependent)",
+            "desc": "A unique nominal variable. The variable is converted to binary by assigning 1 to the positive class and 0 to all other classes. ",
+            "types": [
+                "int",
+                "text"
+            ],
+            "stattypes": [
+                "nominal"
+            ],
+            "notblank": true,
+            "multiple": false
+        },
+        "x": {
+            "label": "Covariates (independent)",
+            "desc": "One or more variables. Can be numerical or nominal. For nominal variables dummy encoding is used.",
+            "types": [
+                "real",
+                "int",
+                "text"
+            ],
+            "stattypes": [
+                "numerical",
+                "nominal"
+            ],
+            "notblank": true,
+            "multiple": true
+        }
+    },
+    "parameters": {
+        "positive_class": {
+            "label": "Positive class",
+            "desc": "Positive class of y. All other classes are considered negative.",
+            "types": [
+                "text",
+                "int"
+            ],
+            "notblank": true,
+            "multiple": false,
+            "enums": {
+                "type": "input_var_CDE_enums",
+                "source": [
+                    "y"
+                ]
+            }
+        },
+        "n_splits": {
+            "label": "Number of splits",
+            "desc": "Number of splits for cross-validation.",
+            "types": [
+                "int"
+            ],
+            "notblank": true,
+            "multiple": false,
+            "default": 5,
+            "min": 2,
+            "max": 20
+        }
+    }
+}
diff --git a/exareme2/algorithms/exareme2/logistic_regression_fedaverage.py b/exareme2/algorithms/exareme2/logistic_regression_fedaverage.py
@@ -12,9 +12,6 @@
 from exareme2.algorithms.exareme2.logistic_regression_cv import (
     CVLogisticRegressionResult,
 )
-from exareme2.algorithms.exareme2.logistic_regression_cv import (
-    LogisticRegressionCVAlgorithm,
-)
 from exareme2.algorithms.exareme2.logistic_regression_cv import ROCCurve
 from exareme2.algorithms.exareme2.logistic_regression_cv import (
     make_classification_metrics_summary,
@@ -28,7 +25,6 @@
 from exareme2.algorithms.exareme2.udfgen import secure_transfer
 from exareme2.algorithms.exareme2.udfgen import udf
 from exareme2.algorithms.specifications import AlgorithmName
-from exareme2.algorithms.specifications import AlgorithmSpecification
 
 ALGORITHM_NAME = AlgorithmName.LOGISTIC_REGRESSION_CV_FEDAVERAGE
 
@@ -39,20 +35,6 @@ def get_variable_groups(self):
 
 
 class LogRegCVFedAverageAlgorithm(Algorithm, algname=ALGORITHM_NAME):
-    @classmethod
-    def get_specification(cls):
-        # Use the LR with CV specification but change the name
-        LR_with_cv_specification = LogisticRegressionCVAlgorithm.get_specification()
-        LR_with_cv_fedavg = AlgorithmSpecification(
-            name=ALGORITHM_NAME,
-            desc=LR_with_cv_specification.desc,
-            label=LR_with_cv_specification.label,
-            enabled=LR_with_cv_specification.enabled,
-            inputdata=LR_with_cv_specification.inputdata,
-            parameters=LR_with_cv_specification.parameters,
-        )
-        return LR_with_cv_fedavg
-
     def run(self, data, metadata):
         X, y = data
 

diff --git a/exareme2/algorithms/exareme2/longitudinal_transformer.json b/exareme2/algorithms/exareme2/longitudinal_transformer.json
@@ -0,0 +1,53 @@
+{
+    "name":"longitudinal_transformer",
+    "desc":"longitudinal_transform",
+    "label":"Longitudinal Transformer",
+    "enabled":true,
+    "type": "exareme2_transformer",
+    "parameters":{
+        "visit1": {
+            "label":"1st Visit",
+            "desc":"The data of a certain subject's measurements during a specific visit on a specific date.",
+            "types":["text"],
+            "notblank":true,
+            "multiple":false,
+            "enums":{
+                "type":"fixed_var_CDE_enums", "source":["visitid"]
+
+            }
+        },
+        "visit2": {
+            "label":"2nd Visit",
+            "desc":"The data of the same subject's measurements during a specific visit on a specific but different, later date.",
+            "types":["text"],
+            "notblank":true,
+            "multiple":false,
+            "enums": {
+                "type": "fixed_var_CDE_enums", "source":["visitid"]
+            }
+        },
+        "strategies": {
+            "label":"Strategies",
+            "desc":"Select a strategy for each variable.",
+            "types":["dict"],
+            "notblank":true,
+            "multiple":false,
+            "dict_keys_enums":{
+                "type":"input_var_names", "source":["x", "y"]
+            },
+            "dict_values_enums":{
+                "type": "list", "source":["diff", "first", "second"]
+            }
+        }
+    },
+    "compatible_algorithms":[
+        "anova",
+        "anova_oneway",
+        "linear_regression",
+        "linear_regression_cv",
+        "logistic_regression",
+        "logistic_regression_cv",
+        "naive_bayes_gaussian_cv",
+        "naive_bayes_categorical_cv"
+    ]
+}
diff --git a/exareme2/algorithms/exareme2/longitudinal_transformer.py b/exareme2/algorithms/exareme2/longitudinal_transformer.py
@@ -9,13 +9,7 @@
 from exareme2 import DType
 from exareme2.algorithms.exareme2.udfgen import AdhocUdfGenerator
 from exareme2.algorithms.exareme2.udfgen.udfgen_DTOs import UDFGenTableResult
-from exareme2.algorithms.specifications import AlgorithmName
-from exareme2.algorithms.specifications import ParameterEnumSpecification
-from exareme2.algorithms.specifications import ParameterEnumType
-from exareme2.algorithms.specifications import ParameterSpecification
-from exareme2.algorithms.specifications import ParameterType
 from exareme2.algorithms.specifications import TransformerName
-from exareme2.algorithms.specifications import TransformerSpecification
 from exareme2.worker_communication import BadUserInput
 
 if TYPE_CHECKING:
@@ -73,60 +67,6 @@ def __init__(
     def get_transformer_name(cls):
         return TRANSFORMER_NAME
 
-    @classmethod
-    def get_specification(cls):
-        return TransformerSpecification(
-            name=cls.get_transformer_name(),
-            desc="longitudinal_transform",
-            label="Longitudinal Transformer",
-            enabled=True,
-            parameters={
-                "visit1": ParameterSpecification(
-                    label="1st Visit",
-                    desc="The data of a certain subject's measurements during a specific visit on a specific date.",
-                    types=[ParameterType.TEXT],
-                    notblank=True,
-                    multiple=False,
-                    enums=ParameterEnumSpecification(
-                        type=ParameterEnumType.FIXED_VAR_CDE_ENUMS, source=["visitid"]
-                    ),
-                ),
-                "visit2": ParameterSpecification(
-                    label="2nd Visit",
-                    desc="The data of the same subject's measurements during a specific visit on a specific but different, later date.",
-                    types=[ParameterType.TEXT],
-                    notblank=True,
-                    multiple=False,
-                    enums=ParameterEnumSpecification(
-                        type=ParameterEnumType.FIXED_VAR_CDE_ENUMS, source=["visitid"]
-                    ),
-                ),
-                "strategies": ParameterSpecification(
-                    label="Strategies",
-                    desc="Select a strategy for each variable.",
-                    types=[ParameterType.DICT],
-                    notblank=True,
-                    multiple=False,
-                    dict_keys_enums=ParameterEnumSpecification(
-                        type=ParameterEnumType.INPUT_VAR_NAMES, source=["x", "y"]
-                    ),
-                    dict_values_enums=ParameterEnumSpecification(
-                        type=ParameterEnumType.LIST, source=["diff", "first", "second"]
-                    ),
-                ),
-            },
-            compatible_algorithms=[
-                AlgorithmName.ANOVA,
-                AlgorithmName.ANOVA_ONEWAY,
-                AlgorithmName.LINEAR_REGRESSION,
-                AlgorithmName.LINEAR_REGRESSION_CV,
-                AlgorithmName.LOGISTIC_REGRESSION,
-                AlgorithmName.LOGISTIC_REGRESSION_CV,
-                AlgorithmName.NAIVE_BAYES_GAUSSIAN_CV,
-                AlgorithmName.NAIVE_BAYES_CATEGORICAL_CV,
-            ],
-        )
-
     def run(self, data, metadata):
         X, y = data
         metadata: dict = metadata

diff --git a/exareme2/algorithms/exareme2/multiple_histograms.json b/exareme2/algorithms/exareme2/multiple_histograms.json
@@ -3,6 +3,7 @@
     "desc": "Multiple Histograms",
     "label": "Multiple Histograms",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "y",

diff --git a/exareme2/algorithms/exareme2/naive_bayes_categorical_cv.json b/exareme2/algorithms/exareme2/naive_bayes_categorical_cv.json
@@ -3,6 +3,7 @@
     "desc": "Uses Bayes' theorem to calculate the probability of each class given a set of nominal features assuming independence between features. It then classifies data points based on the class with the highest probability.",
     "label": "Categorical Naive Bayes classifier with cross-validation",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable (dependent)",

diff --git a/exareme2/algorithms/exareme2/naive_bayes_gaussian_cv.json b/exareme2/algorithms/exareme2/naive_bayes_gaussian_cv.json
@@ -3,6 +3,7 @@
     "desc": "Uses Bayes' theorem to calculate the probability of each class given a set of numerical features assuming independence between features. It then classifies data points ba sed on the class with the highest probability.",
     "label": "Gaussian Naive Bayes classifier with cross-validation",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable (dependent)",

diff --git a/exareme2/algorithms/exareme2/pca.json b/exareme2/algorithms/exareme2/pca.json
@@ -3,6 +3,7 @@
     "desc": "Computes the principal components of a set of correlated variables. The principal components can then be used to represent the original data with reduced dimensions.",
     "label": "Principal Component Analysis (PCA)",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variables",

diff --git a/exareme2/algorithms/exareme2/pearson_correlation.json b/exareme2/algorithms/exareme2/pearson_correlation.json
@@ -3,6 +3,7 @@
     "desc": "Measure the linear relationship between two continuous variables. It calculates the correlation coefficient (range: -1 to 1). The correlation matrix will be computed between all possible pairs of variables and covariates. Leaving covariates empty is equivalent to having covariates = variables.",
     "label": "Pearson Correlation Matrix",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variables",

diff --git a/exareme2/algorithms/exareme2/svm_scikit.json b/exareme2/algorithms/exareme2/svm_scikit.json
@@ -3,6 +3,7 @@
     "desc": "Divide datasets into classes to find a maximum marginal hyperplane.",
     "label": "SVM",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Classes",

diff --git a/exareme2/algorithms/exareme2/ttest_independent.json b/exareme2/algorithms/exareme2/ttest_independent.json
@@ -3,6 +3,7 @@
     "desc": "Test the difference in means between two independent groups. It assumes that the two groups have equal variances and are independently sampled from normal distributions.",
     "label": "T-Test Independent",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable of interest",

diff --git a/exareme2/algorithms/exareme2/ttest_onesample.json b/exareme2/algorithms/exareme2/ttest_onesample.json
@@ -3,6 +3,7 @@
     "desc": "Test the difference in mean of a single sample with a population mean. It assumes that the sample is drawn from a normal distribution.",
     "label": "T-Test One-Sample",
     "enabled": true,
+    "type": "exareme2",
     "inputdata": {
         "y": {
             "label": "Variable",