Skip to content

Commit

Permalink
Refactored Algorithm specifications. (#480)
Browse files Browse the repository at this point in the history
 - The specs are now loaded from the json files.
 - Added a "type" field to the algorithm specifications (necessary before integrating Flower into the repo)
  • Loading branch information
KFilippopolitis authored Apr 15, 2024
1 parent 148ddec commit d350381
Show file tree
Hide file tree
Showing 74 changed files with 695 additions and 491 deletions.
15 changes: 0 additions & 15 deletions exareme2/algorithms/exareme2/algorithm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from abc import ABC
from abc import abstractmethod
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import Dict
Expand All @@ -9,8 +8,6 @@

from pydantic import BaseModel

from exareme2.algorithms.specifications import AlgorithmSpecification

if TYPE_CHECKING:
from exareme2.controller.services.exareme2 import AlgorithmExecutionEngine
from exareme2.controller.services.exareme2 import LocalWorkersTable
Expand Down Expand Up @@ -162,18 +159,6 @@ def algorithm_parameters(self) -> Dict[str, Any]:
def datasets(self) -> List[str]:
return self._initialization_params.datasets

@classmethod
def get_specification(cls) -> AlgorithmSpecification:
"""Returns the algorithm specs object
Algorithm specs are read from a json file placed in the same folder as
the algorithm implementation file, i.e. the file where `Algorithm` is
subclassed. The json file contents must map to the
`AlgorithmSpecification` structure.
"""
file = Path(__file__).parent / f"{cls.algname}.json"
return AlgorithmSpecification.parse_file(file)

@abstractmethod
def run(self, data: "LocalWorkersTable", metadata: dict):
"""
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/anova.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Test the difference in the means of the dependent variable between two or more groups, when there are two independent covariates.",
"label": "Two-way ANOVA",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable (dependent)",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/anova_oneway.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Test the difference in the means of the dependent variable between two or more groups, when there is a single independent covariate.",
"label": "One-way ANOVA",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable (dependent)",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/descriptive_stats.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Descriptive statistics",
"label": "Descriptive statistics",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "y",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/kmeans.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "K-Means",
"label": "K-Means",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "y",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/linear_regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Statistical method that models the relationship between a dependent variable and one or more independent variables by fitting a linear model to the observed data by ordinary least squares (OLS).",
"label": "Linear Regression",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable (dependent)",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/linear_regression_cv.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Method used to evaluate the performance of a linear regression model. It involves splitting the data into training and validation sets and testing the model's ability to generalize to new data by using the validation set.",
"label": "Linear Regression Cross-validation",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable (dependent)",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/logistic_regression.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Statistical method that models the relationship between a dependent binary variable and one or more independent variables by fitting a binary logistic curve to the observed data.",
"label": "Logistic Regression",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable (dependent)",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/logistic_regression_cv.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Method used to evaluate the performance of a logistic regression model. It involves splitting the data into training and validation sets and testing the model's ability to generalize to new data by using the validation set.",
"label": "Logistic Regression Cross-validation",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable (dependent)",
Expand Down
67 changes: 67 additions & 0 deletions exareme2/algorithms/exareme2/logistic_regression_fedaverage.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"name": "logistic_regression_cv_fedaverage",
"desc": "Method used to evaluate the performance of a logistic regression model. It involves splitting the data into training and validation sets and testing the model's ability to generalize to new data by using the validation set.",
"label": "Logistic Regression Cross-validation",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable (dependent)",
"desc": "A unique nominal variable. The variable is converted to binary by assigning 1 to the positive class and 0 to all other classes. ",
"types": [
"int",
"text"
],
"stattypes": [
"nominal"
],
"notblank": true,
"multiple": false
},
"x": {
"label": "Covariates (independent)",
"desc": "One or more variables. Can be numerical or nominal. For nominal variables dummy encoding is used.",
"types": [
"real",
"int",
"text"
],
"stattypes": [
"numerical",
"nominal"
],
"notblank": true,
"multiple": true
}
},
"parameters": {
"positive_class": {
"label": "Positive class",
"desc": "Positive class of y. All other classes are considered negative.",
"types": [
"text",
"int"
],
"notblank": true,
"multiple": false,
"enums": {
"type": "input_var_CDE_enums",
"source": [
"y"
]
}
},
"n_splits": {
"label": "Number of splits",
"desc": "Number of splits for cross-validation.",
"types": [
"int"
],
"notblank": true,
"multiple": false,
"default": 5,
"min": 2,
"max": 20
}
}
}
18 changes: 0 additions & 18 deletions exareme2/algorithms/exareme2/logistic_regression_fedaverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
from exareme2.algorithms.exareme2.logistic_regression_cv import (
CVLogisticRegressionResult,
)
from exareme2.algorithms.exareme2.logistic_regression_cv import (
LogisticRegressionCVAlgorithm,
)
from exareme2.algorithms.exareme2.logistic_regression_cv import ROCCurve
from exareme2.algorithms.exareme2.logistic_regression_cv import (
make_classification_metrics_summary,
Expand All @@ -28,7 +25,6 @@
from exareme2.algorithms.exareme2.udfgen import secure_transfer
from exareme2.algorithms.exareme2.udfgen import udf
from exareme2.algorithms.specifications import AlgorithmName
from exareme2.algorithms.specifications import AlgorithmSpecification

ALGORITHM_NAME = AlgorithmName.LOGISTIC_REGRESSION_CV_FEDAVERAGE

Expand All @@ -39,20 +35,6 @@ def get_variable_groups(self):


class LogRegCVFedAverageAlgorithm(Algorithm, algname=ALGORITHM_NAME):
@classmethod
def get_specification(cls):
# Use the LR with CV specification but change the name
LR_with_cv_specification = LogisticRegressionCVAlgorithm.get_specification()
LR_with_cv_fedavg = AlgorithmSpecification(
name=ALGORITHM_NAME,
desc=LR_with_cv_specification.desc,
label=LR_with_cv_specification.label,
enabled=LR_with_cv_specification.enabled,
inputdata=LR_with_cv_specification.inputdata,
parameters=LR_with_cv_specification.parameters,
)
return LR_with_cv_fedavg

def run(self, data, metadata):
X, y = data

Expand Down
53 changes: 53 additions & 0 deletions exareme2/algorithms/exareme2/longitudinal_transformer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"name":"longitudinal_transformer",
"desc":"longitudinal_transform",
"label":"Longitudinal Transformer",
"enabled":true,
"type": "exareme2_transformer",
"parameters":{
"visit1": {
"label":"1st Visit",
"desc":"The data of a certain subject's measurements during a specific visit on a specific date.",
"types":["text"],
"notblank":true,
"multiple":false,
"enums":{
"type":"fixed_var_CDE_enums", "source":["visitid"]

}
},
"visit2": {
"label":"2nd Visit",
"desc":"The data of the same subject's measurements during a specific visit on a specific but different, later date.",
"types":["text"],
"notblank":true,
"multiple":false,
"enums": {
"type": "fixed_var_CDE_enums", "source":["visitid"]
}
},
"strategies": {
"label":"Strategies",
"desc":"Select a strategy for each variable.",
"types":["dict"],
"notblank":true,
"multiple":false,
"dict_keys_enums":{
"type":"input_var_names", "source":["x", "y"]
},
"dict_values_enums":{
"type": "list", "source":["diff", "first", "second"]
}
}
},
"compatible_algorithms":[
"anova",
"anova_oneway",
"linear_regression",
"linear_regression_cv",
"logistic_regression",
"logistic_regression_cv",
"naive_bayes_gaussian_cv",
"naive_bayes_categorical_cv"
]
}
60 changes: 0 additions & 60 deletions exareme2/algorithms/exareme2/longitudinal_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,7 @@
from exareme2 import DType
from exareme2.algorithms.exareme2.udfgen import AdhocUdfGenerator
from exareme2.algorithms.exareme2.udfgen.udfgen_DTOs import UDFGenTableResult
from exareme2.algorithms.specifications import AlgorithmName
from exareme2.algorithms.specifications import ParameterEnumSpecification
from exareme2.algorithms.specifications import ParameterEnumType
from exareme2.algorithms.specifications import ParameterSpecification
from exareme2.algorithms.specifications import ParameterType
from exareme2.algorithms.specifications import TransformerName
from exareme2.algorithms.specifications import TransformerSpecification
from exareme2.worker_communication import BadUserInput

if TYPE_CHECKING:
Expand Down Expand Up @@ -73,60 +67,6 @@ def __init__(
def get_transformer_name(cls):
return TRANSFORMER_NAME

@classmethod
def get_specification(cls):
return TransformerSpecification(
name=cls.get_transformer_name(),
desc="longitudinal_transform",
label="Longitudinal Transformer",
enabled=True,
parameters={
"visit1": ParameterSpecification(
label="1st Visit",
desc="The data of a certain subject's measurements during a specific visit on a specific date.",
types=[ParameterType.TEXT],
notblank=True,
multiple=False,
enums=ParameterEnumSpecification(
type=ParameterEnumType.FIXED_VAR_CDE_ENUMS, source=["visitid"]
),
),
"visit2": ParameterSpecification(
label="2nd Visit",
desc="The data of the same subject's measurements during a specific visit on a specific but different, later date.",
types=[ParameterType.TEXT],
notblank=True,
multiple=False,
enums=ParameterEnumSpecification(
type=ParameterEnumType.FIXED_VAR_CDE_ENUMS, source=["visitid"]
),
),
"strategies": ParameterSpecification(
label="Strategies",
desc="Select a strategy for each variable.",
types=[ParameterType.DICT],
notblank=True,
multiple=False,
dict_keys_enums=ParameterEnumSpecification(
type=ParameterEnumType.INPUT_VAR_NAMES, source=["x", "y"]
),
dict_values_enums=ParameterEnumSpecification(
type=ParameterEnumType.LIST, source=["diff", "first", "second"]
),
),
},
compatible_algorithms=[
AlgorithmName.ANOVA,
AlgorithmName.ANOVA_ONEWAY,
AlgorithmName.LINEAR_REGRESSION,
AlgorithmName.LINEAR_REGRESSION_CV,
AlgorithmName.LOGISTIC_REGRESSION,
AlgorithmName.LOGISTIC_REGRESSION_CV,
AlgorithmName.NAIVE_BAYES_GAUSSIAN_CV,
AlgorithmName.NAIVE_BAYES_CATEGORICAL_CV,
],
)

def run(self, data, metadata):
X, y = data
metadata: dict = metadata
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/multiple_histograms.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Multiple Histograms",
"label": "Multiple Histograms",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "y",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Uses Bayes' theorem to calculate the probability of each class given a set of nominal features assuming independence between features. It then classifies data points based on the class with the highest probability.",
"label": "Categorical Naive Bayes classifier with cross-validation",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable (dependent)",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/naive_bayes_gaussian_cv.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Uses Bayes' theorem to calculate the probability of each class given a set of numerical features assuming independence between features. It then classifies data points based on the class with the highest probability.",
"label": "Gaussian Naive Bayes classifier with cross-validation",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable (dependent)",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/pca.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Computes the principal components of a set of correlated variables. The principal components can then be used to represent the original data with reduced dimensions.",
"label": "Principal Component Analysis (PCA)",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variables",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/pearson_correlation.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Measure the linear relationship between two continuous variables. It calculates the correlation coefficient (range: -1 to 1). The correlation matrix will be computed between all possible pairs of variables and covariates. Leaving covariates empty is equivalent to having covariates = variables.",
"label": "Pearson Correlation Matrix",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variables",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/svm_scikit.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Divide datasets into classes to find a maximum marginal hyperplane.",
"label": "SVM",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Classes",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/ttest_independent.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Test the difference in means between two independent groups. It assumes that the two groups have equal variances and are independently sampled from normal distributions.",
"label": "T-Test Independent",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable of interest",
Expand Down
1 change: 1 addition & 0 deletions exareme2/algorithms/exareme2/ttest_onesample.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"desc": "Test the difference in mean of a single sample with a population mean. It assumes that the sample is drawn from a normal distribution.",
"label": "T-Test One-Sample",
"enabled": true,
"type": "exareme2",
"inputdata": {
"y": {
"label": "Variable",
Expand Down
Loading

0 comments on commit d350381

Please sign in to comment.