Skip to content

Commit

Permalink
fix: gentropy fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
javfg committed Nov 26, 2024
1 parent 77c7d24 commit 79d4402
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 45 deletions.
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ x-airflow-common: &airflow-common
AIRFLOW__CORE__FERNET_KEY: ""
AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: "true"
AIRFLOW__CORE__LOAD_EXAMPLES: "false"
AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 32
AIRFLOW__API__AUTH_BACKENDS: "airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session"
AIRFLOW__WEBSERVER__SECRET_KEY: "secretkey"
# yamllint disable rule:line-length
Expand Down
40 changes: 13 additions & 27 deletions src/ot_orchestration/dags/config/gentropy.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
---
python_main_module: gs://genetics_etl_python_playground/initialisation/gentropy/dev/cli.py
python_main_module: gs://genetics_etl_python_playground/initialisation/gentropy/v2.0.0rc1/cli.py

dataproc_cluster_settings:
cluster_metadata:
PACKAGE: gs://genetics_etl_python_playground/initialisation/gentropy/dev/gentropy-0.0.0-py3-none-any.whl
cluster_init_script: gs://genetics_etl_python_playground/initialisation/gentropy/dev/install_dependencies_on_cluster.sh
autoscaling_policy: otg-efm
allow_efm: true
num_workers: 10
PACKAGE: gs://genetics_etl_python_playground/initialisation/gentropy/v2.0.0rc1/gentropy-0.0.0-py3-none-any.whl
cluster_init_script: gs://genetics_etl_python_playground/initialisation/gentropy/v2.0.0rc1/install_dependencies_on_cluster.sh
autoscaling_policy: otg-etl
allow_efm: false
num_workers: 2

batch_settings: &batch_settings
resource_specs:
Expand Down Expand Up @@ -74,7 +74,7 @@ steps:
- gs://gwas_catalog_sumstats_susie/credible_set_clean/
- gs://eqtl_catalogue_data/credible_set_datasets/eqtl_catalogue_susie/
- gs://ukb_ppp_eur_data/credible_set_clean/
- gs://finngen_data/r11/credible_set_datasets/susie/
- gs://eqtl_catalogue_data/credible_set_datasets/eqtl_catalogue_susie_patched/
step.valid_study_locus_path: '{gcs_url}/output/genetics/parquet/credible_set'
step.invalid_study_locus_path: '{gcs_url}/output/genetics/parquet/invalid_credible_set'
step.invalid_qc_reasons:
Expand Down Expand Up @@ -121,6 +121,7 @@ steps:
step.coloc_path: '{gcs_url}/output/genetics/parquet/colocalisation' # the path has to be the same as ecaviar
step.colocalisation_method: Coloc
+step.colocalisation_method_params: '{priorc1: 1e-4, priorc2: 1e-4, priorc12: 1e-5}'
+step.session.extended_spark_conf: "{spark.sql.shuffle.partitions: '4000'}"
step.session.write_mode: ignore

colocalisation_ecaviar:
Expand All @@ -129,12 +130,13 @@ steps:
step.credible_set_path: '{gcs_url}/output/genetics/parquet/credible_set'
step.coloc_path: '{gcs_url}/output/genetics/parquet/colocalisation'
step.colocalisation_method: ECaviar
+step.session.extended_spark_conf: "{spark.sql.shuffle.partitions: '4000', spark.executor.memory: '8g'}"
step.session.write_mode: ignore

variant_annotation:
google-batch:
<<: *batch_settings
image: europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/custom_ensembl_vep:dev
image: europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/custom_ensembl_vep:v2.0.0rc1
entrypoint: /bin/sh
params:
vep_cache_path: gs://genetics_etl_python_playground/vep/cache
Expand Down Expand Up @@ -162,32 +164,16 @@ steps:
step.session.write_mode: overwrite
# step.session.write_mode: ignore

l2g_train:
params:
step: locus_to_gene
step.run_mode: train
step.wandb_run_name: 24.10_freeze6
step.hf_hub_repo_id: opentargets/locus_to_gene
step.model_path: '{gcs_url}/output/genetics/models/locus_to_gene_model/classifier.skops'
step.credible_set_path: '{gcs_url}/output/genetics/parquet/credible_set'
step.variant_index_path: '{gcs_url}/output/genetics/parquet/variant_index'
step.feature_matrix_path: '{gcs_url}/output/genetics/parquet/l2g_feature_matrix'
step.gold_standard_curation_path: gs://genetics_etl_python_playground/input/l2g/gold_standard/curation.json
step.gene_interactions_path: '{gcs_url}/output/etl/parquet/interaction'
step.hyperparameters.n_estimators: 100
step.hyperparameters.max_depth: 5
step.hyperparameters.loss: log_loss
+step.session.extended_spark_conf: "{spark.kryoserializer.buffer.max:500m, spark.sql.autoBroadcastJoinThreshold:'-1'}"
step.session.write_mode: ignore

l2g_predict:
params:
step: locus_to_gene
step.run_mode: predict
step.predictions_path: '{gcs_url}/output/genetics/parquet/l2g_predictions'
step.feature_matrix_path: '{gcs_url}/output/genetics/parquet/l2g_feature_matrix'
step.credible_set_path: '{gcs_url}/output/genetics/parquet/credible_set'
step.download_from_hub: true
step.download_from_hub: false
step.l2g_threshold: 0.05
step.model_path: gs://ot_orchestration/benchmarks/l2g/fm0/v5.1_best_cv/locus_to_gene_model/classifier.skops
step.hf_hub_repo_id: opentargets/locus_to_gene
step.session.write_mode: overwrite
# step.session.write_mode: ignore
Expand Down
31 changes: 13 additions & 18 deletions src/ot_orchestration/dags/config/unified_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ steps:
etl_go:
depends_on:
- pis_go
etl_disease:
depends_on:
- ontoform_disease
etl_expression:
depends_on:
- ontoform_expression
Expand All @@ -96,7 +93,7 @@ steps:
etl_otar:
ppp_only: true
depends_on:
- etl_disease
- ontoform_disease
depends_on_ppp:
- pis_otar
etl_target:
Expand All @@ -109,11 +106,11 @@ steps:
depends_on:
- pis_drug
- etl_target
- etl_disease
- ontoform_disease
etl_facetsearch:
depends_on:
- etl_target
- etl_disease
- ontoform_disease
- etl_go
etl_interaction:
depends_on:
Expand Down Expand Up @@ -179,7 +176,7 @@ steps:
gentropy_study_validation:
depends_on:
- ontoform_target
- etl_disease
- ontoform_disease
- gentropy_biosample_index
- gentropy_gene_index

Expand All @@ -192,12 +189,6 @@ steps:
- pis_evidence
- pis_pharmacogenomics
- gentropy_credible_set_validation
gentropy_colocalisation_coloc:
depends_on:
- gentropy_credible_set_validation
gentropy_colocalisation_ecaviar:
depends_on:
- gentropy_credible_set_validation

gentropy_variant_annotation:
depends_on:
Expand All @@ -207,21 +198,25 @@ steps:
depends_on:
- gentropy_variant_annotation

gentropy_colocalisation_coloc:
depends_on:
- gentropy_variant_index

gentropy_colocalisation_ecaviar:
depends_on:
- gentropy_colocalisation_coloc

gentropy_l2g_feature_matrix:
depends_on:
- gentropy_colocalisation_coloc
- gentropy_colocalisation_ecaviar
- gentropy_variant_index

gentropy_l2g_train:
gentropy_l2g_predict:
depends_on:
- etl_interaction
- gentropy_l2g_feature_matrix

gentropy_l2g_predict:
depends_on:
- gentropy_l2g_train

gentropy_l2g_evidence:
depends_on:
- gentropy_l2g_predict

0 comments on commit 79d4402

Please sign in to comment.