Skip to content

Commit

Permalink
feat: finngen finemapping ingestion dag
Browse files Browse the repository at this point in the history
  • Loading branch information
Szymon Szyszkowski committed Aug 20, 2024
1 parent 925ff79 commit 23dc7bd
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 176 deletions.
2 changes: 0 additions & 2 deletions config/datasets/ot_gcp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ thurman: ${datasets.static_assets}/thurman2012/genomewideCorrs_above0.7_promoter
target_index: ${datasets.static_assets}/targets # OTP 23.12 data
gene_interactions: ${datasets.static_assets}/interaction # OTP 23.12 data

finngen_finemapping_results_path: ${datasets.inputs}/Finngen_susie_finemapping_r10/full
finngen_finemapping_summaries_path: ${datasets.inputs}/Finngen_susie_finemapping_r10/Finngen_susie_credset_summary_r10.tsv

# Dev output datasets
variant_annotation: ${datasets.outputs}/variant_annotation
Expand Down
7 changes: 0 additions & 7 deletions config/step/ot_finngen_finemapping_ingestion.yaml

This file was deleted.

71 changes: 71 additions & 0 deletions src/ot_orchestration/dags/finngen_finemapping_ingestion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Airflow DAG for the Preprocess part of the pipeline."""

from __future__ import annotations

from pathlib import Path
from ot_orchestration.utils import common
from airflow.models.dag import DAG
from airflow.utils.trigger_rule import TriggerRule
from ot_orchestration.utils.dataproc import (
delete_cluster,
submit_step,
create_cluster,
install_dependencies,
)
from airflow.models.baseoperator import chain

# --- Inputs -----------------------------------------------------------------
# Curated disease-mapping table, pinned to a tagged release of the curation repo.
# NOTE(review): not referenced by any task in this module — confirm it is still needed.
EFO_MAPPINGS_PATH = "https://raw.githubusercontent.com/opentargets/curation/24.09.1/mappings/disease/manual_string.tsv"
# Glob passed to the ingestion step as `finngen_susie_finemapping_cs_summary_files`.
CREDIBLE_SETS_SUMMARY_IN = "gs://finngen-public-data-r11/finemap/summary/*.cred.summary.tsv"
# Glob passed to the ingestion step as `finngen_susie_finemapping_snp_files`.
SNP_IN = "gs://finngen-public-data-r11/finemap/full/susie/*.snp.bgz"
# Passed to the ingestion step as `finngen_release_prefix`.
FINNGEN_PREFIX = "FINNGEN_R11_"

# --- Outputs ----------------------------------------------------------------
# Destination of the `finngen_studies` step.
STUDY_INDEX_OUT = "gs://finngen_data/r11/study_index"
# Destination of the `finngen_finemapping_ingestion` step.
FINEMAPPING_OUT = "gs://finngen_data/r11/finemapping"

# --- Dataproc cluster -------------------------------------------------------
# Cluster name shared by every cluster-related task in the DAG.
CLUSTER_NAME = "otg-finemapping-ingestion-finngen"
# Value handed to `create_cluster` as its `autoscaling_policy`.
AUTOSCALING = "finngen-preprocess"

# The DAG id is taken from this file's name, so renaming the file renames the DAG.
with DAG(
    dag_id=Path(__file__).stem,
    description="Open Targets Genetics — Finngen Susie Finemapping Results Ingestion",
    default_args=common.shared_dag_args,
    **common.shared_dag_kwargs,
):
    # Overrides forwarded to the fine-mapping ingestion step: where to read the
    # SuSiE SNP / credible-set summary files from and where to write the result.
    finemapping_step_args = [
        f"step.finngen_finemapping_out={FINEMAPPING_OUT}",
        f"step.finngen_release_prefix={FINNGEN_PREFIX}",
        f"step.finngen_susie_finemapping_snp_files={SNP_IN}",
        f"step.finngen_susie_finemapping_cs_summary_files={CREDIBLE_SETS_SUMMARY_IN}",
        "step.session.start_hail=true",
        "step.session.write_mode=overwrite",
    ]
    # NOTE(review): ALL_DONE lets a task start once its upstream tasks have
    # finished, whatever their state — confirm this is intended for the steps
    # and not only for cluster teardown.
    finngen_finemapping_ingestion = submit_step(
        task_id="finngen_finemapping_ingestion",
        step_id="finngen_finemapping_ingestion",
        cluster_name=CLUSTER_NAME,
        other_args=finemapping_step_args,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Overrides forwarded to the study-index step.
    study_index_step_args = [
        f"step.finngen_study_index_out={STUDY_INDEX_OUT}",
        "step.session.write_mode=overwrite",
    ]
    finngen_study_index = submit_step(
        task_id="finngen_studies",
        step_id="finngen_studies",
        cluster_name=CLUSTER_NAME,
        other_args=study_index_step_args,
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Cluster lifecycle tasks, created in the same order they are chained below.
    cluster_setup = create_cluster(
        CLUSTER_NAME,
        autoscaling_policy=AUTOSCALING,
        master_disk_size=2000,
        num_workers=6,
    )
    dependencies_setup = install_dependencies(CLUSTER_NAME)
    cluster_teardown = delete_cluster(CLUSTER_NAME)

    # Linear pipeline: create cluster -> install dependencies -> study index
    # -> fine-mapping ingestion -> delete cluster.
    chain(
        cluster_setup,
        dependencies_setup,
        finngen_study_index,
        finngen_finemapping_ingestion,
        cluster_teardown,
    )
85 changes: 0 additions & 85 deletions src/ot_orchestration/dags/finngen_harmonisation.py

This file was deleted.

82 changes: 0 additions & 82 deletions src/ot_orchestration/dags/finngen_preprocess.py

This file was deleted.

0 comments on commit 23dc7bd

Please sign in to comment.