Patch for workflow/modules/1-define.smk: import sys, tidy section comments, and
abort early when the reference genome, the annotation, or (TempO-Seq only) the
biospyder manifest file is missing.

Review notes (text before the "diff --git" line is not part of the patch):
- This patch was recovered from a copy whose line breaks and indentation had
  been collapsed. Blank context lines and indentation depth cannot be read
  back from that copy; they are a best-effort reconstruction that satisfies
  the hunk line counts. Verify against the pre-image before applying.
- Added lines were revised during review: "== False" became "not", the
  manifest path is built with os.path.join instead of string "+", the
  "==" spacing was fixed, and the last error message is back on one line.
  The "index" line is kept unchanged, so its post-image hash still refers to
  the originally added lines.

diff --git a/workflow/modules/1-define.smk b/workflow/modules/1-define.smk
index b80fa78e..d2640668 100644
--- a/workflow/modules/1-define.smk
+++ b/workflow/modules/1-define.smk
@@ -1,9 +1,10 @@
 import pandas as pd
 from pathlib import Path
 import os
+import sys
 from snakemake.utils import validate
 
-# load and validate config
+# Load and validate config
 configfile: "inputs/config/config.yaml"
 config_schema = "../schema/config.schema.yaml"
 validate(config, config_schema)
@@ -13,13 +14,12 @@ common_config = config["common"]
 pipeline_config = config["pipeline"]
 deseq_config = config["DESeq2"]
 
-# Set up input directories
+# Set up input directories and files
 main_dir = common_config["projectdir"]
 if main_dir is None:
     main_dir = os.getcwd()
 main_dir = Path(main_dir)
 
-
 genome_dir = Path(pipeline_config["genomedir"])
 num_threads = pipeline_config["threads"]
 
@@ -40,6 +40,33 @@ sample_id_col = pipeline_config["sample_id"]
 SAMPLES = pd.read_table(metadata_file)[sample_id_col].tolist()
 print("samples: " + str(SAMPLES))
 
+
+# Check that the reference files exist; exit with an error if any is missing
+genome_filename = pipeline_config["genome_filename"]
+annotation_filename = pipeline_config["annotation_filename"]
+
+genome_filepath = genome_dir / genome_filename
+annotation_filepath = genome_dir / annotation_filename
+
+check_ref_fasta = os.path.exists(genome_filepath)
+
+if not check_ref_fasta:
+    sys.exit(f"Error! You are missing the expected reference genome file: {genome_filepath}")
+
+check_ref_genome = os.path.exists(annotation_filepath)
+
+if not check_ref_genome:
+    sys.exit(f"Error! You are missing the expected reference annotation file: {annotation_filepath}")
+
+if common_config["platform"] == "TempO-Seq":
+    # os.path.join adds the separator when biospyder_dbs has no trailing slash
+    biospyder_filepath = os.path.join(common_config["biospyder_dbs"], common_config["biospyder_manifest_file"])
+    check_manifest = os.path.exists(biospyder_filepath)
+
+    if not check_manifest:
+        sys.exit(f"Error! You are missing the expected biospyder manifest file: {biospyder_filepath}")
+
+
 
 # Set up output directories
 output_dir = main_dir / "output"