From f5a4b38643cfee97176ab7bc440d4779a0dd5246 Mon Sep 17 00:00:00 2001 From: riasc Date: Mon, 24 Jun 2024 23:51:22 -0500 Subject: [PATCH] enhanced UI --- CHANGELOG.md | 5 +- workflow/envs/spladder.yml | 2 +- workflow/rules/altsplicing.smk | 2 +- workflow/rules/common.smk | 146 ++++++++++------- workflow/rules/exitron.smk | 2 +- workflow/rules/hlatyping.smk | 152 +++++++++--------- workflow/rules/indel.smk | 4 +- .../scripts/genotyping/combine_all_alleles.py | 5 + .../combine_optitype_results.py | 5 +- .../genotyping/merge_predicted_mhcI.py | 32 ---- .../scripts/genotyping/optitype_wrapper.py | 42 +++++ 11 files changed, 228 insertions(+), 169 deletions(-) rename workflow/scripts/{ => genotyping}/combine_optitype_results.py (88%) delete mode 100644 workflow/scripts/genotyping/merge_predicted_mhcI.py create mode 100644 workflow/scripts/genotyping/optitype_wrapper.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 565465e..f41306b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fix - Separated samtools, bcftools and realign environments to avoid conflicts -- Updated splAdder to v3.0.5 +- Changed order of genotyping rules to catch errors when no alleles can be found + - Alleles are merged according to nartype (e.g., DNA, RNA) and then combined +- Force concat of VCF files in genotyping to avoid errors when no variants are found +- Added optitype wrapper to avoid errors when empty BAM files are provided / no HLA reads ## [0.2.6] - 2024-06-20 diff --git a/workflow/envs/spladder.yml b/workflow/envs/spladder.yml index 3eea7de..8796f05 100644 --- a/workflow/envs/spladder.yml +++ b/workflow/envs/spladder.yml @@ -5,4 +5,4 @@ dependencies: - python=3.8 - pip - pip: - - spladder==3.0.5 + - spladder==3.0.4 diff --git a/workflow/rules/altsplicing.smk b/workflow/rules/altsplicing.smk index 4105058..eb13eeb 100644 --- a/workflow/rules/altsplicing.smk +++ b/workflow/rules/altsplicing.smk @@ -73,5 +73,5 @@ rule combine_altsplicing: "../envs/bcftools.yml" shell: """ - bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 + bcftools concat --naive-force -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 """ diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index b6d4402..869e024 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -196,84 +196,118 @@ def get_input_hlatyping_PE(wildcards): ) +def get_filtered_reads_hlatyping_SE(wildcards): + bam = [] + idx = [] + + if wildcards.nartype == "DNA": + if len(config["data"]["dnaseq"]) != 0: + for key in config["data"]["dnaseq"].keys(): + bam += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_SE.bam", + sample=wildcards.sample, + group=key) + idx += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_SE.bam.bai", + sample=wildcards.sample, + group=key) + + + if wildcards.nartype == "RNA": + if len(config["data"]["rnaseq"]) != 0: + for key in config["data"]["rnaseq"].keys(): + bam += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_SE.bam", + sample=wildcards.sample, + group=key) + idx += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_SE.bam.bai", + sample=wildcards.sample, + group=key) + + return dict( + zip( + ["bam", "idx"], + [bam, idx] + ) + ) + + +def get_filtered_reads_hlatyping_PE(wildcards): + bam = [] + idx = [] + + if wildcards.nartype == "DNA": + if len(config["data"]["dnaseq"]) != 0: + for key in config["data"]["dnaseq"].keys(): + bam += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_PE_{readpair}.bam", + sample=wildcards.sample, + group=key, + readpair=wildcards.readpair) + idx += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_PE_{readpair}.bam.bai", + sample=wildcards.sample, + group=key, + readpair=wildcards.readpair) + + if wildcards.nartype == "RNA": + if len(config["data"]["rnaseq"]) != 0: + for key in config["data"]["rnaseq"].keys(): + bam += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_PE_{readpair}.bam", + sample=wildcards.sample, + group=key, + readpair=wildcards.readpair) + + idx += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_PE_{readpair}.bam.bai", + sample=wildcards.sample, + group=key, + readpair=wildcards.readpair) + + return dict( + zip( + ["bam", "idx"], + [bam, idx] + ) + ) + + def aggregate_mhcI_SE(wildcards): checkpoint_output = checkpoints.split_reads_mhcI_SE.get(**wildcards).output[0] - return expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_SE/{no}_result.tsv", + return expand("results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_SE/{no}_result.tsv", sample=wildcards.sample, - group=wildcards.group, nartype=wildcards.nartype, no=glob_wildcards(os.path.join(checkpoint_output, "R_{no}.bam")).no) def aggregate_mhcI_PE(wildcards): checkpoint_output = checkpoints.split_reads_mhcI_PE.get(**wildcards).output[0] - return expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_PE/{no}_result.tsv", + return expand("results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_PE/{no}_result.tsv", sample=wildcards.sample, - group=wildcards.group, nartype=wildcards.nartype, no=glob_wildcards(os.path.join(checkpoint_output, "R1_{no}.bam")).no) -def get_predicted_mhcI_alleles(wildcards): - values = [] - - # routines to genotype from DNA - if "DNA" in config['hlatyping']['MHC-I_mode']: - if config['data']['dnaseq'] is not None: - for key in config['data']['dnaseq'].keys(): - - # exclude normal samples (if specified) - if config['data']['normal'] is not None: - normal = config['data']['normal'].split(' ') - if key in normal: - continue - - values += expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_{readtype}.tsv", - sample = wildcards.sample, - group = key, - nartype = "DNA", - readtype = config['data']['dnaseq_readtype']) # add either SE or PE - else: # if no dnaseq data is specified, but mode is DNA or BOTH, then ignore - print('dnaseq data has not been specified in the config file, but specified mode for hla genotyping in config file is DNA or BOTH -- will be ignored') - - # routines to genotype from RNA - if "RNA" in config['hlatyping']['MHC-I_mode']: - if config['data']['rnaseq'] is not None: - for key in config['data']['rnaseq'].keys(): - - # exclude normal samples (if specified) - if config['data']['normal'] is not None: - normal = config['data']['normal'].split(' ') - if key in normal: - continue - - values += expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_{readtype}.tsv", - sample = wildcards.sample, - group = key, - nartype = "RNA", - readtype = config['data']['rnaseq_readtype']) # add either SE or PE) - else: # if no rnaseq data is specified, but mode is RNA or BOTH, then ignore - print('rnaseq data has not been specified in the config file, but specified mode for hla genotyping in config file is RNA or BOTH -- will be ignored') - - if len(values) == 0: - print('No data found. Check config file for correct specification of data and hla genotyping mode') - sys.exit(1) - - return values - def get_all_mhcI_alleles(wildcards): values = [] - if ("DNA" in config['hlatyping']['MHC-I_mode'] or - "RNA" in config['hlatyping']['MHC-I_mode']): - values += expand("results/{sample}/hla/mhc-I/genotyping/mhc-I.tsv", - sample = wildcards.sample) + if "DNA" in config["hlatyping"]["MHC-I_mode"]: + if len(config["data"]["dnaseq"]) != 0: + if config["data"]["dnaseq_readtype"] == "SE": + values += expand("results/{sample}/hla/mhc-I/genotyping/DNA_flt_merged_SE.tsv", + sample=wildcards.sample) + elif config["data"]["dnaseq_readtype"] == "PE": + values += expand("results/{sample}/hla/mhc-I/genotyping/DNA_flt_merged_PE.tsv", + sample=wildcards.sample) + + if "RNA" in config["hlatyping"]["MHC-I_mode"]: + if len(config["data"]["rnaseq"]) != 0: + if config["data"]["rnaseq_readtype"] == "SE": + values += expand("results/{sample}/hla/mhc-I/genotyping/RNA_flt_merged_SE.tsv", + sample=wildcards.sample) + elif config["data"]["rnaseq_readtype"] == "PE": + values += expand("results/{sample}/hla/mhc-I/genotyping/RNA_flt_merged_PE.tsv", + sample=wildcards.sample) if "custom" in config["hlatyping"]["MHC-I_mode"]: values += [config["data"]["custom"]["hlatyping"]["MHC-I"]] if len(values) == 0: - print('No hla data found. Check config file for correct specification of data and hla genotyping mode') + print("No HLA data found. Please check the config file for correct specification of data and HLA genotyping mode") sys.exit(1) return values diff --git a/workflow/rules/exitron.smk b/workflow/rules/exitron.smk index ed6a049..c59340e 100644 --- a/workflow/rules/exitron.smk +++ b/workflow/rules/exitron.smk @@ -135,6 +135,6 @@ rule combine_exitrons: "../envs/bcftools.yml" shell: """ - bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 + bcftools concat --naive-force -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 """ diff --git a/workflow/rules/hlatyping.smk b/workflow/rules/hlatyping.smk index 1b123f5..f3b3270 100644 --- a/workflow/rules/hlatyping.smk +++ b/workflow/rules/hlatyping.smk @@ -94,13 +94,8 @@ rule filter_reads_mhcI_SE: "../envs/yara.yml" shell: """ - if [ "{wildcards.nartype}" == "DNA" ]; then - yara_mapper -t {threads} -e 3 -f bam -u resources/hla/yara_index/{wildcards.nartype} \ + yara_mapper -t {threads} -e 3 -f bam -u resources/hla/yara_index/{wildcards.nartype} \ {input.reads} | samtools view -h -F 4 -b1 - | samtools sort - -o {output} > {log} - elif [ "{wildcards.nartype}" == "RNA" ]; then - yara_mapper -t {threads} -e 3 -f bam -u resources/hla/yara_index/{wildcards.nartype} \ - {input.reads} | samtools view -h -F 4 -b1 - | samtools sort - -o {output} > {log} - fi """ rule index_reads_mhcI_SE: @@ -116,22 +111,42 @@ rule index_reads_mhcI_SE: wrapper: "v2.3.0/bio/samtools/index" +rule merge_reads_mhcI_SE: + input: + unpack(get_filtered_reads_hlatyping_SE), + output: + bam="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE.bam", + idx="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE.bam.bai" + message: + "Merge the filtered {wildcards.nartype}seq reads for hlatyping of sample: {wildcards.sample}" + log: + "logs/{sample}/hla/merge_reads_mhcI_{nartype}_SE.log" + conda: + "../envs/samtools.yml" + threads: 4 + shell: + """ + samtools merge -@ {threads} {output.bam} {input.bam} > {log} 2>&1 + samtools index {output.bam} >> {log} 2>&1 + """ + + checkpoint split_reads_mhcI_SE: input: - reads="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_SE.bam", - index="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_SE.bam.bai" + reads="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE.bam", + index="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE.bam.bai" output: - directory("results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_SE/") + directory("results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE/") message: "Splitting filtered BAM files for HLA typing" log: - "logs/{sample}/hla/split_bam_{group}_{nartype}.log" + "logs/{sample}/hla/split_bam_{nartype}.log" conda: "../envs/gatk.yml" threads: 1 shell: """ - mkdir -p results/{wildcards.sample}/hla/mhc-I/reads/{wildcards.group}_{wildcards.nartype}_flt_SE/ + mkdir -p results/{wildcards.sample}/hla/mhc-I/reads/{wildcards.nartype}_flt_merged_SE/ gatk SplitSamByNumberOfReads \ -I {input.reads} \ --OUTPUT {output} \ @@ -141,44 +156,37 @@ checkpoint split_reads_mhcI_SE: rule hlatyping_mhcI_SE: input: - reads="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_SE/R_{no}.bam", + reads="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE/R_{no}.bam", output: - pdf="results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_SE/{no}_coverage_plot.pdf", - tsv="results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_SE/{no}_result.tsv" + pdf="results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_SE/{no}_coverage_plot.pdf", + tsv="results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_SE/{no}_result.tsv" message: "HLA typing from splitted BAM files" log: - "logs/{sample}/optitype/{group}_{nartype}_{no}_call.log" + "logs/{sample}/optitype/merged_{nartype}_{no}_call.log" conda: "../envs/optitype.yml" threads: 64 shell: """ - samtools index {input.reads} - if [ "{wildcards.nartype}" == "DNA" ]; then - OptiTypePipeline.py --input {input.reads} \ - --outdir results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.group}_{wildcards.nartype}_flt_SE/ \ - --prefix {wildcards.no} --dna -v > {log} - elif [ "{wildcards.nartype}" == "RNA" ]; then - OptiTypePipeline.py --input {input.reads} \ - --outdir results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.group}_{wildcards.nartype}_flt_SE/ \ - --prefix {wildcards.no} --rna -v > {log} - fi + python3 workflow/scripts/genotyping/optitype_wrapper.py \ + '{input.reads}' {wildcards.nartype} {wildcards.no} \ + results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.nartype}_flt_merged_SE/ > {log} """ rule combine_mhcI_SE: input: aggregate_mhcI_SE output: - "results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_SE.tsv" + "results/{sample}/hla/mhc-I/genotyping/{nartype}_SE.tsv" log: - "logs/{sample}/optitype/{group}_{nartype}_call.log" + "logs/{sample}/genotyping/combine_optitype_mhcI_{nartype}_call.log" conda: "../envs/basic.yml" threads: 1 shell: """ - python3 workflow/scripts/combine_optitype_results.py \ + python3 workflow/scripts/genotyping/combine_optitype_results.py \ '{input}' {output} """ @@ -219,24 +227,43 @@ rule index_reads_mhcI_PE: wrapper: "v2.3.0/bio/samtools/index" +rule merge_reads_mhcI_PE: + input: + unpack(get_filtered_reads_hlatyping_PE), + output: + bam="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_{readpair}.bam", + idx="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_{readpair}.bam.bai" + message: + "Merge the filtered {wildcards.nartype}seq reads for hlatyping of sample: {wildcards.sample} with readpair: {wildcards.readpair}" + log: + "logs/{sample}/hla/merge_reads_mhcI_{nartype}_PE_{readpair}.log", + conda: + "../envs/samtools.yml" + threads: 4 + shell: + """ + samtools merge -@ {threads} {output.bam} {input.bam} > {log} 2>&1 + samtools index {output.bam} >> {log} 2>&1 + """ + checkpoint split_reads_mhcI_PE: input: - fwd="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE_R1.bam", - fwd_idx="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE_R1.bam.bai", - rev="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE_R2.bam", - rev_idx="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE_R2.bam.bai" + fwd="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_R1.bam", + fwd_idx="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_R1.bam.bai", + rev="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_R2.bam", + rev_idx="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_R2.bam.bai" output: - directory("results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE/") + directory("results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE/") message: "Splitting filtered BAM files for HLA typing" log: - "logs/{sample}/hla/split_bam_{group}_{nartype}.log" + "logs/{sample}/hla/split_bam_{nartype}.log" conda: "../envs/gatk.yml" threads: 1 shell: """ - mkdir -p results/{wildcards.sample}/hla/mhc-I/reads/{wildcards.group}_{wildcards.nartype}_flt_PE/ + mkdir -p results/{wildcards.sample}/hla/mhc-I/reads/{wildcards.nartype}_flt_merged_PE/ gatk SplitSamByNumberOfReads \ -I {input.fwd} \ --OUTPUT {output} \ @@ -252,64 +279,41 @@ checkpoint split_reads_mhcI_PE: rule hlatyping_mhcI_PE: input: - fwd="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE/R1_{no}.bam", - rev="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE/R2_{no}.bam", + fwd="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE/R1_{no}.bam", + rev="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE/R2_{no}.bam", output: - pdf="results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_PE/{no}_coverage_plot.pdf", - tsv="results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_PE/{no}_result.tsv" + pdf="results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_PE/{no}_coverage_plot.pdf", + tsv="results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_PE/{no}_result.tsv" message: "HLA typing from splitted BAM files" log: - "logs/{sample}/optitype/{group}_{nartype}_{no}_call.log" + "logs/{sample}/optitype/merged_{nartype}_{no}_call.log" conda: "../envs/optitype.yml" threads: 64 shell: """ - samtools index {input.fwd} - samtools index {input.rev} - if [ "{wildcards.nartype}" == "DNA" ]; then - OptiTypePipeline.py --input {input.fwd} {input.rev} \ - --outdir results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.group}_{wildcards.nartype}_flt_PE/ \ - --prefix {wildcards.no} --dna -v > {log} - elif [ "{wildcards.nartype}" == "RNA" ]; then - OptiTypePipeline.py --input {input.fwd} {input.rev} \ - --outdir results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.group}_{wildcards.nartype}_flt_PE/ \ - --prefix {wildcards.no} --rna -v > {log} - fi - """ + python3 workflow/scripts/genotyping/optitype_wrapper.py \ + '{input.fwd} {input.rev}' {wildcards.nartype} {wildcards.no} \ + results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.nartype}_flt_merged_PE/ > {log} -rule combine_mhcI_PE: - input: - aggregate_mhcI_PE - output: - "results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_PE.tsv" - log: - "logs/{sample}/optitype/{group}_{nartype}_call.log" - conda: - "../envs/basic.yml" - threads: 1 - shell: - """ - python3 workflow/scripts/combine_optitype_results.py \ - '{input}' {output} """ -rule merge_predicted_mhcI_alleles: +rule combine_hlatyping_mhcI_PE: input: - get_predicted_mhcI_alleles + aggregate_mhcI_PE, output: - "results/{sample}/hla/mhc-I/genotyping/mhc-I.tsv", + "results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_PE.tsv", message: - "Merging HLA alleles from different sources" + "Combining HLA alleles from predicted optitype results" log: - "logs/{sample}/optitype/merge_predicted_mhc-I.log" + "logs/{sample}/genotyping/combine_optitype_mhcI_{nartype}_PE.log" conda: "../envs/basic.yml" threads: 1 shell: """ - python workflow/scripts/genotyping/merge_predicted_mhcI.py \ + python3 workflow/scripts/genotyping/combine_optitype_results.py \ '{input}' {output} """ @@ -319,7 +323,7 @@ rule combine_all_mhcI_alleles: output: "results/{sample}/hla/mhc-I.tsv" message: - "Combining HLA alleles from different sources" + "Combining HLA alleles from different sources (e.g., predicted and user-defined alleles)" log: "logs/{sample}/genotyping/combine_all_mhc-I.log" conda: @@ -328,7 +332,7 @@ rule combine_all_mhcI_alleles: shell: """ python workflow/scripts/genotyping/combine_all_alleles.py \ - '{input}' mhc-I {output} > {log} 2>&1\ + '{input}' mhc-I {output} """ diff --git a/workflow/rules/indel.smk b/workflow/rules/indel.smk index c857070..d252702 100644 --- a/workflow/rules/indel.smk +++ b/workflow/rules/indel.smk @@ -129,7 +129,7 @@ rule combine_longindels: "../envs/bcftools.yml" shell: """ - bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 + bcftools concat --naive-force -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 """ ####### MUTECT2 ###### @@ -417,6 +417,6 @@ rule combine_somatic_SNVs_m2: "../envs/bcftools.yml" shell: """ - bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 + bcftools concat --naive-force -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1 """ diff --git a/workflow/scripts/genotyping/combine_all_alleles.py b/workflow/scripts/genotyping/combine_all_alleles.py index 56b7ce8..7c6ba1b 100644 --- a/workflow/scripts/genotyping/combine_all_alleles.py +++ b/workflow/scripts/genotyping/combine_all_alleles.py @@ -49,4 +49,9 @@ def main(): fh_out.write(f'{alleles[allele]}\t{allele}\n') fh_out.close() + if len(alleles) == 0: + print("No valid alleles found in the input files!") + print("Please check the input files (e.g., dnaseq, rnaseq, user-provided alleles) and try again") + sys.exit(1) + main() diff --git a/workflow/scripts/combine_optitype_results.py b/workflow/scripts/genotyping/combine_optitype_results.py similarity index 88% rename from workflow/scripts/combine_optitype_results.py rename to workflow/scripts/genotyping/combine_optitype_results.py index b5ebe78..12617dc 100644 --- a/workflow/scripts/combine_optitype_results.py +++ b/workflow/scripts/genotyping/combine_optitype_results.py @@ -9,7 +9,10 @@ def main(): for alfile in sys.argv[1].split(' '): with open(alfile, 'r') as f: - next(f) + try: + next(f) + except StopIteration: + continue # empty file for line in f: line_list = line.rstrip().split('\t') diff --git a/workflow/scripts/genotyping/merge_predicted_mhcI.py b/workflow/scripts/genotyping/merge_predicted_mhcI.py deleted file mode 100644 index ed94b4e..0000000 --- a/workflow/scripts/genotyping/merge_predicted_mhcI.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -import sys -import re -from pathlib import Path - -def main(): - files = sys.argv[1] - alleles = {} - for file in files.rstrip().split(' '): - filestem = Path(file).stem - se = re.search(r'^(.+)_(RNA|DNA)_(SE|PE)', filestem) - group = se.group(1) - - fh = open(file, 'r') - for line in fh: - al = line.rstrip() - if al not in alleles: - alleles[al] = [] - - alleles[al].append(group) - - - out = open(sys.argv[2], 'w') - for al in dict(sorted(alleles.items())): - for i,v in enumerate(alleles[al]): - if i == 0: - out.write(v) - else: - out.write(f',{v}') - out.write(f'\t{al}\n') - -main() diff --git a/workflow/scripts/genotyping/optitype_wrapper.py b/workflow/scripts/genotyping/optitype_wrapper.py new file mode 100644 index 0000000..f50b1b9 --- /dev/null +++ b/workflow/scripts/genotyping/optitype_wrapper.py @@ -0,0 +1,42 @@ +import sys +import os +import subprocess + +""" + Usage: + python3 optitype_wrapper.py +""" + +def main(): + inbams = sys.argv[1] + nartype = "dna" if sys.argv[2] == "DNA" else "rna" + nartype = sys.argv[2] + prefix = sys.argv[3] + outpath = sys.argv[4] + + for filename in inbams.split(' '): + if not os.path.exists(filename + '.bai'): + print("Indexing BAM file: " + filename) + samtools_idx = subprocess.Popen("samtools index " + + filename, shell=True) + + # check if input file is not empty (using samtools) + try: + samtools = subprocess.Popen("samtools view -c " + inbams.split(' ')[0] + " 2>/dev/null", + stdout=subprocess.PIPE, shell=True) + + if int(samtools.communicate()[0]) < 10: + print("Input BAM file: " + inbams + " is empty") + # create an empty output file + pdf = subprocess.Popen("touch " + outpath + prefix + "_coverage_plot.pdf", shell=True) + tsv = subprocess.Popen("touch " + outpath + prefix + "_result.tsv", shell=True) + else: + # call optitype + optitype = subprocess.Popen("optitype --input " + inbams + + " --prefix " + prefix + " --" + nartype + "-v", shell=True) + + + except subprocess.CalledProcessError as e: + print("samtools failed") + +main()