From f5a4b38643cfee97176ab7bc440d4779a0dd5246 Mon Sep 17 00:00:00 2001
From: riasc <richard.schaefer@zoho.com>
Date: Mon, 24 Jun 2024 23:51:22 -0500
Subject: [PATCH] enhanced UI

---
 CHANGELOG.md                                  |   5 +-
 workflow/envs/spladder.yml                    |   2 +-
 workflow/rules/altsplicing.smk                |   2 +-
 workflow/rules/common.smk                     | 146 ++++++++++-------
 workflow/rules/exitron.smk                    |   2 +-
 workflow/rules/hlatyping.smk                  | 152 +++++++++---------
 workflow/rules/indel.smk                      |   4 +-
 .../scripts/genotyping/combine_all_alleles.py |   5 +
 .../combine_optitype_results.py               |   5 +-
 .../genotyping/merge_predicted_mhcI.py        |  32 ----
 .../scripts/genotyping/optitype_wrapper.py    |  42 +++++
 11 files changed, 228 insertions(+), 169 deletions(-)
 rename workflow/scripts/{ => genotyping}/combine_optitype_results.py (88%)
 delete mode 100644 workflow/scripts/genotyping/merge_predicted_mhcI.py
 create mode 100644 workflow/scripts/genotyping/optitype_wrapper.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 565465e..f41306b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,7 +21,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fix 
 
 - Separated samtools, bcftools and realign environments to avoid conflicts
-- Updated splAdder to v3.0.5
+- Changed order of genotyping rules to catch errors when no alleles can be found
+    - Alleles are merged according to nartype (e.g., DNA, RNA) and then combined
+- Force concat of VCF files in genotyping to avoid errors when no variants are found
+- Added optitype wrapper to avoid errors when empty BAM files are provided / no HLA reads
 
 ## [0.2.6] - 2024-06-20
 
diff --git a/workflow/envs/spladder.yml b/workflow/envs/spladder.yml
index 3eea7de..8796f05 100644
--- a/workflow/envs/spladder.yml
+++ b/workflow/envs/spladder.yml
@@ -5,4 +5,4 @@ dependencies:
  - python=3.8
  - pip
  - pip:
-   - spladder==3.0.5
+   - spladder==3.0.4
diff --git a/workflow/rules/altsplicing.smk b/workflow/rules/altsplicing.smk
index 4105058..eb13eeb 100644
--- a/workflow/rules/altsplicing.smk
+++ b/workflow/rules/altsplicing.smk
@@ -73,5 +73,5 @@ rule combine_altsplicing:
     "../envs/bcftools.yml"
   shell:
     """
-      bcftools concat --naive -O z {input} -o  - | bcftools sort -O z -o {output} > {log} 2>&1
+      bcftools concat --naive-force -O z {input} -o  - | bcftools sort -O z -o {output} > {log} 2>&1
     """
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index b6d4402..869e024 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -196,84 +196,118 @@ def get_input_hlatyping_PE(wildcards):
     )
 
 
+def get_filtered_reads_hlatyping_SE(wildcards):
+  bam = []
+  idx = []
+
+  if wildcards.nartype == "DNA":
+    if len(config["data"]["dnaseq"]) != 0:
+      for key in config["data"]["dnaseq"].keys():
+        bam += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_SE.bam",
+                      sample=wildcards.sample,
+                      group=key)
+        idx += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_SE.bam.bai",
+                      sample=wildcards.sample,
+                      group=key)
+
+
+  if wildcards.nartype == "RNA":
+    if len(config["data"]["rnaseq"]) != 0:
+      for key in config["data"]["rnaseq"].keys():
+        bam += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_SE.bam",
+                      sample=wildcards.sample,
+                      group=key)
+        idx += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_SE.bam.bai",
+                      sample=wildcards.sample,
+                      group=key)
+
+  return dict(
+      zip(
+        ["bam", "idx"],
+        [bam, idx]
+    )
+  )
+
+
+def get_filtered_reads_hlatyping_PE(wildcards):
+  bam = []
+  idx = []
+
+  if wildcards.nartype == "DNA":
+    if len(config["data"]["dnaseq"]) != 0:
+      for key in config["data"]["dnaseq"].keys():
+        bam += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_PE_{readpair}.bam",
+                      sample=wildcards.sample,
+                      group=key,
+                      readpair=wildcards.readpair)
+        idx += expand("results/{sample}/hla/mhc-I/reads/{group}_DNA_flt_PE_{readpair}.bam.bai",
+                      sample=wildcards.sample,
+                      group=key,
+                      readpair=wildcards.readpair)
+
+  if wildcards.nartype == "RNA":
+    if len(config["data"]["rnaseq"]) != 0:
+      for key in config["data"]["rnaseq"].keys():
+        bam += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_PE_{readpair}.bam",
+                      sample=wildcards.sample,
+                      group=key,
+                      readpair=wildcards.readpair)
+
+        idx += expand("results/{sample}/hla/mhc-I/reads/{group}_RNA_flt_PE_{readpair}.bam.bai",
+                      sample=wildcards.sample,
+                      group=key,
+                      readpair=wildcards.readpair)
+
+  return dict(
+      zip(
+        ["bam", "idx"],
+        [bam, idx]
+    )
+  )
+
+
 def aggregate_mhcI_SE(wildcards):
   checkpoint_output = checkpoints.split_reads_mhcI_SE.get(**wildcards).output[0]
-  return expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_SE/{no}_result.tsv",
+  return expand("results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_SE/{no}_result.tsv",
     sample=wildcards.sample,
-    group=wildcards.group,
     nartype=wildcards.nartype,
     no=glob_wildcards(os.path.join(checkpoint_output, "R_{no}.bam")).no)
 
 
 def aggregate_mhcI_PE(wildcards):
   checkpoint_output = checkpoints.split_reads_mhcI_PE.get(**wildcards).output[0]
-  return expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_PE/{no}_result.tsv",
+  return expand("results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_PE/{no}_result.tsv",
     sample=wildcards.sample,
-    group=wildcards.group,
     nartype=wildcards.nartype,
     no=glob_wildcards(os.path.join(checkpoint_output, "R1_{no}.bam")).no)
 
 
-def get_predicted_mhcI_alleles(wildcards):
-  values = []
-
-  # routines to genotype from DNA
-  if "DNA" in config['hlatyping']['MHC-I_mode']:
-    if config['data']['dnaseq'] is not None:
-      for key in config['data']['dnaseq'].keys():
-       
-        # exclude normal samples (if specified)
-        if config['data']['normal'] is not None:
-          normal = config['data']['normal'].split(' ')
-          if key in normal:
-            continue
-
-        values += expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_{readtype}.tsv",
-                         sample = wildcards.sample,
-                         group = key,
-                         nartype = "DNA",
-                         readtype = config['data']['dnaseq_readtype']) # add either SE or PE
-    else: # if no dnaseq data is specified, but mode is DNA or BOTH, then ignore
-      print('dnaseq data has not been specified in the config file, but specified mode for hla genotyping in config file is DNA or BOTH -- will be ignored')
-
-  # routines to genotype from RNA
-  if "RNA" in config['hlatyping']['MHC-I_mode']:
-    if config['data']['rnaseq'] is not None:
-      for key in config['data']['rnaseq'].keys():
-
-        # exclude normal samples (if specified)
-        if config['data']['normal'] is not None:
-          normal = config['data']['normal'].split(' ')
-          if key in normal:
-            continue
-
-        values += expand("results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_{readtype}.tsv",
-                           sample = wildcards.sample,
-                           group = key,
-                           nartype = "RNA",
-                           readtype = config['data']['rnaseq_readtype']) # add either SE or PE)
-    else: # if no rnaseq data is specified, but mode is RNA or BOTH, then ignore
-      print('rnaseq data has not been specified in the config file, but specified mode for hla genotyping in config file is RNA or BOTH -- will be ignored')
-
-  if len(values) == 0:
-    print('No data found. Check config file for correct specification of data and hla genotyping mode')
-    sys.exit(1)
-
-  return values
-
 def get_all_mhcI_alleles(wildcards):
   values = []
 
-  if ("DNA" in config['hlatyping']['MHC-I_mode'] or
-      "RNA" in config['hlatyping']['MHC-I_mode']):
-    values += expand("results/{sample}/hla/mhc-I/genotyping/mhc-I.tsv",
-                    sample = wildcards.sample)
+  if "DNA" in config["hlatyping"]["MHC-I_mode"]:
+    if len(config["data"]["dnaseq"]) != 0:
+      if config["data"]["dnaseq_readtype"] == "SE":
+        values += expand("results/{sample}/hla/mhc-I/genotyping/DNA_flt_merged_SE.tsv",
+                         sample=wildcards.sample)
+      elif config["data"]["dnaseq_readtype"] == "PE":
+        values += expand("results/{sample}/hla/mhc-I/genotyping/DNA_flt_merged_PE.tsv",
+                         sample=wildcards.sample)
+
+  if "RNA" in config["hlatyping"]["MHC-I_mode"]:
+    if len(config["data"]["rnaseq"]) != 0:
+      if config["data"]["rnaseq_readtype"] == "SE":
+        values += expand("results/{sample}/hla/mhc-I/genotyping/RNA_flt_merged_SE.tsv",
+                         sample=wildcards.sample)
+      elif config["data"]["rnaseq_readtype"] == "PE":
+        values += expand("results/{sample}/hla/mhc-I/genotyping/RNA_flt_merged_PE.tsv",
+                         sample=wildcards.sample)
 
   if "custom" in config["hlatyping"]["MHC-I_mode"]:
     values += [config["data"]["custom"]["hlatyping"]["MHC-I"]]
 
   if len(values) == 0:
-    print('No hla data found. Check config file for correct specification of data and hla genotyping mode')
+    print("No HLA data found. Please check the config file for correct specification of data and HLA genotyping mode")
     sys.exit(1)
 
   return values
diff --git a/workflow/rules/exitron.smk b/workflow/rules/exitron.smk
index ed6a049..c59340e 100644
--- a/workflow/rules/exitron.smk
+++ b/workflow/rules/exitron.smk
@@ -135,6 +135,6 @@ rule combine_exitrons:
     "../envs/bcftools.yml"
   shell:
     """
-      bcftools concat --naive -O z {input} -o  - | bcftools sort -O z -o {output} > {log} 2>&1
+      bcftools concat --naive-force -O z {input} -o  - | bcftools sort -O z -o {output} > {log} 2>&1
     """
 
diff --git a/workflow/rules/hlatyping.smk b/workflow/rules/hlatyping.smk
index 1b123f5..f3b3270 100644
--- a/workflow/rules/hlatyping.smk
+++ b/workflow/rules/hlatyping.smk
@@ -94,13 +94,8 @@ rule filter_reads_mhcI_SE:
     "../envs/yara.yml"
   shell:
     """
-      if [ "{wildcards.nartype}" == "DNA" ]; then
-        yara_mapper -t {threads} -e 3 -f bam -u resources/hla/yara_index/{wildcards.nartype} \
+      yara_mapper -t {threads} -e 3 -f bam -u resources/hla/yara_index/{wildcards.nartype} \
           {input.reads} | samtools view -h -F 4 -b1 - | samtools sort - -o {output} > {log}
-      elif [ "{wildcards.nartype}" == "RNA" ]; then
-        yara_mapper -t {threads} -e 3 -f bam -u resources/hla/yara_index/{wildcards.nartype} \
-            {input.reads} | samtools view -h -F 4 -b1 - | samtools sort - -o {output} > {log}
-      fi
     """
 
 rule index_reads_mhcI_SE:
@@ -116,22 +111,42 @@ rule index_reads_mhcI_SE:
   wrapper:
     "v2.3.0/bio/samtools/index"
 
+rule merge_reads_mhcI_SE:
+  input:
+    unpack(get_filtered_reads_hlatyping_SE),
+  output:
+    bam="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE.bam",
+    idx="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE.bam.bai"
+  message:
+    "Merge the filtered {wildcards.nartype}seq reads for hlatyping of sample: {wildcards.sample}"
+  log:
+    "logs/{sample}/hla/merge_reads_mhcI_{nartype}_SE.log"
+  conda:
+    "../envs/samtools.yml"
+  threads: 4
+  shell:
+    """
+      samtools merge -@ {threads} {output.bam} {input.bam} > {log} 2>&1
+      samtools index {output.bam} >> {log} 2>&1
+    """
+
+
 checkpoint split_reads_mhcI_SE:
   input:
-    reads="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_SE.bam",
-    index="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_SE.bam.bai"
+    reads="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE.bam",
+    index="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE.bam.bai"
   output:
-    directory("results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_SE/")
+    directory("results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE/")
   message:
     "Splitting filtered BAM files for HLA typing"
   log:
-    "logs/{sample}/hla/split_bam_{group}_{nartype}.log"
+    "logs/{sample}/hla/split_bam_{nartype}.log"
   conda:
     "../envs/gatk.yml"
   threads: 1
   shell:
     """
-      mkdir -p results/{wildcards.sample}/hla/mhc-I/reads/{wildcards.group}_{wildcards.nartype}_flt_SE/
+      mkdir -p results/{wildcards.sample}/hla/mhc-I/reads/{wildcards.nartype}_flt_merged_SE/
       gatk SplitSamByNumberOfReads \
           -I {input.reads} \
           --OUTPUT {output} \
@@ -141,44 +156,37 @@ checkpoint split_reads_mhcI_SE:
 
 rule hlatyping_mhcI_SE:  
   input:
-    reads="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_SE/R_{no}.bam",
+    reads="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_SE/R_{no}.bam",
   output:
-    pdf="results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_SE/{no}_coverage_plot.pdf",
-    tsv="results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_SE/{no}_result.tsv"
+    pdf="results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_SE/{no}_coverage_plot.pdf",
+    tsv="results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_SE/{no}_result.tsv"
   message:
     "HLA typing from splitted BAM files"
   log:
-    "logs/{sample}/optitype/{group}_{nartype}_{no}_call.log"
+    "logs/{sample}/optitype/merged_{nartype}_{no}_call.log"
   conda:
     "../envs/optitype.yml"
   threads: 64
   shell:
     """
-      samtools index {input.reads}
-      if [ "{wildcards.nartype}" == "DNA" ]; then
-        OptiTypePipeline.py --input {input.reads} \
-            --outdir results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.group}_{wildcards.nartype}_flt_SE/ \
-            --prefix {wildcards.no} --dna -v > {log}
-      elif [ "{wildcards.nartype}" == "RNA" ]; then
-        OptiTypePipeline.py --input {input.reads} \
-            --outdir results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.group}_{wildcards.nartype}_flt_SE/ \
-            --prefix {wildcards.no} --rna -v > {log}
-      fi
+      python3 workflow/scripts/genotyping/optitype_wrapper.py \
+          '{input.reads}' {wildcards.nartype} {wildcards.no} \
+          results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.nartype}_flt_merged_SE/ > {log}
     """
 
 rule combine_mhcI_SE:
   input:
     aggregate_mhcI_SE
   output:
-    "results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_SE.tsv"
+    "results/{sample}/hla/mhc-I/genotyping/{nartype}_SE.tsv"
   log:
-    "logs/{sample}/optitype/{group}_{nartype}_call.log"
+    "logs/{sample}/genotyping/combine_optitype_mhcI_{nartype}_call.log"
   conda:
     "../envs/basic.yml"
   threads: 1
   shell:
     """
-      python3 workflow/scripts/combine_optitype_results.py \
+      python3 workflow/scripts/genotyping/combine_optitype_results.py \
           '{input}' {output}
     """
 
@@ -219,24 +227,43 @@ rule index_reads_mhcI_PE:
   wrapper:
     "v2.3.0/bio/samtools/index"
 
+rule merge_reads_mhcI_PE:
+  input:
+    unpack(get_filtered_reads_hlatyping_PE),
+  output:
+    bam="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_{readpair}.bam",
+    idx="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_{readpair}.bam.bai"
+  message:
+    "Merge the filtered {wildcards.nartype}seq reads for hlatyping of sample: {wildcards.sample} with readpair: {wildcards.readpair}"
+  log:
+    "logs/{sample}/hla/merge_reads_mhcI_{nartype}_PE_{readpair}.log",
+  conda:
+    "../envs/samtools.yml"
+  threads: 4
+  shell:
+    """
+      samtools merge -@ {threads} {output.bam} {input.bam} > {log} 2>&1
+      samtools index {output.bam} >> {log} 2>&1
+    """
+
 checkpoint split_reads_mhcI_PE:
   input:
-    fwd="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE_R1.bam",
-    fwd_idx="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE_R1.bam.bai",
-    rev="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE_R2.bam",
-    rev_idx="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE_R2.bam.bai"
+    fwd="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_R1.bam",
+    fwd_idx="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_R1.bam.bai",
+    rev="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_R2.bam",
+    rev_idx="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE_R2.bam.bai"
   output:
-    directory("results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE/")
+    directory("results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE/")
   message:
     "Splitting filtered BAM files for HLA typing"
   log:
-    "logs/{sample}/hla/split_bam_{group}_{nartype}.log"
+    "logs/{sample}/hla/split_bam_{nartype}.log"
   conda:
     "../envs/gatk.yml"
   threads: 1
   shell:
     """
-      mkdir -p results/{wildcards.sample}/hla/mhc-I/reads/{wildcards.group}_{wildcards.nartype}_flt_PE/
+      mkdir -p results/{wildcards.sample}/hla/mhc-I/reads/{wildcards.nartype}_flt_merged_PE/
       gatk SplitSamByNumberOfReads \
           -I {input.fwd} \
           --OUTPUT {output} \
@@ -252,64 +279,41 @@ checkpoint split_reads_mhcI_PE:
 
 rule hlatyping_mhcI_PE:  
   input:
-    fwd="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE/R1_{no}.bam",
-    rev="results/{sample}/hla/mhc-I/reads/{group}_{nartype}_flt_PE/R2_{no}.bam",
+    fwd="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE/R1_{no}.bam",
+    rev="results/{sample}/hla/mhc-I/reads/{nartype}_flt_merged_PE/R2_{no}.bam",
   output:
-    pdf="results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_PE/{no}_coverage_plot.pdf",
-    tsv="results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_flt_PE/{no}_result.tsv"
+    pdf="results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_PE/{no}_coverage_plot.pdf",
+    tsv="results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_PE/{no}_result.tsv"
   message:
     "HLA typing from splitted BAM files"
   log:
-    "logs/{sample}/optitype/{group}_{nartype}_{no}_call.log"
+    "logs/{sample}/optitype/merged_{nartype}_{no}_call.log"
   conda:
     "../envs/optitype.yml"
   threads: 64
   shell:
     """
-      samtools index {input.fwd}
-      samtools index {input.rev}
-      if [ "{wildcards.nartype}" == "DNA" ]; then
-        OptiTypePipeline.py --input {input.fwd} {input.rev} \
-            --outdir results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.group}_{wildcards.nartype}_flt_PE/ \
-            --prefix {wildcards.no} --dna -v > {log}
-      elif [ "{wildcards.nartype}" == "RNA" ]; then
-        OptiTypePipeline.py --input {input.fwd} {input.rev} \
-            --outdir results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.group}_{wildcards.nartype}_flt_PE/ \
-            --prefix {wildcards.no} --rna -v > {log}
-      fi
-    """
+      python3 workflow/scripts/genotyping/optitype_wrapper.py \
+          '{input.fwd} {input.rev}' {wildcards.nartype} {wildcards.no} \
+          results/{wildcards.sample}/hla/mhc-I/genotyping/{wildcards.nartype}_flt_merged_PE/ > {log}
 
-rule combine_mhcI_PE:
-  input:
-    aggregate_mhcI_PE
-  output:
-    "results/{sample}/hla/mhc-I/genotyping/{group}_{nartype}_PE.tsv"
-  log:
-    "logs/{sample}/optitype/{group}_{nartype}_call.log"
-  conda:
-    "../envs/basic.yml"
-  threads: 1
-  shell:
-    """
-      python3 workflow/scripts/combine_optitype_results.py \
-          '{input}' {output}
     """
 
-rule merge_predicted_mhcI_alleles:
+rule combine_hlatyping_mhcI_PE:
   input:
-    get_predicted_mhcI_alleles
+    aggregate_mhcI_PE,
   output:
-    "results/{sample}/hla/mhc-I/genotyping/mhc-I.tsv",
+    "results/{sample}/hla/mhc-I/genotyping/{nartype}_flt_merged_PE.tsv",
   message:
-    "Merging HLA alleles from different sources"
+    "Combining HLA alleles from predicted optitype results"
   log:
-    "logs/{sample}/optitype/merge_predicted_mhc-I.log"
+    "logs/{sample}/genotyping/combine_optitype_mhcI_{nartype}_PE.log"
   conda:
     "../envs/basic.yml"
   threads: 1
   shell:
     """
-      python workflow/scripts/genotyping/merge_predicted_mhcI.py \
+      python3 workflow/scripts/genotyping/combine_optitype_results.py \
           '{input}' {output}
     """
 
@@ -319,7 +323,7 @@ rule combine_all_mhcI_alleles:
   output:
     "results/{sample}/hla/mhc-I.tsv"
   message:
-    "Combining HLA alleles from different sources"
+    "Combining HLA alleles from different sources (e.g., predicted and user-defined alleles)"
   log:
     "logs/{sample}/genotyping/combine_all_mhc-I.log"
   conda:
@@ -328,7 +332,7 @@ rule combine_all_mhcI_alleles:
   shell:
     """
       python workflow/scripts/genotyping/combine_all_alleles.py \
-          '{input}' mhc-I {output} > {log} 2>&1\
+          '{input}' mhc-I {output}
     """
 
     
diff --git a/workflow/rules/indel.smk b/workflow/rules/indel.smk
index c857070..d252702 100644
--- a/workflow/rules/indel.smk
+++ b/workflow/rules/indel.smk
@@ -129,7 +129,7 @@ rule combine_longindels:
     "../envs/bcftools.yml"
   shell:
     """
-      bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1
+      bcftools concat --naive-force -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1
     """
 
 ####### MUTECT2 ######
@@ -417,6 +417,6 @@ rule combine_somatic_SNVs_m2:
     "../envs/bcftools.yml"
   shell:
     """
-      bcftools concat --naive -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1
+      bcftools concat --naive-force -O z {input} -o - | bcftools sort -O z -o {output} > {log} 2>&1
     """
 
diff --git a/workflow/scripts/genotyping/combine_all_alleles.py b/workflow/scripts/genotyping/combine_all_alleles.py
index 56b7ce8..7c6ba1b 100644
--- a/workflow/scripts/genotyping/combine_all_alleles.py
+++ b/workflow/scripts/genotyping/combine_all_alleles.py
@@ -49,4 +49,9 @@ def main():
         fh_out.write(f'{alleles[allele]}\t{allele}\n')
     fh_out.close()
 
+    if len(alleles) == 0:
+        print("No valid alleles found in the input files!")
+        print("Please check the input files (e.g., dnaseq, rnaseq, user-provided alleles) and try again")
+        sys.exit(1)
+
 main()
diff --git a/workflow/scripts/combine_optitype_results.py b/workflow/scripts/genotyping/combine_optitype_results.py
similarity index 88%
rename from workflow/scripts/combine_optitype_results.py
rename to workflow/scripts/genotyping/combine_optitype_results.py
index b5ebe78..12617dc 100644
--- a/workflow/scripts/combine_optitype_results.py
+++ b/workflow/scripts/genotyping/combine_optitype_results.py
@@ -9,7 +9,10 @@ def main():
 
     for alfile in sys.argv[1].split(' '):
         with open(alfile, 'r') as f:
-            next(f)
+            try:
+                next(f)
+            except StopIteration:
+                continue # empty file
             for line in f:
 
                 line_list = line.rstrip().split('\t')
diff --git a/workflow/scripts/genotyping/merge_predicted_mhcI.py b/workflow/scripts/genotyping/merge_predicted_mhcI.py
deleted file mode 100644
index ed94b4e..0000000
--- a/workflow/scripts/genotyping/merge_predicted_mhcI.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import os
-import sys
-import re
-from pathlib import Path
-
-def main():
-    files = sys.argv[1]
-    alleles = {}
-    for file in files.rstrip().split(' '):
-        filestem = Path(file).stem
-        se = re.search(r'^(.+)_(RNA|DNA)_(SE|PE)', filestem)
-        group = se.group(1)
-
-        fh = open(file, 'r')
-        for line in fh:
-            al = line.rstrip()
-            if al not in alleles:
-                alleles[al] = []
-            
-            alleles[al].append(group)
-
-
-    out = open(sys.argv[2], 'w')
-    for al in dict(sorted(alleles.items())):
-        for i,v in enumerate(alleles[al]):
-            if i == 0:
-                out.write(v)
-            else:
-                out.write(f',{v}')
-        out.write(f'\t{al}\n')
-
-main()
diff --git a/workflow/scripts/genotyping/optitype_wrapper.py b/workflow/scripts/genotyping/optitype_wrapper.py
new file mode 100644
index 0000000..f50b1b9
--- /dev/null
+++ b/workflow/scripts/genotyping/optitype_wrapper.py
@@ -0,0 +1,42 @@
+import sys
+import os
+import subprocess
+
+"""
+    Usage:
+        python3 optitype_wrapper.py <input> <nartype> <prefix> <outpath>
+"""
+
+def main():
+    inbams = sys.argv[1]
+    nartype = "dna" if sys.argv[2] == "DNA" else "rna"
+    nartype = sys.argv[2]
+    prefix = sys.argv[3]
+    outpath = sys.argv[4]
+
+    for filename in inbams.split(' '):
+        if not os.path.exists(filename + '.bai'):
+            print("Indexing BAM file: " + filename)
+            samtools_idx = subprocess.Popen("samtools index " + 
+                                            filename, shell=True)
+
+    # check if input file is not empty (using samtools)
+    try:
+        samtools = subprocess.Popen("samtools view -c " + inbams.split(' ')[0] + " 2>/dev/null", 
+                                    stdout=subprocess.PIPE, shell=True)
+
+        if int(samtools.communicate()[0]) < 10:
+            print("Input BAM file: " + inbams + " is empty")
+            # create an empty output file
+            pdf = subprocess.Popen("touch " + outpath + prefix + "_coverage_plot.pdf", shell=True)
+            tsv = subprocess.Popen("touch " + outpath + prefix + "_result.tsv", shell=True)
+        else:
+            # call optitype 
+            optitype = subprocess.Popen("optitype --input " + inbams + 
+                                        " --prefix " + prefix + " --" + nartype + "-v", shell=True)
+
+
+    except subprocess.CalledProcessError as e:
+        print("samtools failed")
+
+main()