Move every process out of main.nf into its own module file.

main.nf keeps `nextflow.enable.dsl=2` and the `workflow` block, and pulls in
seqkit_fetch_target, primer3_conf, primer3_index, primer3_calc and
primer3_results2fasta with `include` statements. Each process body is moved to
modules/<process_name>/main.nf; apart from indentation and blank lines between
sections, the moved bodies are the same as the removed ones.

Reviewer notes:
  * `bespoke_regex` in primer3_results2fasta uses the named groups id, penalty,
    left, right and size; these are exactly the names read back through
    m.group(...) in the loop that writes the FASTA records.
  * NOTE(review): in primer3_results2fasta the `tag` directive sits after
    `output:` in both the old and the new file. Nextflow's process
    documentation asks for directives to come before input/output - confirm
    and move it in a follow-up; it is left in place here so this stays a move.
  * NOTE(review): the first include path has no trailing slash while the other
    four do - presumably all resolve to <dir>/main.nf; confirm.
  * The 50000 / 49965 values in primer3_conf are tied by the in-script comments
    to the `-u 50000` flag used in seqkit_fetch_target.

diff --git a/main.nf b/main.nf
index b906cf8..cd39819 100644
--- a/main.nf
+++ b/main.nf
@@ -1,156 +1,10 @@
 nextflow.enable.dsl=2
 
-process seqkit_fetch_target {
-conda "seqkit"
-input:
-    val seq
-    path fasta
-output:
-    path "${seq}_target.fa" , emit:fasta
-    path "${seq}_target.gtf" , emit:gtf
-shell:
-'''
-echo ">seq
-!{seq}
-" > seq.fa
-
-seqkit locate -i --gtf -p "!{seq}" !{fasta} > !{seq}_target.gtf
-
-seqkit subseq --gtf !{seq}_target.gtf -u 50000 -d 50000 !{fasta} > !{seq}_target.fa
-
-if [[ ! -s !{seq}_target.gtf ]]
-then
-exit 7
-fi
-
-seqkit stat !{seq}_target.gtf
-seqkit stat !{seq}_target.fa
-'''
-}
-
-process primer3_index {
-  conda "genometester4"
-  publishDir "results/${task.process}",mode:'link',overwrite:'true'
-  cache 'deep'
-  tag "$fasta"
-  input:
-    path fasta
-  output:
-    path "${fasta}*.list"
-  shell:
-'''
-glistmaker !{fasta} -w 11
-glistmaker !{fasta} -w 16
-ln out_11.list !{fasta}_11.list
-ln out_16.list !{fasta}_16.list
-'''
-}
-
-process primer3_conf {
-  publishDir "results/${task.process}",mode:'link',overwrite:'true'
-  cache 'deep'
-  tag "$fasta"
-  input:
-    path fasta
-    path target
-  output:
-    path "custom_primer3.conf"
-  shell:
-'''
-##The MASK_KMERLIST does give an error if run with a dummy value, so expect it is working?
-## See the Primer3 manual for details on these parameters
-
-## The "SEQUENCE_TARGET" parameter is fairly important, as that ensures the PCR product goes across the gRNA site
-## It should be the value of the `-u` parameter in seqkit_fetch_target
-##
-## The SEQUENCE_EXCLUDED_REGION ensures it doesn't pick a product right on the gRNA site
-## It should be the value of the `-u` parameter in seqkit_fetch_target, minus 1 half of 70 (35)
-##
-##
-
-echo "SEQUENCE_ID=$(seqkit fx2tab !{target} | cut -f 1)
-SEQUENCE_TEMPLATE=$(seqkit fx2tab !{target} | cut -f 2)
-SEQUENCE_TARGET=50000,20
-SEQUENCE_EXCLUDED_REGION=49965,70
-PRIMER_TASK=generic
-PRIMER_PICK_LEFT_PRIMER=1
-PRIMER_PICK_INTERNAL_OLIGO=0
-PRIMER_PICK_RIGHT_PRIMER=1
-PRIMER_OPT_SIZE=20
-PRIMER_MIN_SIZE=18
-PRIMER_MAX_SIZE=22
-PRIMER_PRODUCT_SIZE_RANGE=75-500
-PRIMER_EXPLAIN_FLAG=1
-PRIMER_MASK_TEMPLATE=1
-PRIMER_MASK_KMERLIST_PREFIX=!{fasta}
-PRIMER_MASK_KMERLIST_PATH=./kmer_lists/
-=" > custom_primer3.conf
-'''
-}
-
-process primer3_calc {
-  conda "primer3"
-  publishDir "results/${task.process}",mode:'link',overwrite:'true'
-  cache 'deep'
-  tag "$kmer_lists $conf"
-  input:
-    path conf
-    path kmer_lists
-  output:
-    path "*primer3.txt"
-  shell:
-'''
-mkdir -p kmer_lists
-ln ./*.list kmer_lists
-
-ID=$(head -n 1 !{conf} | cut -f 2 -d '=' | cut -f 2 -d ' ')
-
-primer3_core -h 2>&1 | grep "This is primer3" > ${ID}_primer3.txt
-cat ${ID}_primer3.txt ## Print version to stdout
-primer3_core !{conf} >> ${ID}_primer3.txt
-'''
-}
-
-process primer3_results2fasta {
-publishDir "results/${task.process}",mode:'link',overwrite:'true'
-cache 'deep'
-input:
-    path results
-output:
-    path "${results}.fa"
-tag "$results"
-shell:
-'''
-#!/usr/bin/env python
-import re
-import os
-import os.path
-print("hello world!")
-
-with open("!{results}", "r") as input_handle:
-    data = input_handle.read()
-##print(data)
-
-id_match = re.search('SEQUENCE_ID=(.+)',data)
-id = id_match.group(1)
-
-bespoke_regex = '(?P<id>PRIMER_PAIR_[0-9]+)_PENALTY=(?P<penalty>[0-9.]+).+?PRIMER_LEFT_[0-9]+_SEQUENCE=(?P<left>[atcgATCGnN]+).+?PRIMER_RIGHT_[0-9]+_SEQUENCE=(?P<right>[atcgATCGnN]+).+?PRIMER_PAIR_[0-9]+_PRODUCT_SIZE=(?P<size>[0-9]+).+?PRIMER_PAIR_[0-9]+_PRODUCT_TM=[0-9.]+'
-
-matches = list(re.finditer(bespoke_regex,data,flags=re.MULTILINE|re.DOTALL))
-
-output_handle = open("!{results}.fa", "w")
-for m in matches:
-    line = ">{ID} primer3 {SUBID} penalty:{PEN} type:LEFT product:{PROD}bp".format(ID=id,SUBID=m.group("id"),PEN=m.group("penalty"),PROD=m.group("size"))+os.linesep
-    output_handle.write(line)
-    line = m.group("left")+os.linesep
-    output_handle.write(line)
-    line = ">{ID} primer3 {SUBID} penalty:{PEN} type:RIGHT product:{PROD}bp".format(ID=id,SUBID=m.group("id"),PEN=m.group("penalty"),PROD=m.group("size"))+os.linesep
-    output_handle.write(line)
-    line = m.group("right")+os.linesep
-    output_handle.write(line)
-output_handle.close()
-'''
-}
+include { seqkit_fetch_target } from "./modules/seqkit_fetch_target"
+include { primer3_conf } from "./modules/primer3_conf/"
+include { primer3_index } from "./modules/primer3_index/"
+include { primer3_calc } from "./modules/primer3_calc/"
+include { primer3_results2fasta } from "./modules/primer3_results2fasta/"
 
 workflow {
   ref = Channel.fromPath(params.fasta)
diff --git a/modules/primer3_calc/main.nf b/modules/primer3_calc/main.nf
new file mode 100644
index 0000000..7737333
--- /dev/null
+++ b/modules/primer3_calc/main.nf
@@ -0,0 +1,25 @@
+process primer3_calc {
+    conda "primer3"
+    publishDir "results/${task.process}",mode:'link',overwrite:'true'
+    cache 'deep'
+    tag "$kmer_lists $conf"
+
+    input:
+        path conf
+        path kmer_lists
+
+    output:
+        path "*primer3.txt"
+
+    shell:
+    '''
+    mkdir -p kmer_lists
+    ln ./*.list kmer_lists
+
+    ID=$(head -n 1 !{conf} | cut -f 2 -d '=' | cut -f 2 -d ' ')
+
+    primer3_core -h 2>&1 | grep "This is primer3" > ${ID}_primer3.txt
+    cat ${ID}_primer3.txt ## Print version to stdout
+    primer3_core !{conf} >> ${ID}_primer3.txt
+    '''
+}
diff --git a/modules/primer3_conf/main.nf b/modules/primer3_conf/main.nf
new file mode 100644
index 0000000..60a4130
--- /dev/null
+++ b/modules/primer3_conf/main.nf
@@ -0,0 +1,44 @@
+process primer3_conf {
+    publishDir "results/${task.process}",mode:'link',overwrite:'true'
+    cache 'deep'
+    tag "$fasta"
+
+    input:
+        path fasta
+        path target
+
+    output:
+        path "custom_primer3.conf"
+
+    shell:
+    '''
+    ##The MASK_KMERLIST does give an error if run with a dummy value, so expect it is working?
+    ## See the Primer3 manual for details on these parameters
+
+    ## The "SEQUENCE_TARGET" parameter is fairly important, as that ensures the PCR product goes across the gRNA site
+    ## It should be the value of the `-u` parameter in seqkit_fetch_target
+    ##
+    ## The SEQUENCE_EXCLUDED_REGION ensures it doesn't pick a product right on the gRNA site
+    ## It should be the value of the `-u` parameter in seqkit_fetch_target, minus 1 half of 70 (35)
+    ##
+    ##
+
+    echo "SEQUENCE_ID=$(seqkit fx2tab !{target} | cut -f 1)
+    SEQUENCE_TEMPLATE=$(seqkit fx2tab !{target} | cut -f 2)
+    SEQUENCE_TARGET=50000,20
+    SEQUENCE_EXCLUDED_REGION=49965,70
+    PRIMER_TASK=generic
+    PRIMER_PICK_LEFT_PRIMER=1
+    PRIMER_PICK_INTERNAL_OLIGO=0
+    PRIMER_PICK_RIGHT_PRIMER=1
+    PRIMER_OPT_SIZE=20
+    PRIMER_MIN_SIZE=18
+    PRIMER_MAX_SIZE=22
+    PRIMER_PRODUCT_SIZE_RANGE=75-500
+    PRIMER_EXPLAIN_FLAG=1
+    PRIMER_MASK_TEMPLATE=1
+    PRIMER_MASK_KMERLIST_PREFIX=!{fasta}
+    PRIMER_MASK_KMERLIST_PATH=./kmer_lists/
+    =" > custom_primer3.conf
+    '''
+}
diff --git a/modules/primer3_index/main.nf b/modules/primer3_index/main.nf
new file mode 100644
index 0000000..7f19ddf
--- /dev/null
+++ b/modules/primer3_index/main.nf
@@ -0,0 +1,20 @@
+process primer3_index {
+    conda "genometester4"
+    publishDir "results/${task.process}",mode:'link',overwrite:'true'
+    cache 'deep'
+    tag "$fasta"
+
+    input:
+        path fasta
+
+    output:
+        path "${fasta}*.list"
+
+    shell:
+    '''
+    glistmaker !{fasta} -w 11
+    glistmaker !{fasta} -w 16
+    ln out_11.list !{fasta}_11.list
+    ln out_16.list !{fasta}_16.list
+    '''
+}
diff --git a/modules/primer3_results2fasta/main.nf b/modules/primer3_results2fasta/main.nf
new file mode 100644
index 0000000..2486aa3
--- /dev/null
+++ b/modules/primer3_results2fasta/main.nf
@@ -0,0 +1,43 @@
+process primer3_results2fasta {
+    publishDir "results/${task.process}",mode:'link',overwrite:'true'
+    cache 'deep'
+
+    input:
+        path results
+
+    output:
+        path "${results}.fa"
+    tag "$results"
+
+    shell:
+    '''
+    #!/usr/bin/env python
+    import re
+    import os
+    import os.path
+    print("hello world!")
+
+    with open("!{results}", "r") as input_handle:
+        data = input_handle.read()
+    ##print(data)
+
+    id_match = re.search('SEQUENCE_ID=(.+)',data)
+    id = id_match.group(1)
+
+    bespoke_regex = '(?P<id>PRIMER_PAIR_[0-9]+)_PENALTY=(?P<penalty>[0-9.]+).+?PRIMER_LEFT_[0-9]+_SEQUENCE=(?P<left>[atcgATCGnN]+).+?PRIMER_RIGHT_[0-9]+_SEQUENCE=(?P<right>[atcgATCGnN]+).+?PRIMER_PAIR_[0-9]+_PRODUCT_SIZE=(?P<size>[0-9]+).+?PRIMER_PAIR_[0-9]+_PRODUCT_TM=[0-9.]+'
+
+    matches = list(re.finditer(bespoke_regex,data,flags=re.MULTILINE|re.DOTALL))
+
+    output_handle = open("!{results}.fa", "w")
+    for m in matches:
+        line = ">{ID} primer3 {SUBID} penalty:{PEN} type:LEFT product:{PROD}bp".format(ID=id,SUBID=m.group("id"),PEN=m.group("penalty"),PROD=m.group("size"))+os.linesep
+        output_handle.write(line)
+        line = m.group("left")+os.linesep
+        output_handle.write(line)
+        line = ">{ID} primer3 {SUBID} penalty:{PEN} type:RIGHT product:{PROD}bp".format(ID=id,SUBID=m.group("id"),PEN=m.group("penalty"),PROD=m.group("size"))+os.linesep
+        output_handle.write(line)
+        line = m.group("right")+os.linesep
+        output_handle.write(line)
+    output_handle.close()
+    '''
+}
diff --git a/modules/seqkit_fetch_target/main.nf b/modules/seqkit_fetch_target/main.nf
new file mode 100644
index 0000000..dcbdcdd
--- /dev/null
+++ b/modules/seqkit_fetch_target/main.nf
@@ -0,0 +1,30 @@
+process seqkit_fetch_target {
+    conda "seqkit"
+
+    input:
+        val seq
+        path fasta
+
+    output:
+        path "${seq}_target.fa" , emit:fasta
+        path "${seq}_target.gtf" , emit:gtf
+
+    shell:
+    '''
+    echo ">seq
+    !{seq}
+    " > seq.fa
+
+    seqkit locate -i --gtf -p "!{seq}" !{fasta} > !{seq}_target.gtf
+
+    seqkit subseq --gtf !{seq}_target.gtf -u 50000 -d 50000 !{fasta} > !{seq}_target.fa
+
+    if [[ ! -s !{seq}_target.gtf ]]
+    then
+    exit 7
+    fi
+
+    seqkit stat !{seq}_target.gtf
+    seqkit stat !{seq}_target.fa
+    '''
+}