From d74eaba3fab5538ce58a8d150804503888323d8f Mon Sep 17 00:00:00 2001
From: Edmund Miller
Date: Mon, 29 Jul 2024 22:33:22 -0500
Subject: [PATCH] refactor: Move script into bin

---
 bin/results2fasta.py                  | 67 +++++++++++++++++++++++++++
 modules/primer3_results2fasta/main.nf | 35 ++------------
 2 files changed, 71 insertions(+), 31 deletions(-)
 create mode 100755 bin/results2fasta.py

diff --git a/bin/results2fasta.py b/bin/results2fasta.py
new file mode 100755
index 0000000..cd48e77
--- /dev/null
+++ b/bin/results2fasta.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+"""Convert a primer3 results file into a FASTA file of primer sequences.
+
+Usage: results2fasta.py RESULTS [FASTA]
+
+When FASTA is omitted the output is written to "RESULTS.fa".
+"""
+import re
+import os
+import os.path
+import sys
+
+# Matches one primer pair record, capturing the pair name, its penalty, the
+# left/right primer sequences and the product size.
+PRIMER_PAIR_REGEX = re.compile(
+    r"(?P<id>PRIMER_PAIR_[0-9]+)_PENALTY=(?P<penalty>[0-9.]+)"
+    r".+?PRIMER_LEFT_[0-9]+_SEQUENCE=(?P<left>[atcgATCGnN]+)"
+    r".+?PRIMER_RIGHT_[0-9]+_SEQUENCE=(?P<right>[atcgATCGnN]+)"
+    r".+?PRIMER_PAIR_[0-9]+_PRODUCT_SIZE=(?P<size>[0-9]+)"
+    r".+?PRIMER_PAIR_[0-9]+_PRODUCT_TM=[0-9.]+",
+    flags=re.MULTILINE | re.DOTALL,
+)
+
+# FASTA header template shared by the LEFT and RIGHT record of a pair.
+HEADER = ">{ID} primer3 {SUBID} penalty:{PEN} type:{TYPE} product:{PROD}bp"
+
+
+def results_to_fasta(data):
+    """Return FASTA text holding the left and right primer of every pair in data."""
+    id_match = re.search("SEQUENCE_ID=(.+)", data)
+    if id_match is None:
+        raise ValueError("no SEQUENCE_ID record found in primer3 results")
+    sequence_id = id_match.group(1)
+
+    lines = []
+    for m in PRIMER_PAIR_REGEX.finditer(data):
+        for side in ("left", "right"):
+            header = HEADER.format(
+                ID=sequence_id,
+                SUBID=m.group("id"),
+                PEN=m.group("penalty"),
+                TYPE=side.upper(),
+                PROD=m.group("size"),
+            )
+            lines.append(header + os.linesep)
+            lines.append(m.group(side) + os.linesep)
+    return "".join(lines)
+
+
+def main(argv):
+    """Convert the results file named in argv; see the module docstring for usage."""
+    if len(argv) not in (2, 3):
+        sys.exit("usage: results2fasta.py RESULTS [FASTA]")
+    results_path = argv[1]
+    # Default mirrors the "${results}.fa" output name the Nextflow process declares.
+    fasta_path = argv[2] if len(argv) == 3 else results_path + ".fa"
+
+    with open(results_path, "r") as input_handle:
+        data = input_handle.read()
+    # Parse before opening the output so a malformed input leaves no empty file.
+    fasta = results_to_fasta(data)
+    with open(fasta_path, "w") as output_handle:
+        output_handle.write(fasta)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/modules/primer3_results2fasta/main.nf b/modules/primer3_results2fasta/main.nf
index 04a43aa..088f048 100644
--- a/modules/primer3_results2fasta/main.nf
+++ b/modules/primer3_results2fasta/main.nf
@@ -9,35 +9,8 @@ process primer3_results2fasta {
     output:
     path "${results}.fa"
 
-    shell:
-    '''
-    #!/usr/bin/env python
-    import re
-    import os
-    import os.path
-    print("hello world!")
-
-    with open("!{results}", "r") as input_handle:
-        data = input_handle.read()
-    ##print(data)
-
-    id_match = re.search('SEQUENCE_ID=(.+)',data)
-    id = id_match.group(1)
-
-    bespoke_regex = '(?P<id>PRIMER_PAIR_[0-9]+)_PENALTY=(?P<penalty>[0-9.]+).+?PRIMER_LEFT_[0-9]+_SEQUENCE=(?P<left>[atcgATCGnN]+).+?PRIMER_RIGHT_[0-9]+_SEQUENCE=(?P<right>[atcgATCGnN]+).+?PRIMER_PAIR_[0-9]+_PRODUCT_SIZE=(?P<size>[0-9]+).+?PRIMER_PAIR_[0-9]+_PRODUCT_TM=[0-9.]+'
-
-    matches = list(re.finditer(bespoke_regex,data,flags=re.MULTILINE|re.DOTALL))
-
-    output_handle = open("!{results}.fa", "w")
-    for m in matches:
-        line = ">{ID} primer3 {SUBID} penalty:{PEN} type:LEFT product:{PROD}bp".format(ID=id,SUBID=m.group("id"),PEN=m.group("penalty"),PROD=m.group("size"))+os.linesep
-        output_handle.write(line)
-        line = m.group("left")+os.linesep
-        output_handle.write(line)
-        line = ">{ID} primer3 {SUBID} penalty:{PEN} type:RIGHT product:{PROD}bp".format(ID=id,SUBID=m.group("id"),PEN=m.group("penalty"),PROD=m.group("size"))+os.linesep
-        output_handle.write(line)
-        line = m.group("right")+os.linesep
-        output_handle.write(line)
-    output_handle.close()
-    '''
+    script:
+    """
+    results2fasta.py ${results} ${results}.fa
+    """
 }