Skip to content

Commit

Permalink
add parameter to set sequence identity for vsearch
Browse files Browse the repository at this point in the history
update main.nf
  • Loading branch information
AmstlerStephan committed Jul 25, 2024
1 parent 7e381b6 commit 539d07a
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 69 deletions.
2 changes: 1 addition & 1 deletion lib/processes/cluster.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ process CLUSTER {
tuple val( "${sample}" ), val( "${target}" ), path( "cluster*" ), optional: true, emit:cluster_fastas

script:
def id = "${type}" == "raw" ? 0.90 : 0.99
def id = "${type}" == "raw" ? params.vsearch_sequence_identity : 0.99
"""
vsearch \
--clusterout_id \
Expand Down
56 changes: 0 additions & 56 deletions lib/workflows/umi-pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,6 @@ include {SPLIT_READS} from '../processes/split_reads.nf'
include {DETECT_UMI_FASTQ; DETECT_UMI_FASTQ as DETECT_UMI_CONSENSUS_FASTQ} from '../processes/detect_umi_fastq.nf'
include {CLUSTER; CLUSTER as CLUSTER_CONSENSUS} from '../processes/cluster.nf'
include {REFORMAT_FILTER_CLUSTER} from '../processes/reformat_filter_cluster.nf'
include {POLISH_CLUSTER} from '../processes/polish_cluster.nf'
include {FILTER_CONSENSUS_FASTQ} from '../processes/filter_consensus_fastq.nf'
include {REFORMAT_CONSENSUS_CLUSTER} from '../processes/reformat_consensus_cluster.nf'
include {LOFREQ as LOFREQ_CONSENSUS; LOFREQ as LOFREQ_FINAL_CONSENSUS} from '../processes/lofreq.nf'
include {MUTSERVE as MUTSERVE_CONSENSUS; MUTSERVE as MUTSERVE_FINAL_CONSENSUS} from '../processes/mutserve.nf'
include {FREEBAYES as FREEBAYES_CONSENSUS; FREEBAYES as FREEBAYES_FINAL_CONSENSUS} from '../processes/freebayes.nf'


// SUB-WORKFLOWS
Expand Down Expand Up @@ -86,58 +80,8 @@ workflow UMI_PIPELINE {
.filter{ sample, type, fastqs -> fastqs.class == ArrayList}
.set{ smolecule_cluster_fastqs_list }

smolecule_cluster_fastqs_list
.map{ sample, type, fastqs -> n_parsed_cluster.put("$sample", fastqs.size)}

smolecule_cluster_fastqs_list
.transpose( by: 2 )
.set{ smolecule_cluster_fastqs }

POLISH_CLUSTER( smolecule_cluster_fastqs, consensus )

POLISH_CLUSTER.out.consensus_fastq
.map{ sample, type, fastq -> tuple( groupKey(sample, n_parsed_cluster.get("$sample")), type, fastq) }
.groupTuple( )
.set{ merge_consensus }


if ( params.output_format == "fastq"){
MERGE_CONSENSUS_FASTQ(merge_consensus, consensus)
FILTER_CONSENSUS_FASTQ(MERGE_CONSENSUS_FASTQ.out.merged_consensus_fastq, consensus)
FILTER_CONSENSUS_FASTQ.out.filtered_consensus_fastq
.set{ consensus_fastq }
} else {
MERGE_CONSENSUS_FASTQ(merge_consensus, consensus)
.set{ consensus_fastq }
}

MAP_CONSENSUS( consensus_fastq, consensus, reference )
DETECT_UMI_CONSENSUS_FASTQ( consensus_fastq, consensus, umi_extract )
CLUSTER_CONSENSUS( DETECT_UMI_CONSENSUS_FASTQ.out.umi_extract_fastq , consensus )
REFORMAT_CONSENSUS_CLUSTER( CLUSTER_CONSENSUS.out.consensus_fasta, final_consensus, umi_reformat_consensus )
MAP_FINAL_CONSENSUS( REFORMAT_CONSENSUS_CLUSTER.out.consensus_fastq, final_consensus, reference )

if( params.call_variants ){
if( params.variant_caller == "lofreq" ){
LOFREQ_CONSENSUS( MAP_CONSENSUS.out.bam_consensus, consensus, reference, reference_fai )
LOFREQ_FINAL_CONSENSUS( MAP_FINAL_CONSENSUS.out.bam_consensus, final_consensus, reference, reference_fai )
}else if( params.variant_caller == "mutserve"){
MUTSERVE_CONSENSUS( MAP_CONSENSUS.out.bam_consensus, consensus, COPY_BED.out.bed, reference, reference_fai )
MUTSERVE_FINAL_CONSENSUS( MAP_FINAL_CONSENSUS.out.bam_consensus, final_consensus, COPY_BED.out.bed, reference, reference_fai )
}else if( params.variant_caller == "freebayes"){
FREEBAYES_CONSENSUS( MAP_CONSENSUS.out.bam_consensus, consensus, reference, reference_fai )
FREEBAYES_FINAL_CONSENSUS( MAP_FINAL_CONSENSUS.out.bam_consensus, final_consensus, reference, reference_fai )
}else{
exit 1, "${params.variant_caller} is not a valid option. \nPossible variant caller are <lofreq/mutserve/freebayes>"

}
}



}


//////////////////
// END PIPELINE //
//////////////////
Expand Down
29 changes: 17 additions & 12 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ if(params.help){
--version Display the current pipeline version and exit
--debug Run the pipeline in debug mode
Options: GENERAL - Required
Options: GENERAL - Required
--input [path/to/input/dir] [REQUIRED] Input directory containing (zipped) FASTQ files
--output [STR] [REQUIRED] A string that can be given to name the output directory
--reference [path/to/ref.fa] [REQUIRED] Path to the reference genome in fasta format
Expand All @@ -25,36 +25,41 @@ if(params.help){
--threads Number of maximum threads to use [default: availableProcessors -1]
Options: READ FILTERING
--min_read_length flag to enable subsampling [default: 0]
--min_qscore Seed to produce pseudorandom numbers [default: 0]
--min_read_length Minimum read length [default: 0]
--min_qscore Minimum quality score [default: 0]
Options: SUBSAMPLING
--subsampling flag to enable subsampling [default: false]
--subsampling Flag to enable subsampling [default: false]
--subsampling_seed Seed to produce pseudorandom numbers [default: 11]
--subsampling_readnumber Number of reads after subsampling [default: 100000]
Options: VARIANT CALLING
--call_variants flag to enable variant calling [default: false]
--variant_caller [STR] [REQUIRED if call_variants is set] Variant caller [lofreq | mutserve | freebayes ]
--call_variants Flag to enable variant calling [default: false]
--variant_caller [STR] [REQUIRED if call_variants is set] Variant caller [lofreq | mutserve | freebayes]
Options: ADVANCED
Options: ADVANCED
--min_reads_per_barcode Minimal number of fastq reads for each barcode [default: 1000]
--umi_errors Max differences between extracted UMIs of the read and UMI pattern [default: 3]
--umi_errors Max differences between extracted UMIs of the read and UMI pattern [default: 2]
--max_dist_umi Maximum distance allowed for UMI grouping [default: 2]
--vsearch_sequence_identity Minimum sequence identity for vsearch clustering of raw reads (consensus reads are always clustered at 0.99) [default: 0.80]
--min_reads_per_cluster Min number of raw reads required for a consensus read [default: 20]
--max_reads_per_cluster Max number of raw reads used for a consensus read [default: 60]
--min_consensus_quality Minimum consensus read quality [default: 40]
--masking_strategy Masking strategy for low-quality regions [default: softmask]
--filter_strategy_clusters Filtering strategy for clusters with more than max_reads_per_cluster reads [random | quality] [default: quality]
--output_format Output format until the cluster filtering step [fasta | fastq] [default: fastq]
--write_reports Write stats of cluster and cluster filtering [default: true]
--min_overlap Min overlap with target region [default: 0.90]
--min_overlap Min overlap with target region [default: 0.95]
--include_secondary_reads Include secondary reads in the analysis [default: false]
--balance_strands Balance forward and reverse raw reads in clusters [default: true]
--medaka_model Medaka model used to compute consensus reads [default: "r1041_e82_400bps_hac_g615"]
--fwd_umi Forward UMI (Ftail...UMI...primer) [default: "TTTVVVVTTVVVVTTVVVVTTVVVVTTT"]
--rev_umi Reverse UMI (Rtail...UMI...primer) [default: "AAABBBBAABBBBAABBBBAABBBBAAA"]
--adapter_length Adapter length [default: 100]
--min_length Minimum combined UMI length [default: 40]
--max_length Maximum combined UMI length [default: 60]
--minimap2_param Set the parameters for minimap2 [default: "-ax map-ont -k 13"]
--include_secondary_reads Include secondary reads in the analysis [default: false]
--minimap2_param Set the parameters for minimap2 [default: "-ax map-ont -k 13 --MD"]
--include_secondary_reads Include secondary reads in the analysis [default: false]
Options: ADDITIONAL
--help Display this help information and exit
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ params {
min_reads_per_barcode = 1000
umi_errors = 2
max_dist_umi = 2
vsearch_sequence_identity = 0.80
min_reads_per_cluster = 20
max_reads_per_cluster = 60
min_consensus_quality = 40
Expand Down

0 comments on commit 539d07a

Please sign in to comment.