Skip to content

Commit

Permalink
Improvements to Salmon SA and mapping stats
Browse files Browse the repository at this point in the history
  • Loading branch information
reganhayward committed Nov 22, 2024
1 parent 085cd4a commit 9f0e3b0
Show file tree
Hide file tree
Showing 14 changed files with 480 additions and 281 deletions.
47 changes: 18 additions & 29 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,42 +26,38 @@ process {
]
}

withName: 'COMBINE_FILES|CREATE_TRANSCRIPTOME_FASTA|CREATE_TRANSCRIPTOME_FASTA_GFFREAD|REPLACE_ATTRIBUTE_GFF_STAR_SALMON*' {

publishDir = [
[
path: { "${params.outdir}/references" },
mode: params.publish_dir_mode,
withName: 'COMBINE_FILES|CREATE_TRANSCRIPTOME_FASTA|CREATE_TRANSCRIPTOME_FASTA_GFFREAD' {
publishDir = [
[path: { "${params.outdir}/references" },
mode: params.publish_dir_mode,
]
]
}

withName: 'REPLACE_ATTRIBUTE_GFF_STAR_SALMON_HOST|REPLACE_ATTRIBUTE_GFF_STAR_SALMON_PATHOGEN' {
publishDir = [ enabled: false ]
}

withName: 'UNCOMPRESS_HOST_FASTA_GENOME|UNCOMPRESS_PATHOGEN_FASTA_GENOME|UNCOMPRESS_HOST_GFF|UNCOMPRESS_HOST_TRNA_GFF|UNCOMPRESS_PATHOGEN_GFF' {

publishDir = [
[
path: { "${params.outdir}/references" },
mode: params.publish_dir_mode,
withName: 'UNCOMPRESS_HOST_FASTA_GENOME|UNCOMPRESS_PATHOGEN_FASTA_GENOME|UNCOMPRESS_HOST_GFF|UNCOMPRESS_PATHOGEN_GFF|COMBINE_FILES_PATHOGEN_HOST_GFF' {
publishDir = [
[path: { "${params.outdir}/references" },
mode: params.publish_dir_mode,
]
]

ext.prefix = 'uncompressed'
}


withName: 'COMBINE_HOST_GFF_FILES' {

publishDir = [
[
path: { "${params.outdir}/references/htseq" },
mode: params.publish_dir_mode,
publishDir = [
[path: { "${params.outdir}/references/htseq" },
mode: params.publish_dir_mode,
]
]
}

withName: 'REPLACE_ATTRIBUTE_GFF_STAR_SALMON_PATHOGEN|REPLACE_ATTRIBUTE_GFF_STAR_SALMON_HOST|REPLACE_ATTRIBUTE_GFF_STAR_SALMON_TRNA_FILE|COMBINE_HOST_GENOME_TRNA_GFF_STAR_SALMON|REPLACE_GENE_FEATURE_GFF_HOST_SALMON|REPLACE_GENE_FEATURE_GFF_PATHOGEN_SALMON|COMBINE_FILES_PATHOGEN_HOST_GFF|EXTRACT_ANNOTATIONS_PATHOGEN_SALMON|EXTRACT_ANNOTATIONS_HOST_SALMON' {

withName: 'REPLACE_GENE_FEATURE_GFF_HOST_SALMON|REPLACE_GENE_FEATURE_GFF_PATHOGEN_SALMON|EXTRACT_ANNOTATIONS_PATHOGEN_SALMON|EXTRACT_ANNOTATIONS_HOST_SALMON' {
publishDir = [
[
path: { "${params.outdir}/references/salmon" },
Expand All @@ -73,8 +69,6 @@ process {





withName: FASTQC {
ext.args = '--quiet'
ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('fastqc')) }
Expand All @@ -85,7 +79,6 @@ process {
pattern: "*{html,zip}"
]
]
container = 'quay.io/biocontainers/fastqc:0.11.9--0'
}

withName: FASTQC_AFTER_TRIMMING {
Expand Down Expand Up @@ -156,7 +149,9 @@ process {
]
}

withName: 'NFCORE_DUALRNASEQ:DUALRNASEQ:SALMON_SELECTIVE_ALIGNMENT:EXTRACT_PROCESSED_READS' {


withName: 'NFCORE_DUALRNASEQ:DUALRNASEQ:SALMON_SELECTIVE_ALIGNMENT:COMBINE_PROCESSED_READS|EXTRACT_PROCESSED_READS|COLLATE_PROCESSED_READS' {
publishDir = [
path: { "${params.outdir}/mapping_statistics/salmon_SA/" },
mode: params.publish_dir_mode
Expand Down Expand Up @@ -226,12 +221,6 @@ process {
]
}

withName: 'NFCORE_DUALRNASEQ:DUALRNASEQ:SALMON_ALIGNMENT_BASED:EXTRACT_PROCESSED_READS' {
publishDir = [
path: { "${params.outdir}/mapping_statistics/STAR_SALMON/" },
mode: params.publish_dir_mode
]
}

withName: 'NFCORE_DUALRNASEQ:DUALRNASEQ:SALMON_ALIGNMENT_BASED:TXIMPORT' {
publishDir = [
Expand Down
14 changes: 7 additions & 7 deletions conf/test_hackathon.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@ params {
// Genome references

// fasta_host = "https://github.com/nf-core/test-datasets/raw/dualrnaseq/references/GRCh38.p13_sub.fasta"
fasta_host = "data/GRCh38.p13_sub.fasta"
gff_host = "data/Human_gencode.v33_sub.gff3"
host_fasta_genome = "data/GRCh38.p13_sub.fasta"
host_gff = "data/Human_gencode.v33_sub.gff3"
// gff_host = "https://github.com/nf-core/test-datasets/raw/dualrnaseq/references/Human_gencode.v33_sub.gff3"
gff_host_tRNA = 'data/human.tRNAs.gff'
transcript_fasta_pathogen = "data/SL1344_sub_transcriptome.fasta"
transcript_fasta_host = "data/Human_gencode.v33_sub_transcriptome.fasta"
// gff_host_tRNA = 'data/human.tRNAs.gff'
pathogen_fasta_transcripts = "data/SL1344_sub_transcriptome.fasta"
host_fasta_transcripts = "data/Human_gencode.v33_sub_transcriptome.fasta"
libtype = ""

// fasta_pathogen = "https://github.com/nf-core/test-datasets/raw/dualrnaseq/references/SL1344_sub.fasta"
// gff_pathogen = "https://github.com/nf-core/test-datasets/raw/dualrnaseq/references/SL1344_sub.gff3"
fasta_pathogen = "data/SL1344_sub.fasta"
gff_pathogen = "data/SL1344_sub.gff3"
pathogen_fasta_genome = "data/SL1344_sub.fasta"
pathogen_gff = "data/SL1344_sub.gff3"

}
10 changes: 5 additions & 5 deletions lib/WorkflowDualrnaseq.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ class WorkflowDualrnaseq {
public static void initialise(params, log) {
genomeExistsError(params, log)


if (!params.fasta_host) {
log.error "Host genome fasta file not specified with e.g. '--fasta_host genome.fa' or via a detectable config file."
System.exit(1)
}
// TODO: add checks that the host and pathogen FASTA and GFF files have been specified.
// if (!params.fasta_host) {
// log.error "Host genome fasta file not specified with e.g. '--fasta_host genome.fa' or via a detectable config file."
// System.exit(1)
// }
}

//
Expand Down
17 changes: 17 additions & 0 deletions modules/local/collate_processed_reads.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
 * COLLATE_PROCESSED_READS
 *
 * Concatenates every staged partial result file into one table named
 * 'total_processed_reads.tsv'.
 *
 * input:  partial_results - one file or a collection of files to concatenate
 * output: total_processed_reads.tsv - the inputs written back to back, in the
 *         order in which Nextflow interpolates them on the command line
 */
process COLLATE_PROCESSED_READS {
    // NOTE(review): the script is a single `cat`; 'process_high' looks larger than needed - confirm before changing
    label 'process_high'
    conda "python=3.8.3"
    // The original ternary selected the same image for Singularity and for
    // every other engine, so it is written here as the plain image name.
    container 'nfcore/dualrnaseq:dev'

    input:
    // `path` replaces the legacy `file` qualifier and matches the output declaration below
    path partial_results

    output:
    path 'total_processed_reads.tsv'

    script:
    // Interpolating the path variable directly expands to the space-separated
    // staged file names and also works when only a single file is supplied,
    // where calling `.join(' ')` on a non-list value is not reliable.
    """
    # Concatenate all partial result files into the master file
    cat ${partial_results} > total_processed_reads.tsv
    """
}
2 changes: 1 addition & 1 deletion modules/local/extract_annotations.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ process EXTRACT_ANNOTATIONS {

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "extract_annotations_${organism}_${quantifier}"
def prefix = task.ext.prefix ?: "extracted_annotations_${organism}_${quantifier}"

"""
python $workflow.projectDir/bin/extract_annotations_from_gff.py \\
Expand Down
4 changes: 2 additions & 2 deletions modules/local/extract_processed_reads.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env nextflow
process EXTRACT_PROCESSED_READS {
tag "extract_process_reads_${process}"
tag "extract_processed_reads_${process}"
label 'process_high'

conda "python=3.8.3"
Expand Down Expand Up @@ -29,4 +30,3 @@ process EXTRACT_PROCESSED_READS {
fi
"""
}

3 changes: 1 addition & 2 deletions modules/local/replace_attribute.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// ONE FOR HOST_GENOME
// ONE FOR HOST_TRNA
process REPLACE_ATTRIBUTE_GFF_STAR_SALMON {
tag "repl_attribute_host_tRNA_gff"
tag "repl_GFF_attributes"

label 'process_high'

Expand Down
Empty file removed modules/local/test.nf
Empty file.
37 changes: 27 additions & 10 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,29 @@ params {
// A default run will include the following software:
// Fastqc, Cutadapt, Fastqc (after trimming), Salmon SA, mapping stats
// To add or remove software, change the following flags/params
run_salmon_selective_alignment = true
run_salmon_alignment_based_mode = false //true
skip_tools = null


//fasta_host = null
// --------------
// Reference files
// --------------

// Names of the organisms
host_organism = 'host' // Change to custom name if desired, e.g. Human_hela_cells
pathogen_organism = 'pathogen' // Change to custom name if desired, e.g. Salmonella_SL1344

// Host
fasta_host = null
transcript_fasta_host = null
host_fasta_genome = null
host_fasta_transcripts = null
host_gff = null

// Pathogen
transcript_fasta_pathogen = null
pathogen_fasta_genome = null
pathogen_fasta_transcripts = null
pathogen_gff = null

// Misc
genome = null
//genome = null


// --------------
Expand All @@ -69,15 +74,14 @@ params {
gene_feature_gff_to_quantify_host = ["exon", "tRNA"]
extract_annotations_host_salmon_feature = 'exon' //'quant'
extract_annotations_host_salmon_attribute = 'Parent' // currently it is required to pass value with capital letter
host_organism = 'host' // Change to custom name if desired, ie Human_hela_cells


// Pathogen
pathogen_gff_attribute = 'locus_tag'
gene_feature_gff_to_quantify_pathogen = ["gene", "sRNA", "tRNA", "rRNA"]
pathogen_organism = 'pathogen' // Change to custom name if desired, ie Salmonella_SL1344


// Misc
htseq_quantifier = 'quant'
// read_transcriptome_fasta_host_from_file = false
// read_transcriptome_fasta_pathogen_from_file = false

Expand All @@ -87,12 +91,15 @@ params {
// --------------
igenomes_base = 's3://ngi-igenomes/igenomes'
igenomes_ignore = false
igenome = null
ifasta = null


// --------------
// Software options
// --------------

mapping_stats = true
// Fastqc
fastqc_args = null

Expand All @@ -108,11 +115,21 @@ params {


// Salmon selective alignment
run_salmon_SA = true
salmon_sa_index_args = '-k 21'
salmon_sa_args = '--softclipOverhangs'

// Salmon alignment based mode
run_salmon_AB = false
salmon_ab_args = null

// STAR genome alignment
run_star = false

// Run HTSeq
run_htseq = false
htseq_quantifier = 'quant'



// --------------
Expand Down
Loading

0 comments on commit 9f0e3b0

Please sign in to comment.