These scripts replicate the results of the following manuscript:
TBC
# Run the genome preparation script. Later steps read
# reference_genome/ensembl_chok1_genome.fa and .gtf, so -o is presumably the
# output directory and -v the Ensembl release (confirm in prepare_genome.sh).
./scripts/prepare_genome.sh \
  -v 94 \
  -o reference_genome
-a = genome FASTA file, -g = annotation file, -p = processors
# Build the STAR index from the genome FASTA (-a) and the annotation file (-g)
# using 32 processors (-p).
./scripts/make_star_index.sh \
  -a reference_genome/ensembl_chok1_genome.fa \
  -g reference_genome/ensembl_chok1_genome.gtf \
  -p 32
# Write the list of unique sample names to ../data/sample_names.txt.
# A sample name is the first two "_"-separated fields of each *.fastq.gz file
# name in ../data/raw. The files are matched with a glob rather than by parsing
# `ls`, the ".fastq.gz" suffix is stripped literally, and `sort -u` removes
# duplicates without relying on the listing order.
for fastq_path in ../data/raw/*.fastq.gz; do
  # An unmatched glob stays literal; skip it so no bogus name is written.
  [ -e "$fastq_path" ] || continue
  fastq_name=${fastq_path##*/}
  printf '%s\n' "${fastq_name%.fastq.gz}"
done | cut -d_ -f1-2 | sort -u > ../data/sample_names.txt
# Run trim_adapter.sh for every sample, one background job per sample.
# The sample list is redirected into the loop instead of piped, so the jobs are
# children of this shell and `wait` can block until all of them have finished.
# Without the wait, the following step could start reading
# ../data/preprocessed/cutadapt while trimming is still in progress.
while read -r sample; do
  ./scripts/trim_adapter.sh -s "$sample" -i ../data/raw -o ../data/preprocessed/cutadapt &
done < ../data/sample_names.txt
wait
# Run trim_quality.sh for every sample, reading from the cutadapt output
# directory and writing to ../data/preprocessed.
# The -o option and its value are separated by a space, as for every other
# option in this pipeline, and the sample name is quoted.
while read -r sample; do
  ./scripts/trim_quality.sh -s "$sample" -i ../data/preprocessed/cutadapt -o ../data/preprocessed
done < ../data/sample_names.txt
# Run star_mapping.sh for every sample, reading from
# ../data/preprocessed/paired and writing to ./mapped.
# NOTE(review): the index path was `reference_gene/star_index`. Every other
# step uses the `reference_genome/` directory, so it is treated as a typo here.
# Confirm against where make_star_index.sh writes the index.
while read -r sample; do
  ./scripts/star_mapping.sh -s "$sample" -i ../data/preprocessed/paired -g reference_genome/star_index -o mapped -p 32
done < ../data/sample_names.txt
# Run stringtie_star.sh for every sample on the alignments in ./mapped, using
# the reference GTF, and write to ./stringtie_output.
# The stray positional word `mapping` that followed the sample name has been
# removed: under getopts-style parsing a non-option word ends option
# processing, so -i/-g/-o/-p after it would be ignored.
while read -r sample; do
  ./scripts/stringtie_star.sh -s "$sample" -i mapped -g reference_genome/ensembl_chok1_genome.gtf -o stringtie_output -p 32
done < ../data/sample_names.txt
# Run stringtie_merge.sh on the per-sample output in ./stringtie_output.
# Later steps read stringtie_output/stringtie_merged.gtf.
# NOTE(review): the reference GTF was passed with no option flag. It is now
# passed as -g, matching how the other steps pass the same file. Confirm the
# flag name in stringtie_merge.sh.
./scripts/stringtie_merge.sh \
  -t stringtie_output \
  -g reference_genome/ensembl_chok1_genome.gtf
# Run run_TPM.sh for every sample against the merged StringTie GTF, writing to
# lncrna_annotation/TPM.
# The output directory is created here first, because lncrna_annotation is
# otherwise only created by a later step. mkdir -p is a no-op if it exists.
mkdir -p lncrna_annotation/TPM
while read -r sample; do
  ./scripts/run_TPM.sh -p 32 -s "$sample" -g stringtie_output/stringtie_merged.gtf -o lncrna_annotation/TPM -b ../alt_splicing_analysis/mapping/
done < ../data/sample_names.txt
# Run make_TPM_matrix.sh with the sample list and the TPM output directory.
# Presumably this produces the transcript_tpm_all_samples.tsv file that the
# lncRNA filtering step reads (confirm in make_TPM_matrix.sh).
./scripts/make_TPM_matrix.sh \
  -s ../data/sample_names.txt \
  -o lncrna_annotation/TPM/
The StringTie transcriptome assembly is used to predict lncRNAs using FEELnc
# Make sure the lncRNA annotation output directory exists before the tools
# below write into it.
if [ ! -d lncrna_annotation ]; then
  mkdir -p lncrna_annotation
fi
The StringTie transcriptome assembly is used to predict lncRNAs using FEELnc
# Run FEELnc with the reference GTF (-G), the merged StringTie GTF (-g) and the
# genome FASTA (-f). Output goes to lncrna_annotation/FEELnc.
./scripts/run_FEELnc.sh \
  -G reference_genome/ensembl_chok1_genome.gtf \
  -g stringtie_output/stringtie_merged.gtf \
  -f reference_genome/ensembl_chok1_genome.fa \
  -o lncrna_annotation/FEELnc
# Run TransDecoder on the FEELnc candidate lncRNA GTF with the genome FASTA.
# Output goes to lncrna_annotation/TRANSDECODER, which the CPAT, CPC2, HMMscan
# and BLAST steps read from.
./scripts/run_Transdecoder.sh \
  -g lncrna_annotation/FEELnc/candidate_lncRNA.nocodpot.gtf \
  -f reference_genome/ensembl_chok1_genome.fa \
  -o lncrna_annotation/TRANSDECODER
# Run CPAT and then CPC2 on the same candidate lncRNA cDNA FASTA, each writing
# to its own directory under lncrna_annotation.
./scripts/run_CPAT.sh \
  -f lncrna_annotation/TRANSDECODER/candidate_lncRNA.nocodpot.cdna.fa \
  -o lncrna_annotation/CPAT
./scripts/run_CPC2.sh \
  -f lncrna_annotation/TRANSDECODER/candidate_lncRNA.nocodpot.cdna.fa \
  -o lncrna_annotation/CPC2
# Run HMMscan on the TransDecoder longest-ORF peptides, writing to
# lncrna_annotation/PFAM. -t 32 and -e 1e-5 are passed through unchanged
# (presumably thread count and E-value cutoff; confirm in run_HMMscan.sh).
./scripts/run_HMMscan.sh \
  -t 32 \
  -e 1e-5 \
  -p lncrna_annotation/TRANSDECODER/longest_orfs.pep \
  -o lncrna_annotation/PFAM
Assess FEELnc candidate lncRNAs for the presence of proteins, miRNAs, and other non-coding RNAs (e.g. snoRNAs) using BLAST
# Run the BLAST searches. Arguments are the SWISSPROT (-s), MIRBASE (-m) and
# RFAM (-r) directories, the longest-ORF peptide FASTA (-p) and the candidate
# lncRNA cDNA FASTA (-n). -t 32 and -e 1e-5 are passed through unchanged
# (presumably thread count and E-value cutoff; confirm in run_BLAST.sh).
./scripts/run_BLAST.sh \
  -t 32 \
  -e 1e-5 \
  -s lncrna_annotation/SWISSPROT \
  -p lncrna_annotation/TRANSDECODER/longest_orfs.pep \
  -m lncrna_annotation/MIRBASE \
  -r lncrna_annotation/RFAM \
  -n lncrna_annotation/TRANSDECODER/candidate_lncRNA.nocodpot.cdna.fa
Filter FEELnc output using additional coding potential calculators, a PFAM search, and BLAST against protein and RNA databases
# Run the lncRNA filtering R script.
# The 11 arguments are positional, so the array keeps them in their original
# order: FEELnc GTF, CPC2, CPAT, SWISSPROT, MIRBASE, PFAM, RFAM, TPM matrix,
# FEELnc classes, merged StringTie GTF, then the lncrna_annotation directory.
filter_lncrna_args=(
  "lncrna_annotation/FEELnc/candidate_lncRNA.nocodpot.gtf"
  "lncrna_annotation/CPC2/CPC2.analysis.txt"
  "lncrna_annotation/CPAT/CPAT.analysis.txt"
  "lncrna_annotation/SWISSPROT/blastp.outfmt6"
  "lncrna_annotation/MIRBASE/blastn.outfmt6"
  "lncrna_annotation/PFAM/pfam_domain_transcripts"
  "lncrna_annotation/RFAM/blastn.outfmt6"
  "lncrna_annotation/TPM/transcript_tpm_all_samples.tsv"
  "lncrna_annotation/FEELnc/lncRNA_classes.txt"
  "stringtie_output/stringtie_merged.gtf"
  "lncrna_annotation"
)
Rscript R/filter_lncrna.R "${filter_lncrna_args[@]}"
# Run htseq_count.sh for every sample against the merged StringTie GTF, one
# background job per sample, writing to ./htseq_counts.
# The sample list is redirected into the loop instead of piped, so the jobs are
# children of this shell and `wait` can block until all of them have finished.
# Without the wait, the DESeq2 step that follows could read an incomplete
# htseq_counts directory.
while read -r sample; do
  ./scripts/htseq_count.sh -s "$sample" -m ../alt_splicing_analysis/mapping/ -g stringtie_output/stringtie_merged.gtf -o ./htseq_counts &
done < ../data/sample_names.txt
wait
# Run the DESeq2 R script with three positional arguments, in order: the
# htseq_counts directory, DESeq2_results, and the lncRNA_filtering.rData file
# under lncrna_annotation.
Rscript R/run_DEseq2.R "htseq_counts" "DESeq2_results" "lncrna_annotation/lncRNA_filtering.rData"