Skip to content
This repository has been archived by the owner on May 2, 2024. It is now read-only.

Default version, working directories, new script to filter contaminants #14

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions qiime2/filter_some_features.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#!/usr/bin/env bash

#$ -cwd
#$ -r n
#$ -j y
#$ -m ea
#$ -M [email protected]
#$ -l h_vmem=50G
#$ -l m_mem_free=20G

set -x
set -e
#set -u

source ~/.bashrc
conda activate qiime2-2019.4

if [ $# -ne 1 ]; then
echo "Usage: $0 MAPPING_FP"
echo "MAPPING_FP is the mapping file for Qiime2"
echo "Data files should be in a directory named "data_files""
exit 1
fi

MAPPING_FP=$1
WORK_DIR="/mnt/isilon/microbiome/analysis/danielsg/sehgal_run_6"

# See https://www.ostricher.com/2014/10/the-right-way-to-get-the-directory-of-a-bash-script/
SOURCE_REL="${BASH_SOURCE[0]}"
SOURCE_ABS=$(readlink -f "${SOURCE_REL}")
SOURCE_DIR=$(dirname "${SOURCE_ABS}")

### PATH TO Ceylan's CODE TO COMBINE I1 and I2
INDEX1_INDEX2_COMBINE_SCRIPT="${SOURCE_DIR}/combine_barcodes.py"

## Taxonomy classifier setup. Two classifiers are currently available:
## classifiers trained on full length and on 515F/806R region of Greengenes 13_8 99% OTUs
## These can be downloaded from https://data.qiime2.org/2017.9/common/gg-13-8-99-nb-classifier.qza (full length)
## or https://data.qiime2.org/2017.9/common/gg-13-8-99-515-806-nb-classifier.qza (515F/806R region)

#CLASSIFIER_FP="${HOME}/gg-13-8-99-nb-classifier.qza"
#CLASSIFIER_FP="gg-13-8-99-515-806-nb-classifier.qza" ## used for V4 region
CLASSIFIER_FP="${WORK_DIR}/gg-13-8-99-27-338-nb-classifier.qza" ## trained for V1V2 region truncated at 350 bp

DATA_DIR="${WORK_DIR}/Data"

#EMP_PAIRED_END_SEQUENCES_DIR="${DATA_DIR}/emp_paired_end_sequences"
#DATA_DIR="${DATA_DIR}/data_files"
DEMUX_DIR="${DATA_DIR}/demux_results"
DENOISE_DIR="${DATA_DIR}/denoising_results"
METRIC_DIR="${DATA_DIR}/core_metrics_results"

qiime taxa filter-table \
--i-table "${DENOISE_DIR}/table.qza" \
--i-taxonomy "${DENOISE_DIR}/taxonomy.qza" \
--p-exclude "Corynebacterium,Propionibacterium,Streptococcus,Acidovorax,Bradyrhizobium,Comamonas,Pelomonas,Pseudomonas,Ralstonia" \
--o-filtered-table "${DENOISE_DIR}/table-no-contaminants.qza"
#
#qiime feature-table summarize \
# --i-table "${DENOISE_DIR}/table-no-contaminants.qza" \
# --o-visualization "${DENOISE_DIR}/table-no-contaminants.qzv" \
# --m-sample-metadata-file subset_metadata.tsv
#
qiime tools export \
--input-path "${DENOISE_DIR}/table-no-contaminants.qza" \
--output-path "${DENOISE_DIR}/table-no-contaminants"

qiime diversity alpha-phylogenetic \
--i-phylogeny "${DENOISE_DIR}/rooted-tree.qza" \
--i-table "${DENOISE_DIR}/table-no-contaminants.qza" \
--p-metric faith_pd \
--o-alpha-diversity "${METRIC_DIR}/faith_pd_vector_no_contam.qza"

qiime tools export \
--input-path "${METRIC_DIR}/faith_pd_vector_no_contam.qza" \
--output-path "${METRIC_DIR}/faith_no_contam"

qiime diversity beta-phylogenetic \
--i-phylogeny "${DENOISE_DIR}/rooted-tree.qza" \
--i-table "${DENOISE_DIR}/table-no-contaminants.qza" \
--p-metric weighted_unifrac \
--o-distance-matrix "${METRIC_DIR}/weighted_unifrac_distance_matrix_no_contam.qza"

qiime tools export \
--input-path "${METRIC_DIR}/weighted_unifrac_distance_matrix_no_contam.qza" \
--output-path "${METRIC_DIR}/wu_no_contam"

qiime diversity beta-phylogenetic \
--i-phylogeny "${DENOISE_DIR}/rooted-tree.qza" \
--i-table "${DENOISE_DIR}/table-no-contaminants.qza" \
--p-metric unweighted_unifrac \
--o-distance-matrix "${METRIC_DIR}/unweighted_unifrac_distance_matrix_no_contam.qza"

qiime tools export \
--input-path "${METRIC_DIR}/unweighted_unifrac_distance_matrix_no_contam.qza" \
--output-path "${METRIC_DIR}/uu_no_contam"

###=====================
### BIOM CONVERT
###=====================
if [ ! -e "${DENOISE_DIR}/table-no-contaminants/feature-table.tsv" ]; then
biom convert \
-i "${DENOISE_DIR}/table-no-contaminants/feature-table.biom" \
-o "${DENOISE_DIR}/table-no-contaminants/feature-table.tsv" \
--to-tsv
fi

cd "${DENOISE_DIR}"

cp taxonomy/taxonomy.tsv biom-taxonomy.tsv

# had to vim biom-taxonomy.tsv and add
# "#OTUID taxonomy confidence" (without quotes)
# line to make it recognize the header and correctly add the taxonomy
# see here: https://forum.qiime2.org/t/exporting-and-modifying-biom-tables-e-g-adding-taxonomy-annotations/3630

biom add-metadata \
-i table-no-contaminants/feature-table.biom \
-o table-with-taxonomy.biom \
--observation-metadata-fp biom-taxonomy.tsv \
--sc-separated taxonomy

biom convert \
-i table-with-taxonomy.biom \
-o table-with-taxa.tsv \
--to-tsv --header-key taxonomy

echo "****"
echo "Done!"
echo "If nothing happened,"
echo "You may already have result files, check your directories!"
16 changes: 9 additions & 7 deletions qiime2/qiime2_pipeline.bash
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@

set -x
set -e
#set -u

#source ~/.bashrc
#conda activate qiime2-2018.11

if [ $# -ne 1 ]; then
echo "Usage: $0 MAPPING_FP"
Expand All @@ -15,22 +11,28 @@ if [ $# -ne 1 ]; then
fi

MAPPING_FP=$1
WORK_DIR="$(dirname ${MAPPING_FP})"
#PRJ_DIR set in run_qiime2.sh
WORK_DIR="${PRJ_DIR}/Data"

# See https://www.ostricher.com/2014/10/the-right-way-to-get-the-directory-of-a-bash-script/
SOURCE_REL="${BASH_SOURCE[0]}"
SOURCE_ABS=$(readlink -f "${SOURCE_REL}")
SOURCE_DIR=$(dirname "${SOURCE_ABS}")

### PATH TO Ceylan's CODE TO COMBINE I1 and I2
INDEX1_INDEX2_COMBINE_SCRIPT="$(dirname ${SOURCE_DIR})/combine_barcodes.py"
INDEX1_INDEX2_COMBINE_SCRIPT="${SOURCE_DIR}/combine_barcodes.py"

if [ ! -e $INDEX1_INDEX2_COMBINE_SCRIPT ]; then
echo "Cant find combine_barcodes script here: $INDEX1_INDEX2_COMBINE_SCRIPT"
exit 1
fi

## Taxonomy classifier setup. Two classifiers are currently available:
## classifiers trained on full length and on 515F/806R region of Greengenes 13_8 99% OTUs
## These can be downloaded from https://data.qiime2.org/2017.9/common/gg-13-8-99-nb-classifier.qza (full length)
## or https://data.qiime2.org/2017.9/common/gg-13-8-99-515-806-nb-classifier.qza (515F/806R region)

CLASSIFIER_FP="gg-13-8-99-nb-classifier.qza"
CLASSIFIER_FP="${PRJ_DIR}/gg-13-8-99-nb-classifier.qza"
#CLASSIFIER_FP="gg-13-8-99-515-806-nb-classifier.qza" ## used for V4 region
#CLASSIFIER_FP="gg-13-8-99-27-338-nb-classifier.qza" ## trained for V1V2 region truncated at 350 bp

Expand Down
8 changes: 6 additions & 2 deletions qiime2/run_qiime2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
#$ -l m_mem_free=20G

source ~/.bashrc
conda activate qiime2-2018.11
conda activate qiime2-2019.4

./qiime2_pipeline.bash Robinson_Run_1_Metadata.tsv
export PRJ_DIR="/mnt/isilon/microbiome/analysis/danielsg/sehgal_run_6"

export METADATA="${PRJ_DIR}/Data/sehgal_run_6_mapping_file.tsv"

./qiime2_pipeline.bash $METADATA