Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Just teraseq #41

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 211 additions & 7 deletions teraseq/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,212 @@
# Base image: Ubuntu 16.04. This is the only build stage; everything below is
# installed on top of it. The earlier `FROM ghcr.io/binpash/teraseq20:latest`
# line started a stage that nothing referenced, so it has been removed.
FROM ubuntu:16.04

# Image metadata labels.
# NOTE(review): the maintainer value below looks like an e-mail obfuscation
# placeholder rather than a real address — confirm and restore.
LABEL maintainer="[email protected]"
LABEL version="0.3"
LABEL description="This is custom Docker Image for \
analysis of TERA-Seq publication (DOI: https://doi.org/10.1093/nar/gkab713)."

# Disable prompts during package installation. Declared as ARG, so it applies
# to the build only.
ARG DEBIAN_FRONTEND=noninteractive

# Run the following RUN instructions through bash; later steps use bash
# features such as `echo -e` and `source`.
SHELL ["/bin/bash", "-c"]

### System-wide requirements
# g++, zlib1g-dev, and bzip2 are required only for Nanopolish.
# cpanminus is not installed here: it is not required when Perl uses the
# virtual environment method (option 2 below).
# The apt package lists are deleted in the same layer as the install.
RUN apt-get update \
&& apt-get install -y git gcc make wget g++ zlib1g-dev bzip2 \
&& rm -rf /var/lib/apt/lists/*

### Main GitHub repo
# The destination is relative to WORKDIR, so the build context ends up in
# /root/TERA-Seq_manuscript.
WORKDIR /root
COPY . TERA-Seq_manuscript

### Install Miniconda3
# Put the Miniconda binaries first on PATH for the remaining build steps and
# for containers started from the image. Uses the `ENV key=value` form; the
# space-separated `ENV key value` form is discouraged by Docker.
ENV PATH="/root/miniconda3/bin:${PATH}"
ARG PATH="/root/miniconda3/bin:${PATH}"

# Download the pinned installer, run it in batch mode (-b) and delete the
# installer in the same layer. `mkdir -p` keeps the step from failing if
# /root/.conda already exists.
RUN wget \
    https://repo.anaconda.com/miniconda/Miniconda3-py37_23.1.0-1-Linux-x86_64.sh -O Miniconda3.sh \
    && mkdir -p /root/.conda \
    && bash Miniconda3.sh -b \
    && rm -f Miniconda3.sh
#RUN conda --version

## Install Mamba for faster installation
#RUN conda install -c conda-forge mamba

# Get Conda yml and install environment
# The environment file comes from the repository copied into /root above.
#RUN mamba env create -f /usr/local/TERA-Seq_manuscript/teraseq-env.yml
RUN conda env create -f /root/TERA-Seq_manuscript/teraseq-env.yml

# Increase default FastQC RAM
# Replaces -Xmx250m with -Xmx5g in the FastQC launcher script of the teraseq env.
RUN sed -i 's/-Xmx250m/-Xmx5g/g' /root/miniconda3/envs/teraseq/opt/fastqc-*/fastqc

#ENV PATH="${PATH}:/root/miniconda3/envs/teraseq/bin"

# Link the environment's R and curl into /bin so they can be called without
# the environment's bin directory being on PATH (curl is used below to fetch cpanm).
RUN ln -s /root/miniconda3/envs/teraseq/bin/R /bin/R \
&& ln -s /root/miniconda3/envs/teraseq/bin/curl /bin/curl

### Save default Conda path
# Delete any existing CONDA_PREFIX line from PARAMS.sh, then append the
# Miniconda install location.
RUN sed -i '/CONDA_PREFIX/d' /root/TERA-Seq_manuscript/PARAMS.sh \
&& echo -e "CONDA_PREFIX=\"/root/miniconda3\"" >> /root/TERA-Seq_manuscript/PARAMS.sh

### Perl
# Relative paths in the installation steps below (perl-virtualenv, nanopolish,
# cutadapt, ...) resolve against this directory until WORKDIR changes again.
WORKDIR /root/TERA-Seq_manuscript/tools

## System-wide install (option 1)
# RUN cpanm inc::Module::Install \
# && cpanm autodie \
# && cpanm DBI \
# && cpanm Devel::Size \
# && cpanm Getopt::Long::Descriptive \
# && cpanm IO::File \
# && cpanm IO::Interactive \
# && cpanm IO::Uncompress::Gunzip \
# && cpanm Params::Validate \
# && cpanm Params::Util \
# && cpanm Sub::Install \
# && cpanm Modern::Perl \
# && cpanm --force MooseX::App::Simple \
# && cpanm --force MooseX::App::Command \
# && cpanm --force MooseX::Getopt::Meta::Attribute::Trait::NoGetopt
#
# WORKDIR /usr/local/share/perl/5.22.1
#
# RUN git clone --recursive https://github.com/genoo/GenOO.git GenOO_git
# WORKDIR /usr/local/share/perl/5.22.1/GenOO_git
# RUN git reset 6527029 --hard
# WORKDIR /usr/local/share/perl/5.22.1
# RUN mkdir GenOO \
# && cp -r GenOO_git/lib/GenOO/* GenOO/
#
# RUN cpanm CLIPSeqTools
#
# RUN wget https://raw.githubusercontent.com/mourelatos-lab/TERA-Seq_manuscript/main/misc/GenOOx/Data/File/SAMminimap2.pm -O /usr/local/share/perl/5.22.1/GenOOx/Data/File/SAMminimap2.pm
# RUN mkdir /usr/local/share/perl/5.22.1/GenOOx/Data/File/SAMminimap2 \
# && wget https://raw.githubusercontent.com/mourelatos-lab/TERA-Seq_manuscript/main/misc/GenOOx/Data/File/SAMminimap2/Record.pm -O /usr/local/share/perl/5.22.1/GenOOx/Data/File/SAMminimap2/Record.pm

## Virtual environment install (option 2)
# Export Conda perl lib path (mainly for local::lib module).
# Uses the `ENV key=value` form; the space-separated form is discouraged by Docker.
ENV PERL5LIB="/root/miniconda3/envs/teraseq/lib/site_perl/5.26.2/:${PERL5LIB}"
ARG PERL5LIB="/root/miniconda3/envs/teraseq/lib/site_perl/5.26.2/:${PERL5LIB}"

# Clone perl-virtualenv at a pinned commit, create the "teraseq" Perl
# virtualenv with it, and download cpanm into that virtualenv's bin directory.
RUN git clone https://github.com/jizhang/perl-virtualenv.git \
    && cd perl-virtualenv/ \
    && git reset f931774 --hard \
    && chmod u+x virtualenv.pl \
    && ./virtualenv.pl teraseq \
    && . teraseq/bin/activate \
    && curl -L https://cpanmin.us/ -o teraseq/bin/cpanm \
    && chmod +x teraseq/bin/cpanm

# Install the Perl dependencies into the "teraseq" Perl virtualenv.
# NOTE(review): the pinned "Module@version" specs on these lines had been
# mangled into "[email protected]" by e-mail obfuscation, which cpanm cannot
# resolve. The module names are restored from the option-1 list above; the
# version numbers could not be recovered.
# TODO: re-add the "@<version>" pins from the upstream Dockerfile so the build
# stays reproducible.
RUN . perl-virtualenv/teraseq/bin/activate \
    && cpanm inc::Module::Install \
    && cpanm autodie \
    && cpanm DBI \
    && cpanm Devel::Size \
    && cpanm Getopt::Long::Descriptive \
    && cpanm IO::File \
    && cpanm IO::Interactive \
    && cpanm --force IO::Uncompress::Gunzip \
    && cpanm Params::Validate \
    && cpanm Params::Util \
    && cpanm Sub::Install \
    && cpanm Modern::Perl \
    && cpanm --force MooseX::App::Simple \
    && cpanm --force MooseX::App::Command \
    && cpanm --force MooseX::Getopt::Meta::Attribute::Trait::NoGetopt

# Install GenOO from source at a pinned commit: clone into the virtualenv's
# lib/perl5 directory, reset to commit 6527029, then copy the contents of the
# clone's lib/GenOO/ into lib/perl5/GenOO/.
RUN git clone --recursive https://github.com/genoo/GenOO.git perl-virtualenv/teraseq/lib/perl5/GenOO_git \
&& cd perl-virtualenv/teraseq/lib/perl5/GenOO_git/ \
&& git reset 6527029 --hard \
&& cd ../ \
&& mkdir GenOO \
&& cp -r GenOO_git/lib/GenOO/* GenOO/

# Install specific version of Perl module https://stackoverflow.com/questions/260593/how-can-i-install-a-specific-version-of-a-set-of-perl-modules
# NOTE(review): the "CLIPSeqTools@<version>" spec on this line had been mangled
# into "[email protected]" by e-mail obfuscation. The module name is restored from
# the option-1 section above; the version could not be recovered.
# TODO: re-add the "@<version>" pin from the upstream Dockerfile.
# Afterwards the repository's GenOOx extension modules (misc/GenOOx) are copied
# into the virtualenv's GenOOx directory.
RUN . perl-virtualenv/teraseq/bin/activate \
    && cpanm --force CLIPSeqTools \
    && cp -r /root/TERA-Seq_manuscript/misc/GenOOx/* perl-virtualenv/teraseq/lib/perl5/GenOOx/

################################################################################
### Nanopolish
# Default version
# Builds Nanopolish at commit 480fc85 and links the binary into the teraseq
# Conda env. Steps, in order:
#   1. Patch the Makefile so Eigen is downloaded from GitLab instead of the
#      Bitbucket URL, and adjust the tarball and directory names to match.
#   2. Replace the fast5 and htslib directories with fresh clones reset to
#      fixed commits.
#   3. Run make and symlink the resulting `nanopolish` executable.
# The two commented-out sed lines inside the chain are kept as-is.
RUN git clone --recursive https://github.com/jts/nanopolish.git \
&& mv nanopolish nanopolish-480fc85 \
&& cd nanopolish-480fc85/ \
&& git reset 480fc85 --hard \
&& sed -i 's#http://bitbucket.org/eigen/eigen/get/$(EIGEN_VERSION).tar.bz2#https://gitlab.com/libeigen/eigen/-/archive/$(EIGEN_VERSION)/eigen-$(EIGEN_VERSION).tar.bz2#' Makefile \
&& sed -i 's/tar -xjf $(EIGEN_VERSION).tar.bz2/tar -xjf eigen-$(EIGEN_VERSION).tar.bz2/' Makefile \
&& sed -i 's/eigen-eigen-\*/eigen-$(EIGEN_VERSION)/' Makefile \
# && sed -i '27 i EIGEN_VERSION_MV ?= d9c80169e091a2c6e75ceb509f81764d22cf6a63' Makefile \
# && sed -i 's/mv\ eigen-\$(EIGEN_VERSION)/mv\ eigen-\$(EIGEN_VERSION_MV)/' Makefile \
&& rm -rf fast5 \
&& git clone https://github.com/mateidavid/fast5.git \
&& cd fast5/ \
&& git reset 18d6e34 --hard \
&& cd ../ \
&& rm -rf htslib \
&& git clone --recursive https://github.com/samtools/htslib.git \
&& cd htslib/ \
&& git reset 3dc96c5 --hard \
&& cd ../ \
&& make \
&& ln -s $(pwd)/nanopolish /root/miniconda3/envs/teraseq/bin/nanopolish

# New version with polya hmm scripts
# A second checkout, reset to commit ab9722b and kept in its own directory.
# It is only cloned here; this step does not run make.
RUN git clone --recursive https://github.com/jts/nanopolish.git \
&& mv nanopolish nanopolish-ab9722b \
&& cd nanopolish-ab9722b/ \
&& git reset ab9722b --hard

################################################################################
### Other dependencies
# Make sure to activate Conda: from here on every RUN command is executed
# through `conda run -n teraseq`.
SHELL ["conda", "run", "-n", "teraseq", "/bin/bash", "-c"]

## GeneCycle
# longitudinal and fdrtool are installed from CRAN first, then GeneCycle 1.1.5
# is installed from its source tarball.
# The CRAN mirror is addressed over HTTPS (it was plain HTTP before) so the
# packages are not downloaded over an unauthenticated connection.
#RUN Rscript -e 'install.packages("GeneCycle", repos="https://cloud.r-project.org")'
RUN Rscript -e 'install.packages(c("longitudinal", "fdrtool"), repos = "https://cloud.r-project.org"); install.packages("https://cran.r-project.org/src/contrib/GeneCycle_1.1.5.tar.gz", repos=NULL, type="source")'

## Cutadapt
# Installs cutadapt 2.5 plus pysam, numpy, pandas, matplotlib and seaborn into
# a dedicated Python venv in cutadapt-2.5/venv.
# The closing `which cutadapt` prints the installed executable's path and
# makes the step fail if the executable is not found.
RUN mkdir cutadapt-2.5 \
&& cd cutadapt-2.5/ \
&& python3 -m venv venv \
&& source venv/bin/activate \
&& python3 -m pip install --upgrade pip \
&& pip3 install cutadapt==2.5 pysam numpy pandas matplotlib seaborn \
&& which cutadapt

## DeepTools
# Installs deeptools 3.5.0 into a dedicated Python venv in deepTools-3.5.0/venv.
# wheel is installed before deeptools; the closing `deeptools --version` is a
# check that the installed command runs.
RUN mkdir deepTools-3.5.0 \
&& cd deepTools-3.5.0/ \
&& python3 -m venv venv \
&& source venv/bin/activate \
&& python3 -m pip install --upgrade pip \
&& pip3 install wheel \
&& pip3 install deeptools==3.5.0 \
&& deeptools --version

## ONT-Fast5-API
# Installs ont-fast5-api 3.3.0 together with h5py and seaborn into a dedicated
# Python venv in ont-fast5-api/venv.
RUN mkdir ont-fast5-api \
&& cd ont-fast5-api/ \
&& python3 -m venv venv \
&& source venv/bin/activate \
&& pip install ont-fast5-api==3.3.0 h5py seaborn

## Jvarkit
# Builds the biostar84452 tool from jvarkit at commit 014d3e9 and links the
# resulting jar into the teraseq Conda env under the name remove-softlip.jar.
# The target directory is created with the same literal path the symlink uses,
# so the step does not depend on $CONDA_PREFIX being set. `-p` keeps it from
# failing if the directory already exists.
RUN git clone "https://github.com/lindenb/jvarkit.git" \
    && mv jvarkit jvarkit-014d3e9 \
    && cd jvarkit-014d3e9/ \
    && git reset 014d3e9 --hard \
    && ./gradlew biostar84452 \
    && mkdir -p /root/miniconda3/envs/teraseq/share/jvarkit \
    && ln -s $(pwd)/dist/biostar84452.jar /root/miniconda3/envs/teraseq/share/jvarkit/remove-softlip.jar


# Add utils dir to PATH
# The repository is copied to /root/TERA-Seq_manuscript in this image, so the
# utils directory is under /root. The previous /usr/local/... entry pointed at
# a location this Dockerfile never creates.
ENV PATH="/root/TERA-Seq_manuscript/tools/utils:${PATH}"

# Copy the build context into the repository directory again (it was already
# copied once near the top of this file).
WORKDIR /root/TERA-Seq_manuscript
COPY . .
# Move the contents of activate.d/ and deactivate.d/ into the teraseq env's
# etc/conda directories.
# NOTE(review): presumably Conda activation/deactivation hook scripts — confirm.
RUN mv -f activate.d/* /root/miniconda3/envs/teraseq/etc/conda/activate.d
RUN mv -f deactivate.d/* /root/miniconda3/envs/teraseq/etc/conda/deactivate.d
# Execute data/run.sh at image build time.
# NOTE(review): presumably prepares the reference data — confirm.
WORKDIR /root/TERA-Seq_manuscript/data
RUN ./run.sh
# Working directory for containers started from this image.
WORKDIR /root/TERA-Seq_manuscript/samples
Empty file added teraseq/cleanup.sh
Empty file.
65 changes: 65 additions & 0 deletions teraseq/misc/GenOOx/Data/File/SAMminimap2.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# POD documentation - main docs before the code

=head1 NAME

GenOOx::Data::File::SAMminimap2 - GenOO framework extension to read SAM files created by the Minimap2 aligner

=head1 SYNOPSIS

GenOO framework extension to read SAM files created by the Minimap2 aligner.
Include it in your script and ask GenOO SAM parser to use it.

use GenOOx::Data::File::SAMminimap2::Record;

my $file_parser = GenOO::Data::File::SAM->new(
file => 'file.sam',
records_class => 'GenOOx::Data::File::SAMminimap2::Record'
);

while (my $record = $file_parser->next_record) {
# $record is now an instance of GenOOx::Data::File::SAMminimap2::Record.
print $record->cigar."\n"; # CIGAR string
print $record->flag."\n"; # flag
print $record->number_of_mappings."\n"; # new stuff not present by default
print $record->best_hit."\n"; # new stuff not present by default
print $record->number_of_best_hits."\n"; # new stuff not present by default
}


=head1 DESCRIPTION

The GenOO framework SAM parser avoids code that is unique to specific programs and makes no assumptions about the optional fields in a SAM file. This module is a plugin for the GenOO framework that provides the functionality for reading SAM files generated by the Minimap2 aligner.
It is built on top of the generic GenOO SAM parser; to use it, load it in your script and ask the GenOO SAM parser to use it as its records class.

=head1 EXAMPLES

# Create a parser
my $file_parser = GenOO::Data::File::SAM->new(
file => 'file.sam',
records_class => 'GenOOx::Data::File::SAMminimap2::Record'
);

# Loop on the records of the file
while (my $record = $file_parser->next_record) {
# $record is now an instance of GenOOx::Data::File::SAMminimap2::Record.
print $record->cigar."\n"; # CIGAR string
print $record->flag."\n"; # flag
print $record->number_of_mappings."\n"; # new stuff not present by default in GenOO
print $record->best_hit."\n"; # new stuff not present by default in GenOO
print $record->number_of_best_hits."\n"; # new stuff not present by default in GenOO
}

=cut

# Let the code begin...

# This package defines no subroutines of its own: it only declares the
# namespace and its version. The record class shown in the SYNOPSIS is
# GenOOx::Data::File::SAMminimap2::Record.
package GenOOx::Data::File::SAMminimap2;
$GenOOx::Data::File::SAMminimap2::VERSION = '0.0.1';

#######################################################################
#######################   Load External modules   #####################
#######################################################################
use Modern::Perl;


# A module file must end with a true value.
1;
105 changes: 105 additions & 0 deletions teraseq/misc/GenOOx/Data/File/SAMminimap2/Record.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# POD documentation - main docs before the code

=head1 NAME

GenOOx::Data::File::SAMminimap2::Record - Represents a record of a SAM format file generated by Minimap2

=head1 SYNOPSIS

# Object representing a record of a sam file

# To initialize
my $sam_record = GenOOx::Data::File::SAMminimap2::Record->new(
fields => [qname,flag, rname, pos, mapq, cigar,
rnext, pnext, tlen, seq, qual, tags]
);


=head1 DESCRIPTION

This object represents a record of a SAM file generated by Minimap2 and offers methods for accessing its different
attributes. It implements several additional methods that transform the original attributes into more manageable
ones, e.g. the actual strand is extracted from the FLAG attribute.

=head1 EXAMPLES

# Check if the record corresponds to a match
my $mapped = $sam_record->is_mapped;

# Check if the record corresponds to a non match
my $unmapped = $sam_record->is_unmapped;

# Parse the FLAG attribute and return 1 or -1 for the strand
my $strand = $sam_record->strand;

=cut

# Let the code begin...

package GenOOx::Data::File::SAMminimap2::Record;
$GenOOx::Data::File::SAMminimap2::Record::VERSION = '0.0.1';

#######################################################################
####################### Load External modules #####################
#######################################################################
use Moose;
use namespace::autoclean;


#######################################################################
############################ Inheritance ##########################
#######################################################################
extends 'GenOO::Data::File::SAM::Record';


#######################################################################
######################## Interface Methods ########################
#######################################################################
# Returns the value of the record's 'X0:i' tag, or 0 when the tag is missing
# or holds a false value.
sub number_of_optimal_hits {
    my $self = shift;

    my $optimal_hits = $self->tag('X0:i');
    return $optimal_hits ? $optimal_hits : 0;
}

# Returns the value of the record's 'X1:i' tag, or 0 when the tag is missing
# or holds a false value.
sub number_of_suboptimal_hits {
    my $self = shift;

    my $suboptimal_hits = $self->tag('X1:i');
    return $suboptimal_hits ? $suboptimal_hits : 0;
}

# Returns the sum of number_of_optimal_hits and number_of_suboptimal_hits
# for this record.
sub number_of_mappings {
    my $self = shift;

    my $optimal    = $self->number_of_optimal_hits;
    my $suboptimal = $self->number_of_suboptimal_hits;
    return $optimal + $suboptimal;
}

# Returns the value of the record's 'XP:i' tag, or 0 when the tag is missing
# or holds a false value.
sub best_hit {
    my $self = shift;

    my $best = $self->tag('XP:i');
    return $best ? $best : 0;
}

# Returns the value of the record's 'XN:i' tag, or 0 when the tag is missing
# or holds a false value.
sub number_of_best_hits {
    my $self = shift;

    my $best_hits = $self->tag('XN:i');
    return $best_hits ? $best_hits : 0;
}

# Returns the entries of the record's 'XA:Z' tag as a list, obtained by
# splitting the tag value on ';'. Returns an empty list when the tag is
# not defined.
sub alternative_mappings {
    my $self = shift;

    my $xa_value = $self->tag('XA:Z');
    my @entries  = defined $xa_value ? split(/;/, $xa_value) : ();

    return @entries;
}


#######################################################################
############################ Finalize #############################
#######################################################################
__PACKAGE__->meta->make_immutable;

1;
Empty file added teraseq/run.sh
Empty file.
1 change: 1 addition & 0 deletions teraseq/samples/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Loading
Loading