Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add workflow to detect structural variants #96

Merged
merged 5 commits into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/build-hic-breakfinder-dockerfile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) 2023 Roberto Rossini ([email protected])
# SPDX-License-Identifier: MIT

name: Build hic_breakfinder Dockerfile

# Run on pushes to main and on pull requests targeting main, but only when
# this workflow, the shared build workflow, or the hic-breakfinder
# Dockerfile(s) are modified.
on:
  push:
    branches:
      - main
    paths:
      - '.github/workflows/build-hic-breakfinder*dockerfile.yml'
      - '.github/workflows/build-dockerfile.yml'
      - 'containers/hic-breakfinder*.Dockerfile'
  pull_request:
    branches:
      - main
    paths:
      - '.github/workflows/build-hic-breakfinder*dockerfile.yml'
      - '.github/workflows/build-dockerfile.yml'
      - 'containers/hic-breakfinder*.Dockerfile'

jobs:
  build-hic-breakfinder-dockerfile:
    name: Build hic_breakfinder Dockerfile
    # The build itself is delegated to the repository's reusable workflow
    uses: paulsengroup/2022-mcf10a-cancer-progression/.github/workflows/build-dockerfile.yml@main
    with:
      dockerfile-glob: 'containers/hic-breakfinder*.Dockerfile'

    # NOTE(review): packages: write is presumably needed by the called
    # workflow to publish the image - confirm against build-dockerfile.yml
    permissions:
      contents: read
      packages: write
29 changes: 29 additions & 0 deletions .github/workflows/build-hictrans-dockerfile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) 2023 Roberto Rossini ([email protected])
# SPDX-License-Identifier: MIT

name: Build HiCTrans Dockerfile

# Run on pushes to main and on pull requests targeting main, but only when
# this workflow, the shared build workflow, or the hictrans Dockerfile(s)
# are modified.
on:
  push:
    branches:
      - main
    paths:
      - '.github/workflows/build-hictrans*dockerfile.yml'
      - '.github/workflows/build-dockerfile.yml'
      - 'containers/hictrans*.Dockerfile'
  pull_request:
    branches:
      - main
    paths:
      - '.github/workflows/build-hictrans*dockerfile.yml'
      - '.github/workflows/build-dockerfile.yml'
      - 'containers/hictrans*.Dockerfile'

jobs:
  build-hictrans-dockerfile:
    name: Build HiCTrans Dockerfile
    # The build itself is delegated to the repository's reusable workflow
    uses: paulsengroup/2022-mcf10a-cancer-progression/.github/workflows/build-dockerfile.yml@main
    with:
      dockerfile-glob: 'containers/hictrans*.Dockerfile'

    # NOTE(review): packages: write is presumably needed by the called
    # workflow to publish the image - confirm against build-dockerfile.yml
    permissions:
      contents: read
      packages: write
31 changes: 31 additions & 0 deletions .github/workflows/build-hint-dockerfile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright (c) 2023 Roberto Rossini ([email protected])
# SPDX-License-Identifier: MIT

name: Build HiNT Dockerfile

# Run on pushes to main and on pull requests targeting main, but only when
# this workflow, the shared build workflow, the hint Dockerfile(s), or the
# patch file listed below are modified.
on:
  push:
    branches:
      - main
    paths:
      - '.github/workflows/build-hint*dockerfile.yml'
      - '.github/workflows/build-dockerfile.yml'
      - 'containers/hint*.Dockerfile'
      - 'containers/patches/hint.patch'
  pull_request:
    branches:
      - main
    paths:
      - '.github/workflows/build-hint*dockerfile.yml'
      - '.github/workflows/build-dockerfile.yml'
      - 'containers/hint*.Dockerfile'
      - 'containers/patches/hint.patch'

jobs:
  build-hint-dockerfile:
    name: Build HiNT Dockerfile
    # The build itself is delegated to the repository's reusable workflow
    uses: paulsengroup/2022-mcf10a-cancer-progression/.github/workflows/build-dockerfile.yml@main
    with:
      dockerfile-glob: 'containers/hint*.Dockerfile'

    # NOTE(review): packages: write is presumably needed by the called
    # workflow to publish the image - confirm against build-dockerfile.yml
    permissions:
      contents: read
      packages: write
35 changes: 35 additions & 0 deletions bin/compute_restriction_sites_for_hint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python3

# Copyright (C) 2023 Roberto Rossini <[email protected]>
#
# SPDX-License-Identifier: MIT

import argparse

import bioframe as bf
import pandas as pd


def make_cli():
    """
    Build the command-line parser for this script.

    Positional arguments:
      fna      path to the genome FASTA file that will be digested
      enzymes  one or more restriction enzyme names

    :return: the configured argparse.ArgumentParser
    """
    cli = argparse.ArgumentParser(
        description="Print the restriction sites found in a FASTA file, one line per chromosome.",
    )

    cli.add_argument(
        "fna",
        type=str,
        help="Path to the genome in FASTA format.",
    )
    cli.add_argument(
        "enzymes",
        nargs="+",
        type=str,
        help="Name of one or more restriction enzymes used to digest the genome.",
    )

    return cli


if __name__ == "__main__":
    options = make_cli().parse_args()

    # Digest the genome once per enzyme and stack the resulting fragment tables
    genome = bf.load_fasta(options.fna)
    fragments = pd.concat([bf.digest(genome, enzyme) for enzyme in options.enzymes])

    # Different enzymes can report the same (chrom, start) pair: keep one copy
    cut_sites = fragments[["chrom", "start"]].drop_duplicates(keep="first")

    # One output line per chromosome: its name followed by the sorted,
    # space-separated start positions
    sites_by_chrom = cut_sites.groupby("chrom")["start"].aggregate(list)
    for chrom, positions in sites_by_chrom.items():
        print(chrom, " ".join(map(str, sorted(positions))))
57 changes: 57 additions & 0 deletions configs/detect_structural_variants.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (C) 2022 Roberto Rossini <[email protected]>
//
// SPDX-License-Identifier: MIT

params {
    // Base directories: every other path below is derived from data_dir
    data_dir   = 'data'
    input_dir  = "${data_dir}/input"
    output_dir = "${data_dir}/output/structural_variants"

    // Inputs taken from the nfcore_hic output and input folders
    mcools                 = "${data_dir}/output/nfcore_hic/mcools/hg38*_merged.mcool"
    nfcore_hic_samplesheet = "${data_dir}/input/nfcore_hic_samplesheet.csv"
    alignment_dir          = "${data_dir}/output/nfcore_hic/alignments"

    // Reference genome assembly and related files
    ref_genome_name  = 'hg38'
    reference_genome = "${data_dir}/input/${ref_genome_name}/${ref_genome_name}.filtered.fa"
    blacklist        = "${data_dir}/input/${ref_genome_name}/${ref_genome_name}_blacklist.bed.gz"

    // HiCTrans settings
    hictrans_resolution = 10000

    // HiNT settings
    hint_resolution           = 50 // kb
    hint_refzip               = "${data_dir}/input/hint/refData_${ref_genome_name}.zip"
    hint_backdirzip           = "${data_dir}/input/hint/backgroundMatrices_${ref_genome_name}.zip"
    restriction_enzymes       = 'DpnII HindIII'
    restriction_enzymes_alias = 'arima'

    // hic_breakfinder settings
    hic_breakfinder_quality_score  = 30
    hic_breakfinder_expected_intra = "${data_dir}/input/hic_breakfinder/intra_expect_100kb.${ref_genome_name}.txt"
    hic_breakfinder_expected_inter = "${data_dir}/input/hic_breakfinder/inter_expect_1Mb.${ref_genome_name}.txt"
}

process {
    // Container applied to every process that is not overridden by one of
    // the withName selectors below
    container = 'ghcr.io/paulsengroup/2022-mcf10a-cancer-progression/hictrans:e26ad6a'

    // HiCTrans
    withName: run_hictrans {
        memory = 40.GB
    }

    // HiNT
    withName: digest_genome_for_hint {
        container = 'ghcr.io/paulsengroup/2022-mcf10a-cancer-progression/hint:2.2.8'
    }
    withName: run_hint_cnv {
        memory = 6.GB
        container = 'ghcr.io/paulsengroup/2022-mcf10a-cancer-progression/hint:2.2.8'
    }
    withName: run_hint_tl {
        memory = 40.GB
        container = 'ghcr.io/paulsengroup/2022-mcf10a-cancer-progression/hint:2.2.8'
    }

    // Alignment pre-processing with samtools
    withName: filter_mappings {
        container = 'ghcr.io/paulsengroup/2022-mcf10a-cancer-progression/samtools:1.17'
    }
    withName: merge_bams {
        container = 'ghcr.io/paulsengroup/2022-mcf10a-cancer-progression/samtools:1.17'
    }

    // hic_breakfinder
    withName: run_hic_breakfinder {
        memory = 165.GB
        container = 'ghcr.io/paulsengroup/2022-mcf10a-cancer-progression/hic-breakfinder:30a0dcc'
    }
}
71 changes: 71 additions & 0 deletions containers/hic-breakfinder__v30a0dcc.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright (C) 2023 Roberto Rossini <[email protected]>
#
# SPDX-License-Identifier: MIT

# Build stage: compile hic_breakfinder from source at the requested revision
FROM mambaorg/micromamba:1.4.3 AS builder

ARG CONTAINER_VERSION

# Fail early with a clear message when the revision to build was not provided
RUN if [ -z "$CONTAINER_VERSION" ]; then echo "Missing CONTAINER_VERSION --build-arg" && exit 1; fi

# Activate the conda environment inside the RUN instructions below
ARG MAMBA_DOCKERFILE_ACTIVATE=1

ARG HIC_BREAKFINDER_GIT='https://github.com/dixonlab/hic_breakfinder.git'
ARG HIC_BREAKFINDER_TAG="${CONTAINER_VERSION}"

# Toolchain and libraries required to build hic_breakfinder
RUN micromamba install -y \
        -c conda-forge \
        -c bioconda \
        bamtools \
        eigen \
        git \
        gxx \
        make

# Fetch the sources under /tmp and check out the requested revision
RUN cd /tmp \
 && git clone "$HIC_BREAKFINDER_GIT" \
 && cd hic_breakfinder \
 && git checkout "$HIC_BREAKFINDER_TAG"

# Use the absolute path of the checkout so that this step does not depend on
# the working directory inherited from the base image
RUN cd /tmp/hic_breakfinder \
 && CXXFLAGS='-isystem /opt/conda/include/bamtools -isystem /opt/conda/include/eigen3 -fpermissive' \
    ./configure \
 && make -j $(nproc)


# Runtime stage: runtime dependencies plus the binary built in the builder stage
FROM mambaorg/micromamba:1.4.3 AS base

ARG CONTAINER_VERSION

# Fail early with a clear message when the version was not provided
RUN if [ -z "$CONTAINER_VERSION" ]; then echo "Missing CONTAINER_VERSION --build-arg" && exit 1; fi

# Activate the conda environment inside the RUN instructions below
ARG MAMBA_DOCKERFILE_ACTIVATE=1
# Previously named HINT_VERSION (copy-paste leftover); not referenced in this stage
ARG HIC_BREAKFINDER_VERSION="${CONTAINER_VERSION}"


RUN micromamba install -y \
        -c conda-forge \
        -c bioconda \
        bamtools \
        procps-ng \
 && micromamba clean --all -y

COPY --from=builder --chown=nobody:nogroup /tmp/hic_breakfinder/src/hic_breakfinder /usr/local/bin/
COPY --from=builder --chown=nobody:nogroup /tmp/hic_breakfinder/LICENSE             /usr/local/share/licenses/hic_breakfinder/

WORKDIR /data

ENV PATH="/opt/conda/bin:$PATH"
ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
CMD ["/usr/local/bin/hic_breakfinder"]

# Smoke test: unlike whereis, command -v exits with a non-zero status when the
# executable cannot be found in PATH, so a missing binary fails the build
RUN command -v hic_breakfinder

LABEL org.opencontainers.image.authors='Roberto Rossini <[email protected]>'
LABEL org.opencontainers.image.url='https://github.com/paulsengroup/2022-mcf10a-cancer-progression'
LABEL org.opencontainers.image.documentation='https://github.com/paulsengroup/2022-mcf10a-cancer-progression'
LABEL org.opencontainers.image.source='https://github.com/paulsengroup/2022-mcf10a-cancer-progression'
LABEL org.opencontainers.image.licenses='MIT'
# NOTE(review): CONTAINER_TITLE is not declared as an ARG in this stage, so the
# fallback title is presumably always used - confirm whether that is intended
LABEL org.opencontainers.image.title="${CONTAINER_TITLE:-hic_breakfinder}"
LABEL org.opencontainers.image.version="${CONTAINER_VERSION:-latest}"
83 changes: 83 additions & 0 deletions containers/hictrans__ve26ad6a.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Copyright (C) 2023 Roberto Rossini <[email protected]>
#
# SPDX-License-Identifier: MIT

# Download stage: fetch the HiCtrans sources at the requested revision
FROM ubuntu:22.04 AS download

ARG CONTAINER_VERSION
ARG HICTRANS_VERSION="${CONTAINER_VERSION}"

# Fail early with a clear message instead of letting git checkout run with an
# empty revision
RUN if [ -z "$HICTRANS_VERSION" ]; then echo "Missing CONTAINER_VERSION --build-arg" && exit 1; fi

RUN apt-get update \
 && apt-get install -y git \
 && cd /tmp \
 && git clone https://github.com/ay-lab/HiCtrans.git \
 && cd HiCtrans \
 && git checkout "$HICTRANS_VERSION"


# Runtime stage: R and its dependencies plus the HiCtrans script from the
# download stage
FROM mambaorg/micromamba:1.4.3 AS base

ARG CONTAINER_VERSION

# Fail early with a clear message when the version was not provided
RUN if [ -z "$CONTAINER_VERSION" ]; then echo "Missing CONTAINER_VERSION --build-arg" && exit 1; fi

# Activate the conda environment inside the RUN instructions below
ARG MAMBA_DOCKERFILE_ACTIVATE=1
ARG HICTRANS_VERSION="${CONTAINER_VERSION}"


RUN micromamba install -y \
        -c conda-forge \
        -c bioconda \
        cooler \
        pigz \
        procps-ng \
        r-catools \
        r-changepoint \
        r-data.table \
        r-depmixs4 \
        r-deoptimr \
        r-hashmap \
        r-optparse \
        r-r.utils \
        r-rcpp \
 && micromamba clean --all -y

COPY --from=download --chown=nobody:nogroup /tmp/HiCtrans/hictrans.v3.R /opt/hictrans/

# Create a small sh wrapper so that the R script can be invoked as "hictrans".
# Root is required to write under /opt; the default user is restored afterwards.
USER root
RUN mkdir /opt/hictrans/bin \
 && printf '%s\n%s "$@"' \
      '#!/usr/bin/env sh' \
      'Rscript /opt/hictrans/hictrans.v3.R' > /opt/hictrans/bin/hictrans \
 && chown -R nobody:nogroup /opt/hictrans \
 && chmod 755 /opt/hictrans/bin/hictrans
USER mambauser

WORKDIR /data

ENV PATH="/opt/conda/bin:/opt/hictrans/bin:$PATH"
ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
CMD ["/opt/hictrans/bin/hictrans"]

# We have to explicitly set these R_* env variables in order for the
# container to work correctly when running using Apptainer
ENV R_HOME=/opt/conda/lib/R
ENV R_LIBS=/opt/conda/lib/R/lib
ENV R_ENVIRON=/opt/conda/lib/R/etc/Renviron
ENV R_HISTFILE=/tmp/.Rhistory

# NOTE(review): the single quotes presumably keep these values as the literal
# text $R_HOME, $R_LIBS and $R_ENVIRON instead of expanding them at build
# time - confirm that this is intended
ENV R_HOME_USER='$R_HOME'
ENV R_LIBS_USER='$R_LIBS'
ENV R_ENVIRON_USER='$R_ENVIRON'
ENV R_PROFILE_USER=/opt/conda/lib/R/etc/.Rprofile

# Smoke test: make sure the wrapper, Rscript and the R dependencies are usable
RUN hictrans --help

LABEL org.opencontainers.image.authors='Roberto Rossini <[email protected]>'
LABEL org.opencontainers.image.url='https://github.com/paulsengroup/2022-mcf10a-cancer-progression'
LABEL org.opencontainers.image.documentation='https://github.com/paulsengroup/2022-mcf10a-cancer-progression'
LABEL org.opencontainers.image.source='https://github.com/paulsengroup/2022-mcf10a-cancer-progression'
LABEL org.opencontainers.image.licenses='MIT'
# NOTE(review): CONTAINER_TITLE is not declared as an ARG in this stage, so the
# fallback title is presumably always used - confirm whether that is intended
LABEL org.opencontainers.image.title="${CONTAINER_TITLE:-hictrans}"
LABEL org.opencontainers.image.version="${CONTAINER_VERSION:-latest}"
Loading
Loading