diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a584..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { @@ -9,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index b6b31907..72dda289 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,7 +18,16 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 1ea3fb25..e0446d52 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -83,7 +83,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. 
Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..23e13ac0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 84743a89..029731bf 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,16 +8,13 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests - if: github.repository == 'nf-core/rnadeseq' + if: github.repository == 'qbic-pipelines/rnadeseq' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but 
still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -31,9 +28,9 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index a1300168..40c155ab 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,13 +5,13 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests - if: github.repository == 'nf-core/rnadeseq' + if: github.repository == 'qbic-pipelines/rnadeseq' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -25,9 +25,9 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 9daf0509..c38a1cb0 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -15,11 +15,11 @@ jobs: run: | "{ [[ ${{github.event.pull_request.head.repo.full_name }} == qbic-pipelines/rnadeseq ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]" - # If the above check failed, post a comment on the PR explaining the failure + # If 
the above check failed, post a comment on the PR explaining the failure {%- raw %} # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: @@ -41,4 +41,4 @@ jobs: Thanks again for your contribution! repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + allow-repeats: false {%- endraw %} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e40f2eb6..11ca23a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,6 @@ on: env: NXF_ANSI_LOG: false - CAPSULE_LOG: none concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -40,14 +39,14 @@ jobs: environment.yml - name: Build new docker image if: env.MATCHED_FILES - run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.2 + run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.3 # Change the version above and the third version below before/after release - name: Pull docker image if: ${{ !env.MATCHED_FILES }} run: | docker pull ghcr.io/qbic-pipelines/rnadeseq:dev - docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.2 + docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 @@ -94,14 +93,14 @@ jobs: environment.yml - name: Build new docker image if: env.MATCHED_FILES - run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.2 + run: docker build --no-cache . 
-t ghcr.io/qbic-pipelines/rnadeseq:2.3 # Change the version above and the third version below before/after release - name: Pull docker image if: ${{ !env.MATCHED_FILES }} run: | docker pull ghcr.io/qbic-pipelines/rnadeseq:dev - docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.2 + docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90ec..0b6b1f27 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..2d20d644 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,86 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." 
+ required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not 
supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 588b4f43..a2d77dca 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,19 +4,26 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && contains(github.event.comment.body, '@nf-core-bot fix linting') && - github.repository == 'nf-core/rnadeseq' + github.repository == 'qbic-pipelines/rnadeseq' runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: 
prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "result=pass" >> $GITHUB_OUTPUT - else - echo "result=fail" >> $GITHUB_OUTPUT - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. 
+ See [CI log](https://github.com/qbic-pipelines/rnadeseq/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 888cb4bc..1fcafe88 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,74 +11,34 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. 
- To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: "3.12" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.8" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -99,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: 
marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,16 +4,17 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 6d79fbe1..ee90d841 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,38 +1,28 @@ repository_type: pipeline +nf_core_version: "2.14.1" lint: files_unchanged: - .github/CONTRIBUTING.md - .github/ISSUE_TEMPLATE/bug_report.yml - - .github/ISSUE_TEMPLATE/config.yml - - .github/ISSUE_TEMPLATE/feature_request.yml - .github/PULL_REQUEST_TEMPLATE.md - .github/workflows/branch.yml - - .github/workflows/linting.yml - .gitignore - - assets/email_template.html - assets/email_template.txt - assets/sendmail_template.txt - docs/README.md - 
LICENSE files_exist: - - .github/ISSUE_TEMPLATE/config.yml - - .github/workflows/awsfulltest.yml - - .github/workflows/awstest.yml - - assets/multiqc_config.yaml - - assets/nf-core-qbic-pipelines/rnadeseq_logo_light.png - - bin/markdown_to_html.r - - conf/test_full.config - - docs/images/nf-core-qbic-pipelines/rnadeseq_logo_dark.png - - docs/images/nf-core-qbic-pipelines/rnadeseq_logo_light.png - - lib/WorkflowQbic-pipelines/rnadeseq.groovy + - lib/nfcore_external_java_deps.jar nextflow_config: - - manifest.homePage - manifest.name - - params.input - - show_hidden_params + - manifest.homePage + - config_defaults: + - params.custom_config_base + - params.report_file + - params.references_file multiqc_config: - - report_comment # otherwise throws error: multiqc_config: 'assets/multiqc_config.yml' does not contain a matching 'report_comment'. │ + - report_comment # otherwise throws error: multiqc_config: 'assets/multiqc_config.yml' does not contain a matching 'report_comment'. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb9..4dc0f1dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,13 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" + rev: "v3.1.0" hooks: - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 56d1d982..039ed3dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,35 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## 2.3 + +### Added + +- [#237](https://github.com/qbic-pipelines/rnadeseq/pull/237) Added Euclidean distance/z-score info to report text/heatmaps; added tables next to heatmaps in case the heatmap plotting does not work +- [#229](https://github.com/qbic-pipelines/rnadeseq/pull/229) Added param for clustering (or not) the heatmaps +- [#226](https://github.com/qbic-pipelines/rnadeseq/pull/226) Added logic to read newer multiqc files from rnaseq 3.12 +- [#225](https://github.com/qbic-pipelines/rnadeseq/pull/225) Added param for pathway analysis datasources +- [#221](https://github.com/qbic-pipelines/rnadeseq/pull/221) Added padj to volcano hovertext + +### Changed + +- [#237](https://github.com/qbic-pipelines/rnadeseq/pull/237) Changed geom_jitter to geom_point for volcano plot so that points are accurate +- [#225](https://github.com/qbic-pipelines/rnadeseq/pull/225) Shortened names of some output files + +### Fixed + +- [#241](https://github.com/qbic-pipelines/rnadeseq/pull/241) Undo fix branch protection bug in order to get precommit test to run +- [#240](https://github.com/qbic-pipelines/rnadeseq/pull/240) Fix branch protection bug --> third attempt +- [#239](https://github.com/qbic-pipelines/rnadeseq/pull/239) Fix branch protection bug --> second attempt +- [#238](https://github.com/qbic-pipelines/rnadeseq/pull/238) Fix branch protection bug +- [#237](https://github.com/qbic-pipelines/rnadeseq/pull/237) Fix version in usage docs and remove duplicate datasources in test_custom_gmt.config +- [#236](https://github.com/qbic-pipelines/rnadeseq/pull/236) Fixed new multiqc check (in case both the files of the old and new mqc version are present) +- [#234](https://github.com/qbic-pipelines/rnadeseq/pull/234) Fixed unnecessary file permission changes from #225 +- [#228](https://github.com/qbic-pipelines/rnadeseq/pull/228) Fixed text in report +- [#229](https://github.com/qbic-pipelines/rnadeseq/pull/229) Fixed cutoff enrichment plot labels, fixed wrong plotMA function 
being called (also fixed this changelog) +- [#225](https://github.com/qbic-pipelines/rnadeseq/pull/225) Fixed too many devices error from tryCatch around normalized heatmaps +- [#221](https://github.com/qbic-pipelines/rnadeseq/pull/221) Fixed non-conformable arrays bug, fix wrong volcano colors when no DE genes + ## 2.2 Avenue of Poplars ### Added diff --git a/CITATIONS.md b/CITATIONS.md index b2f4f79e..34bac619 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -8,16 +8,6 @@ > Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. -## Pipeline tools - -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - - > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. - -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) - - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. 
- ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. 
"We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 
-We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. 
They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. 
- Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. 
(This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. 
-- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. 
Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the core team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with.
+ +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. 
+- Removing access to gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind them what behaviour is unacceptable; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC.
diff --git a/Dockerfile b/Dockerfile index c13267b3..617a13e0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,14 +5,14 @@ LABEL org.opencontainers.image.authors="Gisela Gabernet, Alexander Peltzer, Oska LABEL org.opencontainers.image.licenses=MIT COPY environment.yml / #RUN conda install -c conda-forge mamba -RUN mamba env create --file /environment.yml -p /opt/conda/envs/qbic-pipelines-rnadeseq-2.2 && \ +RUN mamba env create --file /environment.yml -p /opt/conda/envs/qbic-pipelines-rnadeseq-2.3 && \ mamba clean --all --yes RUN apt-get update -qq && \ apt-get install -y zip procps ghostscript # Add conda installation dir to PATH -ENV PATH /opt/conda/envs/qbic-pipelines-rnadeseq-2.2/bin:$PATH +ENV PATH /opt/conda/envs/qbic-pipelines-rnadeseq-2.3/bin:$PATH # Dump the details of the installed packates to a file for posterity -RUN mamba env export --name qbic-pipelines-rnadeseq-2.2 > qbic-pipelines-rnadeseq-2.2.yml +RUN mamba env export --name qbic-pipelines-rnadeseq-2.3 > qbic-pipelines-rnadeseq-2.3.yml # Instruct R processes to use these empty files instead of clashing with a local config RUN touch .Rprofile RUN touch .Renviron diff --git a/assets/RNAseq_report.Rmd b/assets/RNAseq_report.Rmd index b1aa99b8..9e66e7d4 100644 --- a/assets/RNAseq_report.Rmd +++ b/assets/RNAseq_report.Rmd @@ -43,6 +43,9 @@ params: custom_gmt: '' set_background: '' custom_background: '' + datasources: '' + heatmaps_cluster_rows: '' + heatmaps_cluster_cols: '' #Additional args for the report path_proj_summary: '' @@ -168,7 +171,7 @@ isProvided <- function(value) { #this function is used to produce a more informative error message when count table and metadata table disagree write_error_msg <- function(counts, qbicCodes) { error_msg <- "Count table headers do not exactly match the metadata table sample names!\n" - + counts_msg <- "" counts_offending <- "" for (c in names(counts)) { @@ -177,7 +180,7 @@ write_error_msg <- function(counts, qbicCodes) { counts_offending <- 
paste0(counts_offending, c, "\n") } } - + meta_msg <- "" meta_offending <- "" for (m in qbicCodes) { @@ -253,7 +256,6 @@ invisible( lapply(c( ``` ```{r create_outdirs, echo=FALSE, message=FALSE, warning=FALSE, results = 'hide'} - # create directories needed ifelse(!dir.exists("differential_gene_expression"), dir.create("differential_gene_expression"), FALSE) dir.create("differential_gene_expression/metadata") @@ -369,7 +371,7 @@ metadata$Secondary.Name <- gsub(" ; ", "_", metadata$Secondary.Name) metadata$Secondary.Name <- gsub(" ", "_", metadata$Secondary.Name) metadata$sampleName = paste(row.names(metadata),metadata$Secondary.Name,sep="_") row.names(metadata) = metadata$sampleName -metadata_save <- metadata +metadata_save <- metadata # save copy so that original metadata object can be changed if (params$input_type == "smrnaseq") { @@ -398,7 +400,7 @@ if (params$input_type == "smrnaseq") { missing_files <- paste0(substring(missing_files, 1, nchar(missing_files)-2), ".") stop(missing_files, call.=F) } - + # Combine files for later files <- c(hairpin_files, other_files) @@ -414,8 +416,6 @@ if (params$input_type == "smrnaseq") { count.table$Ensembl_ID <- rownames(count.table) count.table <- cbind(Ensembl_ID=rownames(count.table), gene_name=rownames(count.table), count.table) write.table(count.table, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) - - } # Load count table for FeatureCounts @@ -528,6 +528,7 @@ DT::datatable(df_QA_neat, options = list(scrollX = "100%", scrollY = "400px", p if (params$input_type %in% c("featurecounts", "smrnaseq")) { cds <- DESeqDataSetFromMatrix( countData =count.table, colData =metadata, design = eval(parse(text=as.character(design[[1]])))) } else if (params$input_type %in% c("rsem", "salmon")) { + ## Create a dataframe which consists of both the gene id and the transcript name 
gtf <- rtracklayer::import(params$path_gtf) gtf <- as.data.frame(gtf, header=T) @@ -599,7 +600,8 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { } } missing_files <- paste0(substring(missing_files, 1, nchar(missing_files)-2), ".") - stop(missing_files, call.=F) } + stop(missing_files, call.=F) + } #The following steps are necessary for the processing of salmon output as the files do # not contain integer counts and can therefore not be directly used for cds @@ -642,6 +644,7 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) { nm <- assays(cds)[["avgTxLength"]] sf <- estimateSizeFactorsForMatrix(counts(cds)/nm) } + write.table(sf,paste("differential_gene_expression/gene_counts_tables/sizeFactor_libraries.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = T, col.names = F, qmethod = c("escape", "double")) # Write cds assay table to file @@ -693,6 +696,7 @@ if (run_rlog){ # rlog transformation rld <- rlog(cds, blind=FALSE) rld_names <- merge(x=gene_names, y=assay(rld), by.x = "Ensembl_ID", by.y="row.names") + rld_names <- rld_names[order(rld_names$Ensembl_ID),] write.table(rld_names, "differential_gene_expression/gene_counts_tables/rlog_transformed_gene_counts.tsv", append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) # save table to another variable if pathway analysis @@ -706,6 +710,7 @@ if (run_rlog){ # vst transformation vsd <- vst(cds, blind=FALSE, nsub = params$nsub_genes) vsd_names <- merge(x=gene_names, y=assay(vsd), by.x = "Ensembl_ID", by.y="row.names") + vsd_names <- vsd_names[order(vsd_names$Ensembl_ID),] write.table(vsd_names, "differential_gene_expression/gene_counts_tables/vst_transformed_gene_counts.tsv", append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double")) # save table to another variable if pathway analysis @@ -719,7 +724,7 @@ if 
(run_rlog){ # Cooks distances: get important for example when checking knock-out and overexpression studies pdf("differential_gene_expression/plots/further_diagnostics_plots/Cooks-distances.pdf") par(mar=c(10,3,3,3)) -par( mfrow = c(1,2)) +par(mfrow = c(1,2)) boxplot(log10(assays(cds)[["cooks"]]), range=0, las=2,ylim = c(-15, 15),main="log10-Cooks") boxplot(log2(assays(cds)[["cooks"]]), range=0, las=2,ylim = c(-15, 15),main="log2-Cooks") dev.off() @@ -775,13 +780,13 @@ res=0 for (i in resultsNames(cds)[-1]) { res = results(cds,name = i) pdf(paste("differential_gene_expression/plots/further_diagnostics_plots/all_results_MA_plot_",i,".pdf",sep="")) - plotMA(res,ylim = c(-4, 4)) + DESeq2::plotMA(res,ylim = c(-4, 4)) dev.off() png(paste("differential_gene_expression/plots/further_diagnostics_plots/all_results_MA_plot_",i,".png",sep="")) - plotMA(res,ylim = c(-4, 4)) + DESeq2::plotMA(res,ylim = c(-4, 4)) dev.off() svg(paste("differential_gene_expression/plots/further_diagnostics_plots/all_results_MA_plot_",i,".svg",sep="")) - plotMA(res,ylim = c(-4, 4)) + DESeq2::plotMA(res,ylim = c(-4, 4)) dev.off() # multiple hyptothesis testing @@ -864,6 +869,51 @@ for (i in resultsNames(cds)[-1]) { ```{r summary_stats, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/")), results = 'asis'} +# Check first if a new or old multiqc file was provided +if (file.exists(paste0(wd,"/QC/multiqc_data/multiqc_general_stats.txt"))) { + mqc_stats <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_general_stats.txt"), header=TRUE, sep="\t") + columns <- c( + "Sample", + "FastQC_mqc.generalstats.fastqc.total_sequences", + "FastQC_mqc.generalstats.fastqc.percent_duplicates", + "FastQC_mqc.generalstats.fastqc.percent_gc", + "Cutadapt_mqc.generalstats.cutadapt.percent_trimmed", + "STAR_mqc.generalstats.star.uniquely_mapped_percent", + "featureCounts_mqc.generalstats.featurecounts.percent_assigned" + ) + if (all(columns %in% 
colnames(mqc_stats))) { + mqc_version <- 'old_mqc' + } else if (file.exists(paste0(wd,"/QC/multiqc_data/multiqc_samtools_stats.txt"))) { + mqc_stats <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_samtools_stats.txt"), header=TRUE, sep="\t") + columns <- c( + "Sample", + "raw_total_sequences", + "reads_mapped_percent", + "reads_duplicated_percent" + ) + if (all(columns %in% colnames(mqc_stats))) { + mqc_version <- 'new_mqc' + } else { + stop("Could not find a suitable multiqc table; please provide a correct multiqc.zip file or omit the parameter --multiqc altogether.") + } + } +} else if (file.exists(paste0(wd,"/QC/multiqc_data/multiqc_samtools_stats.txt"))) { + mqc_stats <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_samtools_stats.txt"), header=TRUE, sep="\t") + columns <- c( + "Sample", + "raw_total_sequences", + "reads_mapped_percent", + "reads_duplicated_percent" + ) + if (all(columns %in% colnames(mqc_stats))) { + mqc_version <- 'new_mqc' + } else { + stop("Could not find a suitable multiqc table; please provide a correct multiqc.zip file or omit the parameter --multiqc altogether.") + } +} else { + stop("Could not find a suitable multiqc table; please provide a correct multiqc.zip file or omit the parameter --multiqc altogether.") +} + cat(paste0("*** # Read mapping and assignment @@ -875,57 +925,145 @@ The read sequencing quality was analyzed using `FASTQC`. More detailed informati The read mapping and gene assignment statistics, together with other quality control parameters are summarized here. -The table below, extracted from the `MultiQC` report, shows a summary of the bioinformatics analysis quality control. -Note: Duplicate intercept (%) is the percentage of duplicate reads for the intercept of a linear model of duplicate reads vs read counts. High numbers of duplicates at low read counts can indicate low library complexity with technical duplication." 
+The table below, extracted from the `MultiQC` report, shows a summary of the bioinformatics analysis quality control.", +ifelse(mqc_version == 'old_mqc', +"Note: Duplicate intercept (%) is the percentage of duplicate reads for the intercept of a linear model of duplicate reads vs read counts. High numbers of duplicates at low read counts can indicate low library complexity with technical duplication.", +"") )) -df_DE <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_general_stats.txt"), header=TRUE, sep="\t") -is.num <- sapply(df_DE, is.numeric) -df_DE[is.num] <- lapply(df_DE[is.num], round, 2) -columns <- c("Sample", - "FastQC_mqc.generalstats.fastqc.total_sequences", - "FastQC_mqc.generalstats.fastqc.percent_duplicates", - "FastQC_mqc.generalstats.fastqc.percent_gc", - "Cutadapt_mqc.generalstats.cutadapt.percent_trimmed", - "STAR_mqc.generalstats.star.uniquely_mapped_percent", - "featureCounts_mqc.generalstats.featurecounts.percent_assigned") -df_DE_selected <- df_DE[,columns] + +is.num <- sapply(mqc_stats, is.numeric) +mqc_stats[is.num] <- lapply(mqc_stats[is.num], round, 2) # The following gets rid of multiQC entries that are spread across two rows, one ending with -fw (see https://github.com/qbic-pipelines/rnadeseq/issues/114) -sum_NA <- function(x) {if (all(is.na(x))) x[NA_integer_] else sum(x, na.rm = TRUE)} -df_DE_selected$Sample <- gsub("-fw", "", df_DE_selected$Sample) -df_DE_selected <- df_DE_selected %>% group_by(Sample) %>% summarise_all(sum_NA) +mqc_stats$Sample <- gsub("-fw", "", mqc_stats$Sample) # Remove -fw to produce duplicate rows for summarising + +# In newer RNASeq versions (after 1.4.2?), duplicate rows might not end with -fw, but with _1 and _2 instead; deal with those in the following lines +mqc_stats$Sample <- sub("_[0-9]+$", "", mqc_stats$Sample) # Remove _1 etc. 
to produce duplicate rows for summarising +# Now that the duplicate columns have been given the same names, we can group and collapse them with the appropriate functions +sum_NA <- function(x) {if (all(is.na(x))) x[NA_integer_] else sum(x, na.rm = TRUE)} # Function to sum across rows if available, else NA +avg_NA <- function(x) {if (all(is.na(x))) x[NA_integer_] else mean(x, na.rm = TRUE)} # Function to average across rows if available, else NA -df_DE_selected$Sample <- substr(df_DE_selected$Sample, 1, 10) -colnames(df_DE_selected) <- c("Sample", "C1", "C2", "C3", "C4", "C5", "C6") -n_rows = nrow(df_DE_selected) +if (mqc_version == 'old_mqc') { + table_complete <- mqc_stats[,columns] %>% + group_by(Sample) %>% + dplyr::summarize( + across(matches("FastQC_mqc.generalstats.fastqc.total_sequences"), sum_NA), + across(everything(), avg_NA) + ) +} else { + table_complete <- mqc_stats[,columns] %>% + group_by(Sample) %>% + dplyr::summarize( + across(matches("raw_total_sequences"), sum_NA), + across(everything(), avg_NA) + ) + + # For the new version, some more tables need to be read + if (params$input_type == "salmon" && file.exists(paste0(wd,"/QC/multiqc_data/multiqc_star.txt"))) { + star <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_star.txt"), header=TRUE, sep="\t", check.names = F)[,c("Sample", "uniquely_mapped_percent")] + } else if (params$input_type == "rsem" && file.exists(paste0(wd,"/QC/multiqc_data/multiqc_rsem.txt"))) { + star <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_rsem.txt"), header=TRUE, sep="\t", check.names = F)[,c("Sample", "Total", "Unique")] + star$uniquely_mapped_percent <- star$Unique/star$Total + star$Unique <- NULL + star$Total <- NULL + } else { + star <- NULL + } + if (file.exists(paste0(wd,"/QC/multiqc_data/multiqc_cutadapt.txt"))) { + cutadapt <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_cutadapt.txt"), header=TRUE, sep="\t", check.names = F)[,c("Sample", "percent_trimmed")] + cutadapt$Sample <- 
sub("_[0-9]+$", "", cutadapt$Sample) # Remove _1 etc. to produce duplicate rows for summarising + cutadapt <- cutadapt %>% group_by(Sample) %>% summarise_all(avg_NA) + } else { + cutadapt <- NULL + } + if (file.exists(paste0(wd,"/QC/multiqc_data/multiqc_rseqc_read_distribution.txt"))) { + rseqc <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_rseqc_read_distribution.txt"), header=TRUE, sep="\t", check.names = F)[,c("Sample", "total_tags", "total_assigned_tags")] + } else { + rseqc <- NULL + } + if (file.exists(paste0(wd,"/QC/multiqc_data/multiqc_fastqc.txt"))) { + fastqc_untrimmed <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_fastqc.txt"), header=TRUE, sep="\t", check.names = F)[,c("Sample", "%GC")] + colnames(fastqc_untrimmed)[colnames(fastqc_untrimmed) == "%GC"] <- "GC_untrimmed" + fastqc_untrimmed$Sample <- sub("_[0-9]+$", "", fastqc_untrimmed$Sample) # Remove _1 etc. to produce duplicate rows for summarising + fastqc_untrimmed <- fastqc_untrimmed %>% group_by(Sample) %>% summarise_all(avg_NA) + } else { + fastqc_untrimmed <- NULL + } + if (file.exists(paste0(wd,"/QC/multiqc_data/multiqc_fastqc_1.txt"))) { + fastqc_trimmed <- read.table(file = paste0(wd,"/QC/multiqc_data/multiqc_fastqc_1.txt"), header=TRUE, sep="\t", check.names = F)[,c("Sample", "%GC")] + colnames(fastqc_trimmed)[colnames(fastqc_trimmed) == "%GC"] <- "GC_trimmed" + fastqc_trimmed$Sample <- sub("_[0-9]+$", "", fastqc_trimmed$Sample) # Remove _1 etc. 
to produce duplicate rows for summarising + fastqc_trimmed <- fastqc_trimmed %>% group_by(Sample) %>% summarise_all(avg_NA) + } else { + fastqc_trimmed <- NULL + } +} + +if (mqc_version == 'new_mqc') { + + # Merge the additional tables into the stats + if (!is.null(cutadapt)) table_complete <- merge(table_complete,cutadapt,by="Sample") + if (!is.null(rseqc)) table_complete <- merge(table_complete,rseqc,by="Sample") + if (!is.null(star)) table_complete <- merge(table_complete,star,by="Sample") + if (!is.null(fastqc_untrimmed)) table_complete <- merge(table_complete,fastqc_untrimmed,by="Sample") + if (!is.null(fastqc_trimmed)) table_complete <- merge(table_complete,fastqc_trimmed,by="Sample") +} + +# Reduce to QBiC code +table_complete$Sample <- substr(table_complete$Sample, 1, 10) metadata <- read.table((paste0(wd, "/differential_gene_expression/metadata/metadata.tsv")), header=TRUE, sep="\t") -metadata = metadata[,c("QBiC.Code")] +metadata <- metadata[,c("QBiC.Code")] #the following makes metadata a df again (in R, when extracting only 1 column from a df, it becomes a list) metadata <- data.frame(matrix(unlist(metadata))) colnames(metadata) <- c("Sample") -table_complete = merge(metadata,df_DE_selected,by="Sample") -colnames = c("Sample", "Number of reads (M)", "Duplicates (%)", "GC (%)", "Trimmed reads (%)", "Mapped reads (%)", "Assigned reads (%)") -colnames(table_complete) <- c("Sample", "C1", "C2", "C3", "C4", "C5", "C6") +table_complete <- merge(metadata,table_complete,by="Sample") +if (mqc_version == 'old_mqc') { + colnames <- c("Sample", "Number of reads (M)", "Duplicates (%)", "GC (%)", "Trimmed reads (%)", "Mapped reads (%)", "Assigned reads (%)") + colnames(table_complete) <- c("Sample", "ReadNumber", "DuplicateReadsIntercept", "GCcontent", "TrimmedReads", "MappedReads", "AssignedReads") + + table_complete <- table_complete %>% + mutate( + Sample = Sample, + ReadNumber = round((ReadNumber/1000000),2), + DuplicateReadsIntercept = 
ifelse(DuplicateReadsIntercept > 1, + cell_spec(DuplicateReadsIntercept, color="orange", bold=T), + cell_spec(DuplicateReadsIntercept, color="black")), + GCcontent = GCcontent, + TrimmedReads = TrimmedReads, + MappedReads = ifelse(MappedReads > 80, + color_bar("lightblue")(MappedReads), + color_bar("orange")(MappedReads)), + AssignedReads = ifelse(AssignedReads > 60, + color_bar("lightblue")(AssignedReads), + color_bar("orange")(AssignedReads)) + ) +} else { + colnames <- c("Sample", "Number of reads (M)", "Duplicates (%)", "Untrimmed GC (%)", "Trimmed GC (%)", "Trimmed reads (%)", "Mapped reads (%)", "Assigned reads (%)") + + table_complete <- table_complete %>% + transmute( + Sample = Sample, + ReadNumber = color_bar("lightblue")(round((raw_total_sequences/1000000),2)), + DuplicateReadsIntercept = ifelse(reads_duplicated_percent > 1, + cell_spec(reads_duplicated_percent, color="orange", bold=T), + cell_spec(reads_duplicated_percent, color="black")), + GC_untrimmed = GC_untrimmed, + GC_trimmed = GC_trimmed, + TrimmedReads = percent_trimmed, + MappedReads = ifelse(uniquely_mapped_percent > 80, + color_bar("lightblue")(uniquely_mapped_percent), + color_bar("orange")(uniquely_mapped_percent)), + AssignedReads = ifelse((total_assigned_tags/total_tags*100) > 60, + color_bar("lightblue")((total_assigned_tags/total_tags)*100), + color_bar("orange")((total_assigned_tags/total_tags)*100)) + ) +} + +# This does some final formatting of the table in the report table_complete %>% - mutate( - Sample = Sample, - ReadNumber = color_bar("lightblue")(round((C1/1000000),2)), - DuplicateReadsIntercept = ifelse(C2 > 1, - cell_spec(C2, color="orange", bold=T), - cell_spec(C2, color="black")), - GCcontent = C3, - TrimmedReads = C4, - MappedReads = ifelse(C5 > 80, - color_bar("lightblue")(C5), - color_bar("orange")(C5)), - AssignedReads = ifelse(C6 > 60, - color_bar("lightblue")(C6), - color_bar("orange")(C6)) - ) %>% - dplyr::select(1,8:13) %>% kable(escape=F,row.names=F, 
col.names=colnames) %>% kable_styling("hover") %>% scroll_box(width = "100%", height = "400px") @@ -948,7 +1086,7 @@ The read mapping statistics to the reference genome for each sample are shown be ```{r STAR_percentages_plot, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/")), out.width="160%", dpi=1800, fig.cap="STAR: Mapping Statistics", fig.align='center'} # Disable auto-conversion of paths to relative paths as we provide include_graphics() with absolute paths. Also, error=F is necessary in every include_graphic() call to not fail with an error knitr.graphics.rel_path = F -knitr::include_graphics(paste0(wd, "/QC/multiqc_plots/svg/mqc_star_alignment_plot_1_pc.svg"), error=F) +knitr::include_graphics(paste0(wd, "/QC/multiqc_plots/svg/", ifelse(mqc_version == 'old_mqc',"mqc_star_alignment_plot_1_pc.svg", "mqc_samtools_alignment_plot_1_pc.svg")), error=F) ``` ```{r STAR_readnums,echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/"))} @@ -960,14 +1098,15 @@ cat(paste0("*** ``` ```{r STAR_readnums_plot, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/")), out.width="160%", dpi=1800, fig.cap="STAR: Mapping Statistics", fig.align='center'} -knitr::include_graphics(paste0(wd, "/QC/multiqc_plots/svg/mqc_star_alignment_plot_1.svg"), error=F) +knitr::include_graphics(paste0(wd, "/QC/multiqc_plots/svg/", ifelse(mqc_version == 'old_mqc',"mqc_star_alignment_plot_1.svg", "mqc_samtools_alignment_plot_1.svg")), error=F) + ``` ```{r FC_read_assignment, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/"))} cat(paste0("*** \ - +", ifelse(mqc_version == 'old_mqc', " ## Read assignment statistics {.tabset} The statistics of read assignment to genes are shown below. Most reads should be assigned. 
Interpretation of the different assignment types: @@ -982,14 +1121,18 @@ The statistics of read assignment to genes are shown below. Most reads should be ### Percentages {-}
-")) +", " +## Read assignment statistics {.tabset} + +The statistics of read assignment to genes are shown below for pipeline runs on data produced by `rnaseq <= v1.4.2`. If a later version was used, these plots are not generated by rnaseq anymore. However, you can find `star/featurecounts/.featureCounts.txt.summary` or `star_salmon/featurecounts/.featureCounts.txt.summary` files in the rnaseq output folder which contain the necessary information for creating such plots. +"))) ``` -```{r FC_percentages_plot, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/")), out.width="160%", dpi=1200, fig.cap="featureCounts: Assignments", fig.align='center'} +```{r FC_percentages_plot, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/")) && mqc_version == 'old_mqc', out.width="160%", dpi=1200, fig.cap=ifelse(mqc_version == 'old_mqc', "featureCounts: Assignments", "STAR: Alignments"), fig.align='center'} knitr::include_graphics(paste0(wd, "/QC/multiqc_plots/svg/mqc_featureCounts_assignment_plot_1_pc.svg"), error=F) ``` -```{r FC_readnums, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/"))} +```{r FC_readnums, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/")) && mqc_version == 'old_mqc'} cat(paste0("***
@@ -999,7 +1142,7 @@ cat(paste0("*** ")) ``` -```{r FC_readnums_plot, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/")), out.width="160%", dpi=1200, fig.cap="featureCounts: Assignments", fig.align='center'} +```{r FC_readnums_plot, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/")) && mqc_version == 'old_mqc', out.width="160%", dpi=1200, fig.cap=ifelse(mqc_version == 'old_mqc', "featureCounts: Assignments", "STAR: Alignments"), fig.align='center'} knitr::include_graphics(paste0(wd, "/QC/multiqc_plots/svg/mqc_featureCounts_assignment_plot_1.svg"), error=F) ``` @@ -1037,6 +1180,23 @@ cat(paste0("*** knitr::include_graphics(paste0(wd, "/QC/multiqc_plots/svg/mqc_rseqc_read_distribution_plot_1.svg"), error=F) ``` +```{r Read_strand, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/"))} +cat(paste0("*** + +\ + +## Read strandedness {.tabset} + +The following plots show the number/percentage of reads that match the strandedness of overlapping transcripts. It can be used to infer whether RNA-seq library preps are stranded (sense or antisense). + +
+")) +``` + +```{r Read_strandpercentages_plot, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/")), out.width="160%", dpi=1200, fig.cap="RSeQC: Infer Experiment", fig.align='center'} +knitr::include_graphics(paste0(wd, "/QC/multiqc_plots/svg/mqc_rseqc_infer_experiment_plot_1.svg"), error=F) +``` + ```{r MQC_end, echo=FALSE, message=FALSE, warning=FALSE, results='asis', eval=dir.exists(paste0(wd,"/QC/multiqc_data/"))} cat(paste0("***
@@ -1052,7 +1212,7 @@ cat(paste0("*** The raw count table and normalized count tables are available [here](./differential_gene_expression/gene_counts_tables). The differential expression analysis is performed using the raw gene count table. -For PCA analysis and heatmap plotting, the `r norm_method_long_text` normalized gene counts were used. +For PCA analysis and heatmap plotting, the `r norm_method_long_text` normalized gene counts were used. `r norm_method_override_text` `r nsub_genes_text` @@ -1113,9 +1273,20 @@ A PCA of the batch-effect corrected data is shown below and can be found [here]( if (run_rlog) { assay(rld) <- limma::removeBatchEffect(assay(rld), rld$batch) rld_corrected <- rld + out_rld <- as.data.frame(assay(rld_corrected)) + out_rld <- out_rld[order(rownames(out_rld)),] + out_rld <- cbind(Ensembl_ID = rownames(out_rld), gene_name = rld_names$gene_name, out_rld[,!names(out_rld) %in% c("gene_name")]) + + write.table(out_rld, file="differential_gene_expression/gene_counts_tables/rlog_transformed_gene_counts_batchcorrected.tsv", quote=F, sep="\t", row.names=F) } else { assay(vsd) <- limma::removeBatchEffect(assay(vsd), vsd$batch) vsd_corrected <- vsd + + out_vsd <- as.data.frame(assay(vsd_corrected)) + out_vsd <- out_vsd[order(rownames(out_vsd)),] + out_vsd <- cbind(Ensembl_ID = rownames(out_vsd), gene_name = vsd_names$gene_name, out_vsd[,!names(out_vsd) %in% c("gene_name")]) + + write.table(out_vsd, file="differential_gene_expression/gene_counts_tables/vst_transformed_gene_counts_batchcorrected.tsv", quote=F, sep="\t", row.names=F) } pcaData2 <- plotPCA(if (run_rlog) rld else vsd, intgroup=c("combfactor"), ntop = dim(if (run_rlog) rld else vsd)[1], returnData=TRUE) percentVar <- round(100*attr(pcaData, "percentVar")) @@ -1149,7 +1320,7 @@ config(pca2, modeBarButtonsToAdd = list(svg_exp)) %>% layout() ## Sample distance heatmap -A sample distance heatmap was generated from the distances of the normalized expression values for all samples. 
+A sample distance heatmap was generated from the Euclidean distances of the normalized expression values for all samples. This visualizes how similar the samples are to each other according to gene expression. Two samples have more similar gene expression if their distance is closer to 0 (darker blue). @@ -1163,6 +1334,7 @@ sampleDistClust <- hclust(sampleDists) # Do clustering as input for heatmaps so sampleDistMatrix <- as.matrix(sampleDists) colors = colorRampPalette(rev(RColorBrewer::brewer.pal(9, "Blues")))(255) par(oma=c(3,3,3,3)) +write.table(sampleDistMatrix, file="differential_gene_expression/plots/Heatmaps_of_distances.tsv", sep="\t", quote=F) dist_heatmap <- pheatmap(mat = sampleDistMatrix, color=colors, cluster_rows = sampleDistClust, cluster_cols = sampleDistClust) svg("differential_gene_expression/plots/Heatmaps_of_distances.svg") @@ -1213,7 +1385,7 @@ After unzipping the results ZIP file, the differential expression results can be - `DE_genes_tables/`: folder containing one tab-separated table for each of the contrasts in the analysis. Each table contains a list of all differentially expressed genes in the contrast, specifying the mean gene expression across all samples (baseMean), and the log2 fold change value and p-adjusted values (padj) for this contrast. - `final_gene_table/final_gene_list_DESeq2.tsv`: table containing a list of all genes considered in the analysis. Here a summary of the log2 Fold Change and p-adjusted values for all contrasts is displayed. Additionally, the column **filter** shows if this gene was differentially expressed (DE) in any of the contrasts, or not (not_DE). The column **contrast_vector** contains for each contrast considered in the analysis a 1 if the gene was differentially expressed for this contrast or a 0 if it was not. - `plots/`: - - `Heatmaps_of_distances.pdf/.svg`: heatmap of the pairwise euclidean distances among samples, when the `r norm_method_text` normalized gene counts are considered. 
+ - `Heatmaps_of_distances.pdf/.png/.svg`: heatmap of the pairwise Euclidean distances among samples, when the `r norm_method_text` normalized gene counts are considered. - `PCA_plot.pdf`: PCA of the `r norm_method_text` normalized gene counts. - `boxplots_example_genes/`: boxplots of the normalized gene counts for each of the sample groups for example genes. - `boxplots_requested_genes/`: boxplots of the normalized gene counts for each of the sample groups for the list of requested genes. @@ -1251,7 +1423,7 @@ if (isProvided(params$path_contrast_matrix)){ # Check that contrast matrix is valid if (length(coefficients) != nrow(contrasts)){ - stop("Error: Your contrast table has a different number of rows than the number of coefficients in the DESeq2 model.") + stop("Your contrast table has a different number of rows than the number of coefficients in the DESeq2 model.") } ## Contrast calculation for contrast matrix @@ -1348,6 +1520,7 @@ if (isProvided(params$path_contrast_list)) { contrast_names <- append(contrast_names, contname) } } + if (isProvided(params$path_contrast_pairs)) { contrasts <- read.table(path_contrast_pairs, sep="\t", header = T, colClasses = "character") write.table(contrasts, file="differential_gene_expression/metadata/contrast_pairs.tsv", sep="\t", quote=F, col.names = T, row.names = F) @@ -1437,6 +1610,11 @@ write(contrast_names, file="contrast_names.txt", sep="\t") # Remove identical columns of DE_genes_df DE_genes_df$DE_genes_df <- NULL idx <- duplicated(t(DE_genes_df)) + +# If any padj or logFC columns were marked for removal (TRUE), undo that by setting FALSE as we need these cols +idx[grepl("padj", as.character(names(idx)))] <- FALSE +idx[grepl("log2FoldChange", as.character(names(idx)))] <- FALSE + DE_genes_df <- DE_genes_df[, !idx] DE_genes_df$Ensembl_ID <- row.names(DE_genes_df) DE_genes_df <- DE_genes_df[,c(dim(DE_genes_df)[2],1:dim(DE_genes_df)[2]-1)] @@ -1552,17 +1730,20 @@ for (file in allgenes_files){ } DE_all <- ldply(table_list, 
rbind) DE_all$logpval <- -log10(DE_all$padj) +DE_all$truelogpval <- DE_all$logpval # Save true value for hovertext DE_all$logpval[DE_all$logpval > 16] <- 17 DE_all <- na.omit(DE_all) +DE_all <- DE_all %>% mutate(color = ifelse(abs(DE_all$log2FoldChange) >= params$logFC_threshold & DE_all$logpval >= -log10(params$adj_pval_threshold), "red", "black")) # Create column which assigns each entry a plot color so that coloring is consistent even when no DE genes are found log2FoldChange_min <- min(DE_all$log2FoldChange) log2FoldChange_max <- max(DE_all$log2FoldChange) # get quotient of log2FoldChange range and 10 to determine if xticks of plot need to be scaled discretely (otherwise, for large FC ranges, there will be too many xticks and they will overlap) log2FoldChange_quotient <- length(seq(log2FoldChange_min, log2FoldChange_max, 10)) -pg <- ggplot(DE_all, aes(x=log2FoldChange, y=logpval, text=paste("Gene: ", gene_name, "
", "Log2FC: ", formatC(log2FoldChange, digits=2)))) + - #this goes after alpha=0.5: - geom_jitter(alpha=0.5, width = 0.2, aes(color=ifelse(abs(DE_all$log2FoldChange) >= params$logFC_threshold & DE_all$logpval >= -log10(params$adj_pval_threshold), "Differentially expressed genes", "Non-differentially expressed genes"))) + +pg <- ggplot(DE_all, aes(x=log2FoldChange, y=logpval, text=paste("Gene: ", gene_name, + "
", "Log2FC: ", formatC(log2FoldChange, digits=2), + "
", "-Log10padj: ", formatC(truelogpval, digits=2)))) + + geom_point(alpha=0.5, width = 0.2, aes(color=ifelse(DE_all$color == "red", "Differentially expressed genes", "Non-differentially expressed genes"))) + geom_hline(yintercept = 16, linetype= "dashed", size = 0.2, color = "grey") + geom_hline(yintercept = -log10(params$adj_pval_threshold), size = 0.2, color = "grey") + geom_vline(xintercept = -params$logFC_threshold, size = 0.2, color = "grey") + @@ -1650,7 +1831,7 @@ if (isProvided(params$path_genelist)) { ggsave(filename=paste("differential_gene_expression/plots/boxplots_requested_genes/",requested_genes_plot_gene_name[i],"_",requested_genes_plot_Ensembl[i],".svg",sep=""), width=10, height=5, plot=plot) ggsave(filename=paste("differential_gene_expression/plots/boxplots_requested_genes/",requested_genes_plot_gene_name[i],"_",requested_genes_plot_Ensembl[i],".png",sep=""), width=10, height=5, plot=plot) ggsave(filename=paste("differential_gene_expression/plots/boxplots_requested_genes/",requested_genes_plot_gene_name[i],"_",requested_genes_plot_Ensembl[i],".pdf",sep=""), width=10, height=5, plot=plot) - } + } } ``` @@ -1717,7 +1898,7 @@ if (isProvided(params$path_genelist)) { ggsave(filename=paste0("differential_gene_expression/plots/boxplots_requested_genes/",requested_genes_plot_gene_name[i],"_",requested_genes_plot_Ensembl[i],"_after_batchcorrect.svg"), width=10, height=5, plot=plot) ggsave(filename=paste0("differential_gene_expression/plots/boxplots_requested_genes/",requested_genes_plot_gene_name[i],"_",requested_genes_plot_Ensembl[i],"_after_batchcorrect.png"), width=10, height=5, plot=plot) ggsave(filename=paste0("differential_gene_expression/plots/boxplots_requested_genes/",requested_genes_plot_gene_name[i],"_",requested_genes_plot_Ensembl[i],"_after_batchcorrect.pdf"), width=10, height=5, plot=plot) - } + } } ``` @@ -1771,8 +1952,12 @@ BiocManager::install(params$species_library, lib=species_dir, version="3.17", fo library(params$species_library, 
lib.loc=species_dir, character.only=T) species_library_installed <- get(params$species_library) -# gprofiler pathway / term sources parameters -datasources <- c("KEGG", "REAC") +# If provided, prepare datasources from comma-separated string +if (isProvided(params$datasources)) { + datasources <- unlist(strsplit(params$datasources, ',')) +} else { + datasources <- c() +} # Default to 1 for the nb of genes DE in a pathway min_DEG_pathway <- as.integer(params$min_DEG_pathway) @@ -1802,7 +1987,18 @@ mv_status <- F if (isProvided(params$custom_gmt)){ # If custom GMT file was provided, filter it before uploading to use in gost queries (gprofiler will NOT filter itself, so otherwise the output will contain all kinds of datasources, not just the selected ones) - out_gmt <- (Filter(function(line) any(startsWith(line, datasources)), readLines(params$custom_gmt))) + out_gmt <- readLines(params$custom_gmt) + + # If datasources are also provided, filter the GMT + if (isProvided(params$datasources)) { + go_positions <- grep("^GO", datasources) + if (length(go_positions)) { + datasources <- datasources[-go_positions] # If there are any datasources starting with GO, remove them (gets rid of GO:BP etc.)
+ datasources <- append(datasources, "GO") # Then add the single entry GO, as otherwise, the filtering might not recognize these datasources + } + out_gmt <- (Filter(function(line) any(startsWith(line, datasources)), out_gmt)) # Remove GMT lines that don't start with a datasource + } + out_path <- paste0(tools::file_path_sans_ext(basename(params$custom_gmt)), "_filtered.gmt") writeLines(out_gmt, out_path) gost_id <- upload_GMT_file(out_path) @@ -1844,6 +2040,11 @@ if (mv_status) { } } +# Set datasources to NULL instead of c() to ensure that the gost queries work +if (!isProvided(params$datasources)) { + datasources <- NULL +} + # ------------------ # Set default params # ------------------ @@ -1859,11 +2060,12 @@ theme_set(theme_classic()) for (file in contrast_files){ - #Reading DE genes list + # Read DE genes list fname <- tools::file_path_sans_ext(basename(file)) dir.create(paste("pathway_analysis", fname, sep="/")) dir.create(paste("pathway_analysis", fname, "KEGG_pathways", sep="/")) + dir.create(paste("pathway_analysis", fname, "enrichment_plots", sep="/")) DE_genes <- read.csv(file = paste0(path_contrasts, file), sep="\t", header = T) DE_genes <- as.data.frame(DE_genes) @@ -1888,11 +2090,11 @@ for (file in contrast_files){ custom_bg=custom_background, domain_scope="custom_annotated" ) - + pathway_gostres <- gostres$result pathway_gostres <- as.data.frame(pathway_gostres[which(pathway_gostres$significant==TRUE),]) pathway_gostres <- pathway_gostres[which(pathway_gostres$intersection_size>=min_DEG_pathway),] - + # Also run without bg for Venn diagrams gostres_nobg <- gost(query=q, organism=gost_id, @@ -1919,7 +2121,6 @@ for (file in contrast_files){ ) } - # Make data frame of gost result pathway_gostres <- gostres$result @@ -1938,13 +2139,13 @@ for (file in contrast_files){ # Generate non-interactive pathway dotplots in the folder pg <- gostplot(gostres, capped=T, interactive=F) - ggsave(pg, filename = paste0("pathway_analysis", "/", fname, 
"_gost_pathway_enrichment_plot.pdf"), + ggsave(pg, filename = paste0("pathway_analysis/", "/", fname, "/gost_pathway_gostplot.pdf"), device="pdf", height=10, width=15, units="cm", limitsize=F) - ggsave(pg, filename = paste0("pathway_analysis", "/", fname, "_gost_pathway_enrichment_plot.png"), + ggsave(pg, filename = paste0("pathway_analysis/", "/", fname, "/gost_pathway_gostplot.png"), device="png", height=10, width=15, units="cm", dpi=300, limitsize=F) - ggsave(pg, filename = paste0("pathway_analysis", "/", fname, "_gost_pathway_enrichment_plot.svg"), + ggsave(pg, filename = paste0("pathway_analysis/", "/", fname, "/gost_pathway_gostplot.svg"), device="svg", height=10, width=15, units="cm", dpi=300, limitsize=F) @@ -1958,9 +2159,9 @@ for (file in contrast_files){ no_background=pathway_gostres_nobg$term_name ) pv <- ggvenn::ggvenn(VennInput) - ggsave(paste0("pathway_analysis", "/", fname, "_gost_pathway_venn_diagram.pdf"),pv, device="pdf", width=10, height=15) - ggsave(paste0("pathway_analysis", "/", fname, "_gost_pathway_venn_diagram.png"),pv, device="png", width=10, height=15) - ggsave(paste0("pathway_analysis", "/", fname, "_gost_pathway_venn_diagram.svg"),pv, device="svg", width=10, height=15) + ggsave(paste0("pathway_analysis", "/", fname, "/gost_pathway_venn_diagram.pdf"),pv, device="pdf", width=10, height=15) + ggsave(paste0("pathway_analysis", "/", fname, "/gost_pathway_venn_diagram.png"),pv, device="png", width=10, height=15) + ggsave(paste0("pathway_analysis", "/", fname, "/gost_pathway_venn_diagram.svg"),pv, device="svg", width=10, height=15) } } @@ -1970,7 +2171,7 @@ for (file in contrast_files){ # Save pathway enrichment table in tsv format write.table(pathway_gostres_table, - file = paste0("pathway_analysis", "/", fname, "/", fname, "_pathway_enrichment_results.tsv"), + file = paste0("pathway_analysis", "/", fname, "/pathway_enrichment_results.tsv"), sep="\t", quote = F, col.names = T, row.names = F) print("------------------------------------") @@ 
-1986,13 +2187,14 @@ for (file in contrast_files){ # Splitting results according to pathway resources (KEGG / REACTOME / GO) res <- split(pathway_gostres, pathway_gostres$source) for (df in res){ + db_source <- df$source[1] df$short_name <- sapply(df$term_name, substr, start=1, stop=50) # Plotting results for df df_subset <- data.frame(Pathway_name = df$short_name, Pathway_code = df$term_id, DE_genes = df$intersection_size, Pathway_size = df$term_size, Fraction_DE = (df$intersection_size / df$term_size), Padj = df$p_value, DE_genes_names = df$intersection) write.table(df_subset, - file = paste0("pathway_analysis", "/", fname, "/", fname, "_", db_source, "_pathway_enrichment_results.tsv"), + file = paste0("pathway_analysis", "/", fname, "/", make.names(db_source), "_pathway_enrichment_results.tsv"), sep="\t", quote = F, col.names = T, row.names = F) # Enriched pathways horizontal barplots of padj values @@ -2000,13 +2202,13 @@ for (file in contrast_files){ geom_bar(aes(fill=Padj), stat="identity", width = 0.7) + geom_text(aes(label=paste0(df_subset$DE_genes, "/", df_subset$Pathway_size)), vjust=0.4, hjust=-0.5, size=3) + coord_flip() + - scale_y_continuous(limits = c(0.00, 1.00)) + + scale_y_continuous(limits = c(0.00, 1.00), expand=expansion(mult=c(0,0.2))) + scale_fill_continuous(high = "#132B43", low = "#56B1F7") + ggtitle("Enriched pathways") + xlab("") + ylab("Gene fraction (DE genes / Pathway size)") - ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/", fname, "_", db_source, "_pathway_enrichment_plot.pdf"), device = "pdf", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) - ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/", fname,"_", db_source, "_pathway_enrichment_plot.png"), device = "png", height = 5+0.5*nrow(df_subset), units = "cm", dpi = 300, limitsize=F) - ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/", fname,"_", db_source, "_pathway_enrichment_plot.svg"), device = "svg", height = 
5+0.5*nrow(df_subset), units = "cm", dpi = 300, limitsize=F) + ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.pdf"), device = "pdf", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) + ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.png"), device = "png", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) + ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.svg"), device = "svg", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F) # Plotting heatmaps and KEGG pathways for all pathways print("Plotting heatmaps...") @@ -2019,46 +2221,53 @@ for (file in contrast_files){ for (i in c(1:nrow(df))){ pathway <- df[i,] gene_list <- unlist(strsplit(pathway$intersection, ",")) - mat <- norm_counts[gene_list, ] - rownames(mat) <- mat$gene_name - mat$gene_name <- NULL - mat <- data.matrix(mat) + mat <- norm_counts[which(norm_counts$Ensembl_ID %in% gene_list), ] # Skip the heatmaps for CI tests because especially png() works only very unreliably for the heatmaps for some reason if (!isProvided(params$citest) & nrow(mat)>1){ dir.create(paste("pathway_analysis", fname, "pathway_heatmaps", sep="/")) - - # Because the image devices sometimes don't work here, surround them with a tryCatch so that the code continues even if a certain heatmap is not saved + + # Save matrix as table so that the values are accessible if heatmap plotting does not work properly + write.table(mat, file=paste0("pathway_analysis", "/",fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", make.names(pathway$term_id), ".tsv"), sep="\t", quote=F, row.names=F) + + # Reformat matrix so that it can be used by pheatmap (throws error if String columns/IDs are in the table) + rownames(mat) <- mat$gene_name + 
mat$gene_name <- NULL + mat$Ensembl_ID <- NULL + mat <- data.matrix(mat) + + # Because the image devices sometimes don't work here, surround them with a tryCatch so that the code continues even if a certain heatmap is not saved tryCatch( { - png(filename = paste0("pathway_analysis", "/",fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", pathway$term_id, "_",fname, ".png"), width = ifelse(nrow(mat)<=20, 200, 125)+ncol(mat)*40, height = 50+nrow(mat)*50) - pheatmap(mat = mat, annotation_col = metadata_save, main = paste("Heatmap of normalized expression values for genes in pathway ", pathway$short_name, "(",pathway$source,")",sep=" "), scale = "row", cluster_cols = F, cluster_rows = T) + png(filename = paste0("pathway_analysis", "/",fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", make.names(pathway$term_id), ".png"), width = ifelse(nrow(mat)<=20, 200, 125)+ncol(mat)*40, height = 50+nrow(mat)*50) + pheatmap(mat = mat, annotation_col = metadata_save, main = paste("Heatmap of normalized expression values (z-scores) for genes in pathway ", pathway$short_name, "(",pathway$source,")",sep=" "), scale = "row", cluster_cols = params$heatmaps_cluster_cols, cluster_rows = params$heatmaps_cluster_rows) dev.off() }, error=function(e) { - print(paste0("Could not save Heatmap_normalized_counts_", pathway$source, "_", pathway$term_id, "_",fname, ".png because of the following error:\n", e)) + print(paste0("Could not save pathway_analysis", "/",fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", make.names(pathway$term_id), ".png because of the following error:\n", e)) } ) tryCatch( { - pdf(paste0("pathway_analysis", "/", fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", pathway$term_id, "_", fname, ".pdf"), width = ifelse(nrow(mat)<=20, 50, 10)+ncol(mat)*0.1, height = 5+nrow(mat)*0.005) - pheatmap(mat = mat, annotation_col = 
metadata_save, main = paste("Heatmap of normalized expression values for genes in pathway ", pathway$short_name, "(",pathway$source,")",sep=" "), scale = "row", cluster_cols = F, cluster_rows = T) + pdf(paste0("pathway_analysis", "/", fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", make.names(pathway$term_id), ".pdf"), width = ifelse(nrow(mat)<=20, 50, 10)+ncol(mat)*0.1, height = 5+nrow(mat)*0.005) + pheatmap(mat = mat, annotation_col = metadata_save, main = paste("Heatmap of normalized expression values (z-scores) for genes in pathway ", pathway$short_name, "(",pathway$source,")",sep=" "), scale = "row", cluster_cols = params$heatmaps_cluster_cols, cluster_rows = params$heatmaps_cluster_rows) dev.off() }, error=function(e) { - print(paste0("Could not save Heatmap_normalized_counts_", pathway$source, "_", pathway$term_id, "_", fname, ".pdf because of the following error:\n", e)) + print(paste0("Could not save pathway_analysis", "/", fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", make.names(pathway$term_id), ".pdf because of the following error:\n", e)) } ) tryCatch( { - svg(paste0("pathway_analysis", "/", fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", pathway$term_id, "_", fname, ".svg"), width = ifelse(nrow(mat)<=20, 50, 10)+ncol(mat)*0.1, height = 5+nrow(mat)*0.005) - pheatmap(mat = mat, annotation_col = metadata_save, main = paste("Heatmap of normalized expression values for genes in pathway ", pathway$short_name, "(",pathway$source,")",sep=" "), scale = "row", cluster_cols = F, cluster_rows = T) + svg(paste0("pathway_analysis", "/", fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", make.names(pathway$term_id), ".svg"), width = ifelse(nrow(mat)<=20, 50, 10)+ncol(mat)*0.1, height = 5+nrow(mat)*0.005) + pheatmap(mat = mat, annotation_col = metadata_save, main = paste("Heatmap of normalized expression values 
(z-scores) for genes in pathway ", pathway$short_name, "(",pathway$source,")",sep=" "), scale = "row", cluster_cols = params$heatmaps_cluster_cols, cluster_rows = params$heatmaps_cluster_rows) dev.off() }, error=function(e) { - print(paste0("Could not save Heatmap_normalized_counts_", pathway$source, "_", pathway$term_id, "_", fname, ".svg because of the following error:\n", e)) + print(paste0("Could not save pathway_analysis", "/", fname, "/", "pathway_heatmaps", "/", "Heatmap_normalized_counts_", pathway$source, "_", make.names(pathway$term_id), ".svg because of the following error:\n", e)) } ) + graphics.off() # Close all still open devices from errors. Tried doing this in finally, but that doesn't work } # Plotting pathway view only for kegg pathways @@ -2070,9 +2279,7 @@ for (file in contrast_files){ print_string <- paste0("Plotting pathway: ", pathway_kegg) gene.data = DE_genes gene.data.subset = gene.data[gene.data$Ensembl_ID %in% gene_list, c("Ensembl_ID","log2FoldChange")] - entrez_ids = AnnotationDbi::mapIds(species_library_installed, keys=as.character(gene.data.subset$Ensembl_ID), column = "ENTREZID", keytype=params$keytype, multiVals="first") - gene.data.subset <- gene.data.subset[!(is.na(entrez_ids)),] if (length(entrez_ids)!=length(unique(entrez_ids))) { @@ -2104,8 +2311,8 @@ for (file in contrast_files){ if (isProvided(params$path_genelist)){ genelist_path = params$path_genelist - genelist_heatmaps_dir <- "heatmap_gene_list" - dir.create(paste("pathway_analysis", genelist_heatmaps_dir, sep="/")) + genelist_heatmaps_dir <- "differential_gene_expression/plots/heatmap_gene_list" + dir.create(genelist_heatmaps_dir) print("Plotting heatmaps...") conditions <- grepl("Condition", colnames(metadata)) @@ -2127,21 +2334,27 @@ if (isProvided(params$path_genelist)){ gene_list <- na.omit(gene_list) rownames(norm_counts) <- norm_counts$Ensembl_ID - mat <- norm_counts[gene_list, ] - rownames(mat) <- mat$gene_name - mat$gene_name <- NULL - mat$Ensembl_ID <- NULL - 
mat <- data.matrix(mat) + mat <- norm_counts[which(norm_counts$Ensembl_ID %in% gene_list), ] if (nrow(mat)>1){ - png(filename = paste0("pathway_analysis", "/", genelist_heatmaps_dir, "/", "Heatmap_normalized_counts_gene_list.png"), width = 100+ncol(mat)*50, height = 50+nrow(mat)*100) - pheatmap(mat = mat, annotation_col = metadata_save, main = "Genelist-specific heatmap of normalized expression values for genes", scale = "row", cluster_cols = F, cluster_rows = T) + + # Save matrix as table (before the ID columns are stripped, so Ensembl_ID and gene_name stay in the file) so that the values are accessible if heatmap plotting does not work properly + write.table(mat, file=paste0(genelist_heatmaps_dir, "/", "Heatmap_normalized_counts_gene_list.tsv"), sep="\t", quote=F, row.names=F) + + # Reformat matrix so that it can be used by pheatmap (throws error if String columns/IDs are in the table) + rownames(mat) <- mat$gene_name + mat$gene_name <- NULL + mat$Ensembl_ID <- NULL + mat <- data.matrix(mat) + + png(filename = paste0(genelist_heatmaps_dir, "/", "Heatmap_normalized_counts_gene_list.png"), width = 100+ncol(mat)*50, height = 50+nrow(mat)*100) + pheatmap(mat = mat, annotation_col = metadata_save, main = "Genelist-specific Heatmap of normalized expression values (z-scores) for genes", scale = "row", cluster_cols = params$heatmaps_cluster_cols, cluster_rows = params$heatmaps_cluster_rows) dev.off() - pdf(paste0("pathway_analysis", "/", genelist_heatmaps_dir, "/", "Heatmap_normalized_counts_gene_list.pdf"), width = 10+ncol(mat)*0.05, height = 5+nrow(mat)*0.005) - pheatmap(mat = mat, annotation_col = metadata_save, main = "Genelist-specific heatmap of normalized expression values for genes", scale = "row", cluster_cols = F, cluster_rows = T) + pdf(paste0(genelist_heatmaps_dir, "/", "Heatmap_normalized_counts_gene_list.pdf"), width = 10+ncol(mat)*0.05, height = 5+nrow(mat)*0.005) + pheatmap(mat = mat, annotation_col = metadata_save, main = "Genelist-specific Heatmap of normalized expression values (z-scores) for genes", scale = "row", cluster_cols = 
params$heatmaps_cluster_cols, cluster_rows = params$heatmaps_cluster_rows) dev.off() - svg(paste0("pathway_analysis", "/", genelist_heatmaps_dir, "/", "Heatmap_normalized_counts_gene_list.svg"), width = 10+ncol(mat)*0.1, height = 5+nrow(mat)*0.005) - pheatmap(mat = mat, annotation_col = metadata_save, main = "Genelist-specific heatmap of normalized expression values for genes", scale = "row", cluster_cols = F, cluster_rows = T) + svg(paste0(genelist_heatmaps_dir, "/", "Heatmap_normalized_counts_gene_list.svg"), width = 10+ncol(mat)*0.1, height = 5+nrow(mat)*0.005) + pheatmap(mat = mat, annotation_col = metadata_save, main = "Genelist-specific Heatmap of normalized expression values (z-scores) for genes", scale = "row", cluster_cols = params$heatmaps_cluster_cols, cluster_rows = params$heatmaps_cluster_rows) dev.off() } } @@ -2170,9 +2383,11 @@ Inside the pathway analysis results folder, a subfolder for each contrast used f

Expand/collapse - `*_gost_pathway_venn_diagram.pdf/png` - - Venn diagrams showing the numbers of enriched pathways when using a background gene list vs when not using a bg list. -- `*_KEGG_pathway_enrichment_plot.pdf/png` - - Barplots showing the proportion of differentially expressed genes in the pathway. + - Venn diagrams showing the numbers of enriched pathways when using a background gene list vs when not using a bg list. +- `enrichment_plots/*_pathway_enrichment_plot.{pdf/png/svg}` + - Barplots showing the proportion of differentially expressed genes in the pathway for a certain pathway database. +- `gost_pathway_gostplot.{pdf/png/svg}` + - Manhattan plots displaying all enriched pathways. - `KEGG_pathways/` - Contains the KEGG pathways graphs with the log fold change of the differentially expressed genes. - `pathway_heatmaps` @@ -2211,7 +2426,7 @@ names(q_list) <- q_names if (length(q_list) > 0) { #gost query - + if (isProvided(params$set_background)) { gostres <- gost(query=q_list, organism=gost_id, @@ -2422,8 +2637,8 @@ as.character(params$revision), "` pipeline [^5], which was written using the nf-core template [@ewels2020nf]. For differential expression analysis, the read quantification data resulting from `", quant_tool, "` were processed with the R package `DESeq2 v", packageVersion("DESeq2"), -"` [@love2014differential]. The thresholds for differentially expressed genes were set to ", -pval_text, " for the p-value and ", +"` [@love2014differential]. The thresholds for differentially expressed genes were set to ", +pval_text, " for the adjusted p-value and ", logFC_text, " for the log2 Fold Change." )) @@ -2434,16 +2649,25 @@ name_species = params$species_library version_species = packageVersion(name_species) if (isProvided(params$custom_gmt)){ - database_string <- paste0("Also, a gprofiler query with this custom GMT file was done: `", basename(params$custom_gmt), "`. 
Entries of the following datasources were considered: ") - dbs <- paste(datasources, collapse="`, `") - database_string <- paste0(database_string, "`", dbs, "`.") -} else { + database_string <- paste0("Also, a gprofiler query with this custom GMT file was done: `", basename(params$custom_gmt), "`.") + + if (isProvided(params$datasources)) { + database_string <- paste(database_string, "Entries of the following datasources were considered: ") + dbs <- paste(datasources, collapse="`, `") + database_string <- paste0(database_string, "`", dbs, "`.") + } else { + database_string <- paste(database_string, "All entries of this GMT file were considered.") + } + +} else if (isProvided(params$datasources)) { database_string = "Also, the following databases were queried: " database_version_info <- get_version_info(params$organism) database_string <- paste0(database_string, "`gprofiler: ", database_version_info$gprofiler_version, "`") for (db in datasources) { database_string <- paste0(database_string, ", `", db, ": ", database_version_info$sources[[db]]$version, "`") } +} else { + database_string <- paste("All datasources available in g:Profiler were considered (check https://biit.cs.ut.ee/gprofiler/gost and https://biit.cs.ut.ee/gprofiler/page/docs for more info).") } cat(paste0( @@ -2465,7 +2689,7 @@ The following lists all packages used for analysis, together with their versions ```{r versions} sessioninfo::session_info()%>% - details::details(summary = 'Session info/used packages') + details::details(summary = 'Session info/used packages') ``` --- diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json index 99786880..e4fc58d0 100644 --- a/assets/adaptivecard.json +++ b/assets/adaptivecard.json @@ -17,7 +17,7 @@ "size": "Large", "weight": "Bolder", "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", - "text": "nf-core/rnadeseq v${version} - ${runName}", + "text": "qbic-pipelines/rnadeseq v${version} - ${runName}", "wrap": true }, { diff --git a/assets/email_template.html b/assets/email_template.html index 
3245bcd4..f93eb897 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,111 +1,53 @@ - - - - + + + + - - - qbic-pipelines/rnadeseq Pipeline Report - - -
- + + qbic-pipelines/rnadeseq Pipeline Report + + +
-

qbic-pipelines/rnadeseq v${version}

-

Run Name: $runName

+ - <% if (!success){ out << """ -
-

qbic-pipelines/rnadeseq execution completed unsuccessfully!

+

qbic-pipelines/rnadeseq ${version}

+

Run Name: $runName

+ +<% if (!success){ + out << """ +
+

qbic-pipelines/rnadeseq execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

-
${errorReport}
-
- """ } else { out << """ -
+
${errorReport}
+
+ """ +} else { + out << """ +
qbic-pipelines/rnadeseq execution completed successfully! -
- """ } %> +
+ """ +} +%> -

The workflow was completed at $dateComplete (duration: $duration)

-

The command used to launch the workflow was as follows:

-
-$commandLine
+

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
-

Pipeline Configuration:

- - - <% out << summary.collect{ k,v -> " - - - - - " }.join("\n") %> - -
- $k - -
$v
-
+

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
-

qbic-pipelines/rnadeseq

-

https://github.com/qbic-pipelines/rnadeseq

-
- +

qbic-pipelines/rnadeseq

+

https://github.com/qbic-pipelines/rnadeseq

+ +
+ + diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 04b860b4..b0f33b73 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -1,13 +1,12 @@ id: "nf-core-rnadeseq-methods-description" description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." -section_name: "nf-core/rnadeseq Methods Description" -section_href: "https://github.com/nf-core/rnadeseq" +section_name: "qbic-pipelines/rnadeseq Methods Description" +section_href: "https://github.com/qbic-pipelines/rnadeseq" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

-

Data was processed using nf-core/rnadeseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

Data was processed using qbic-pipelines/rnadeseq v${workflow.manifest.version} ${doi_text} adapted from the nf-core workflow template (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

${workflow.commandLine}

${tool_citations}

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 737c3d27..d3a5bdca 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,13 +1,15 @@ report_comment: > - This report has been generated by the qbic-pipelines/rnadeseq + This report has been generated by the qbic-pipelines/rnadeseq analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: + "qbic-pipelines-rnadeseq-methods-description": + order: -1000 software_versions: order: -1001 - "qbic-pipelines-rnadeseq-summary": # this is necessary to prevent a linting error + "qbic-pipelines-rnadeseq-summary": order: -1002 - "qbic-pipelines-rnadeseq-methods-description": - order: -1000 export_plots: true + +disable_version_detection: true diff --git a/assets/schema_input.json b/assets/schema_input.json deleted file mode 100644 index 8f83ef7d..00000000 --- a/assets/schema_input.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/master/assets/schema_input.json", - "title": "qbic-pipelines/rnadeseq pipeline - params.input schema", - "description": "Schema for the file provided with params.input", - "type": "array", - "items": { - "type": "object", - "properties": { - "sample": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" - }, - "fastq_1": { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] - } - }, - "required": ["sample", "fastq_1"] - } -} 
diff --git a/bin/Execute_report.R b/bin/Execute_report.R index 5b9160d1..574fbc1e 100755 --- a/bin/Execute_report.R +++ b/bin/Execute_report.R @@ -34,6 +34,9 @@ option_list = list( make_option("--set_background", action="store_true", default=TRUE, help="Whether to use a background list for pathway analysis; if true, will only consider expressed genes (i.e. mean counts > 0) for PA."), make_option("--custom_background", type="character", default=NULL, help="Path to a custom background list TXT for pathway analysis; if provided, will only consider genes in that list for PA."), make_option(c("-w", "--min_DEG_pathway"), type="integer", default=NULL, help="min. number of genes DE in a pathway for this pathway to be considered enriched.", metavar="integer"), + make_option("--datasources", type="character", default=NULL, help="Which datasources to use for pathway analysis.", metavar="character"), + make_option("--heatmaps_cluster_rows", action="store_true", default=FALSE, help="Whether to activate row clustering when generating heatmaps of gene expression in enriched pathways."), + make_option("--heatmaps_cluster_cols", action="store_true", default=FALSE, help="Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways."), make_option(c("-s", "--proj_summary"), type="character", default=NULL, help="Project summary file", metavar="character"), make_option(c("--path_quote"), type="character", default=NULL, help="Path to the quote PDF", metavar="character"), @@ -82,6 +85,9 @@ rmarkdown::render(opt$report, output_file = opt$output, knit_root_dir = wd, outp custom_background = opt$custom_background, keytype = opt$keytype, min_DEG_pathway = opt$min_DEG_pathway, + datasources = opt$datasources, + heatmaps_cluster_rows = opt$heatmaps_cluster_rows, + heatmaps_cluster_cols = opt$heatmaps_cluster_cols, path_proj_summary = opt$proj_summary, path_quote = opt$path_quote, diff --git a/bin/markdown_to_html.r b/bin/markdown_to_html.r deleted file 
mode 100755 index 212d793c..00000000 --- a/bin/markdown_to_html.r +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env Rscript - -# Command line argument processing -args = commandArgs(trailingOnly=TRUE) -if (length(args) < 2) { - stop("Usage: markdown_to_html.r ", call.=FALSE) -} -markdown_fn <- args[1] -output_fn <- args[2] - -# Load / install packages -if (!require("markdown")) { - install.packages("markdown", dependencies=TRUE, repos='http://cloud.r-project.org/') - library("markdown") -} - -base_css_fn <- getOption("markdown.HTML.stylesheet") -base_css <- readChar(base_css_fn, file.info(base_css_fn)$size) -custom_css <- paste(base_css, " -body { - padding: 3em; - margin-right: 350px; - max-width: 100%; -} -#toc { - position: fixed; - right: 20px; - width: 300px; - padding-top: 20px; - overflow: scroll; - height: calc(100% - 3em - 20px); -} -#toc_header { - font-size: 1.8em; - font-weight: bold; -} -#toc > ul { - padding-left: 0; - list-style-type: none; -} -#toc > ul ul { padding-left: 20px; } -#toc > ul > li > a { display: none; } -img { max-width: 800px; } -") - -markdownToHTML( - file = markdown_fn, - output = output_fn, - stylesheet = custom_css, - options = c('toc', 'base64_images', 'highlight_code') -) diff --git a/conf/base.config b/conf/base.config index 4900555b..e68d4caf 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,12 +10,11 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -24,7 +23,6 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. 
// If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } @@ -59,7 +57,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/test.config b/conf/test.config index 05a27ad8..5280f668 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,6 +28,7 @@ params { software_versions = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/software_versions.csv' multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/MultiQC.zip' run_pathway_analysis = true + datasources = 'KEGG,REAC' genome = 'GRCm38' quote = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/offer_example.pdf' } diff --git a/conf/test_custom_gmt.config b/conf/test_custom_gmt.config index e3f84934..3ee2eab9 100644 --- a/conf/test_custom_gmt.config +++ b/conf/test_custom_gmt.config @@ -28,6 +28,7 @@ params { software_versions = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/software_versions.csv' multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/MultiQC.zip' run_pathway_analysis = true + datasources = 'KEGG,REAC' genome = 'GRCm38' custom_gmt = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/gprofiler_full_mmusculus.ENSG.gmt' } diff --git a/conf/test_smrnaseq.config b/conf/test_smrnaseq.config index 797fa332..6291a66d 100644 --- a/conf/test_smrnaseq.config +++ b/conf/test_smrnaseq.config @@ -24,6 +24,7 @@ params { genelist = 'testdata/smrnaseq/requested_genes.txt' input_type = 'smrnaseq' run_pathway_analysis = true + datasources = 'KEGG,REAC' genome = 'GRCh37' batch_effect = true set_background = false // This is solely 
to get some enriched pathways so that the github tests also check the pathway analysis output diff --git a/conf/test_star_rsem.config b/conf/test_star_rsem.config index 03c11cd5..2139133b 100644 --- a/conf/test_star_rsem.config +++ b/conf/test_star_rsem.config @@ -23,9 +23,10 @@ params { //report_options = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/report_options.yml' project_summary = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/summary.tsv' software_versions = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/software_versions_rsem.yml' - multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/MultiQC.zip' + multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/QDESQ/new_rsem_multiqc.zip' input_type = 'rsem' run_pathway_analysis = true + datasources = 'GO:CC,KEGG' genome = 'GRCh37' // species_library = "org.Hs.eg.db" // organism = "hsapiens" diff --git a/conf/test_star_salmon.config b/conf/test_star_salmon.config index 4a896dfe..3fd41063 100644 --- a/conf/test_star_salmon.config +++ b/conf/test_star_salmon.config @@ -23,9 +23,10 @@ params { //report_options = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/report_options.yml' project_summary = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/summary.tsv' software_versions = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/software_versions_salmon.yml' - multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/MultiQC.zip' + multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/QDESQ/new_salmon_multiqc.zip' input_type = 'salmon' run_pathway_analysis = true + datasources = 'KEGG,REAC' // genome = 'GRCh37' species_library = "org.Hs.eg.db" organism = "hsapiens" diff --git a/docs/output.md b/docs/output.md index 99131726..6cc4e044 100644 --- a/docs/output.md +++ 
b/docs/output.md @@ -52,8 +52,10 @@ If pathway analysis was run, this directory contains the zipped pathway analysis - `*_gost_pathway_venn_diagram.pdf/png` - Venn diagrams showing the numbers of enriched pathways when using a background gene list vs when not using a bg list. -- `*_KEGG_pathway_enrichment_plot.pdf/png` - - Barplots showing the proportion of differentially expressed genes in the pathway. +- `enrichment_plots/*_pathway_enrichment_plot.{pdf/png/svg}` + - Barplots showing the proportion of differentially expressed genes in the pathway for a certain pathway database. +- `gost_pathway_gostplot.{pdf/png/svg}` + - Manhattan plots displaying all enriched pathways. - `KEGG_pathways/` - Contains the KEGG pathways graphs with the log fold change of the differentially expressed genes. - `pathway_heatmaps` diff --git a/docs/usage.md b/docs/usage.md index 2d74b416..05da6ed0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -89,7 +89,7 @@ The `qbic-pipelines/rnadeseq` pipeline relies on the output from the `nf-core/rn The typical command for running the pipeline is as follows: ```bash -nextflow run qbic-pipelines/rnadeseq -r 2.0.1 -profile docker \ +nextflow run qbic-pipelines/rnadeseq -r 2.3 -profile docker \ --gene_counts 'merged_gene_counts.txt' \ --input 'QXXXX_sample_preparations.tsv' \ --model 'linear_model.txt' \ @@ -157,7 +157,7 @@ curl https://codeload.github.com/qbic-pipelines/rnadeseq/tar.gz/master | tar -xz Afterwards, you should be able to also run test_star_rsem and test_star_salmon from qbic-pipelines/rnadeseq without manually cloning, e.g.: ```bash -nextflow run qbic-pipelines/rnadeseq -r 2.0.1 -profile docker,test_star_salmon +nextflow run qbic-pipelines/rnadeseq -r 2.3 -profile docker,test_star_salmon ``` ### Reproducibility diff --git a/environment.yml b/environment.yml index cf3dc64d..c759c019 100644 --- a/environment.yml +++ b/environment.yml @@ -1,7 +1,7 @@ # You can use this file to create a conda environment for this pipeline: # conda env 
create -f environment.yml # use this to find packages: https://anaconda.org/ -name: qbic-pipelines-rnadeseq-2.2 +name: qbic-pipelines-rnadeseq-2.3 channels: - bioconda - conda-forge diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 000bb929..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,331 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = 
workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? 
'' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? 
'' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index 3ae24dbf..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,63 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the qbic-pipelines/rnadeseq pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - - // - // Validate parameters and print summary to screen 
- // - public static void initialise(workflow, params, log) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.tsv'") - } - } - // - // Get attribute from genome config file e.g. fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/WorkflowRnadeseq.groovy b/lib/WorkflowRnadeseq.groovy deleted file mode 100755 index 7a1ae2bd..00000000 --- a/lib/WorkflowRnadeseq.groovy +++ /dev/null @@ -1,118 +0,0 @@ -// -// This file holds several functions specific to the workflow/rnadeseq.nf in the qbic-pipelines/rnadeseq pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowRnadeseq { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - - genomeExistsError(params, log) - - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that 
particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // TODO Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() - - return reference_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 469106d2..8c5cf574 100644 --- a/main.nf +++ b/main.nf @@ -9,6 +9,14 @@ nextflow.enable.dsl = 2 +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_rnadeseq_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GENOME PARAMETER 
VALUES @@ -21,7 +29,7 @@ nextflow.enable.dsl = 2 if (params.input_type in ["rsem", "salmon"]) { if (!params.genome && !params.gtf) { exit 1, 'Please provide either genome or gtf file!' } else if (!params.gtf) { - params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') + params.gtf = getGenomeAttribute('gtf') if (!params.gtf) { exit 1, 'It seems that for your genome, no gtf file is defined. Please provide a gtf with the --gtf parameter or open a github issue: https://github.com/qbic-pipelines/rnadeseq/issues' } @@ -32,21 +40,21 @@ if (params.input_type in ["rsem", "salmon"]) { if (params.run_pathway_analysis) { if (!params.genome && !params.organism) { exit 1, 'Please provide either genome or organism!' } else if (!params.organism) { - params.organism = WorkflowMain.getGenomeAttribute(params, 'organism') + params.organism = getGenomeAttribute('organism') if (!params.organism) { exit 1, 'It seems that for your genome, no organism is defined. Please provide the organism with the --organism parameter or open a github issue: https://github.com/qbic-pipelines/rnadeseq/issues' } } if (!params.genome && !params.species_library) { exit 1, 'Please provide either genome or species_library!' } else if (!params.species_library) { - params.species_library = WorkflowMain.getGenomeAttribute(params, 'species_library') + params.species_library = getGenomeAttribute('species_library') if (!params.species_library) { exit 1, 'It seems that for your genome, no species_library is defined. Please provide the library with the --species_library parameter or open a github issue: https://github.com/qbic-pipelines/rnadeseq/issues' } } if (!params.genome && !params.keytype) { exit 1, 'Please provide either genome or keytype!' } else if (!params.keytype) { - params.keytype = WorkflowMain.getGenomeAttribute(params, 'keytype') + params.keytype = getGenomeAttribute('keytype') if (!params.keytype) { exit 1, 'It seems that for your genome, no keytype is defined. 
Please provide the keytype with the --keytype parameter or open a github issue: https://github.com/qbic-pipelines/rnadeseq/issues' } @@ -55,54 +63,46 @@ if (params.run_pathway_analysis) { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + NAMED WORKFLOW FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input 'samplesheet.tsv' --gene_counts 'merged_gene_counts.txt' --input 'sample_preparations.tsv' --model 'linear_model.txt' -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) +include { RNADESEQ } from './workflows/rnadeseq' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + RUN ALL WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - -include { RNADESEQ } from './workflows/rnadeseq' // -// WORKFLOW: Run main qbic-pipelines/rnadeseq analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow QBIC_RNADESEQ { + + main: + + // + // WORKFLOW: Run pipeline + // RNADESEQ () + } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named 
workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { + + main: + + // + // WORKFLOW: Run main workflow + // QBIC_RNADESEQ () + } /* diff --git a/modules.json b/modules.json index 218e689a..e1bb226f 100644 --- a/modules.json +++ b/modules.json @@ -11,6 +11,25 @@ "installed_by": ["modules"] } } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/report.nf b/modules/local/report.nf index eac509d0..5c0fcab1 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -1,6 +1,6 @@ process REPORT { - container 'ghcr.io/qbic-pipelines/rnadeseq:2.2' + container 'ghcr.io/qbic-pipelines/rnadeseq:2.3' input: path gene_counts @@ -19,6 +19,8 @@ process REPORT { path multiqc path custom_gmt path custom_background + path report_file + path references_file output: path "*.zip" @@ -37,6 +39,10 @@ process REPORT { def custom_gmt_opt = custom_gmt.name != 'NO_FILE3' ? "--custom_gmt $custom_gmt" : '' def set_background_opt = params.set_background ? "--set_background TRUE" : "--set_background FALSE" def custom_background_opt = custom_background.name != 'NO_FILE7' ? "--custom_background $custom_background" : '' + def datasources_opt = params.datasources ? "--datasources $params.datasources" : '' + def heatmaps_cluster_rows_opt = params.heatmaps_cluster_rows ? "--heatmaps_cluster_rows TRUE" : '' + def heatmaps_cluster_cols_opt = params.heatmaps_cluster_cols ? "--heatmaps_cluster_cols TRUE" : '' + def quote_opt = params.quote != 'NO_FILE5' ? 
"--path_quote $params.quote" : '' def software_versions_opt = params.software_versions != 'NO_FILE6' ? "--software_versions $params.software_versions" : '' @@ -47,10 +53,10 @@ process REPORT { if [ "$multiqc" != "NO_FILE4" ]; then unzip $multiqc mkdir QC - mv MultiQC/multiqc_plots/ MultiQC/multiqc_data/ MultiQC/multiqc_report.html QC/ + mv MultiQC/multiqc_plots/ MultiQC/multiqc_data/ MultiQC/multiqc_report.html QC/ || mv multiqc/*/multiqc_plots/ multiqc/*/multiqc_data/ multiqc/*/multiqc_report.html QC/ || mv multiqc_plots/ multiqc_data/ multiqc_report.html QC/ fi Execute_report.R \ - --report '$baseDir/assets/RNAseq_report.Rmd' \ + --report '$report_file' \ --output 'RNAseq_report.html' \ --input_type $params.input_type \ --gene_counts $gene_counts \ @@ -76,6 +82,9 @@ process REPORT { --species_library $params.species_library \ --keytype $params.keytype \ --min_DEG_pathway $params.min_DEG_pathway \ + $datasources_opt \ + $heatmaps_cluster_rows_opt \ + $heatmaps_cluster_cols_opt \ $quote_opt \ $software_versions_opt \ --proj_summary $proj_summary \ @@ -100,4 +109,3 @@ process REPORT { """ } - diff --git a/nextflow.config b/nextflow.config index 9018f037..8db8337c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,12 +39,17 @@ params { custom_gmt = 'NO_FILE3' set_background = true custom_background = 'NO_FILE7' + datasources = null + heatmaps_cluster_rows = true + heatmaps_cluster_cols = false // Additional args for the report project_summary = null multiqc = 'NO_FILE4' quote = 'NO_FILE5' software_versions = 'NO_FILE6' + report_file = "$projectDir/assets/RNAseq_report.Rmd" + references_file = "$projectDir/assets/references.bibtex" // Additional arg to indicate that a github test is run citest = false @@ -78,12 +83,9 @@ params { max_cpus = 16 max_time = '240.h' - // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationSchemaIgnoreParams = 'genomes' + // Schema validation default options (only 
those necessary for linting; rnadeseq does not use the validation plugin) + validationSchemaIgnoreParams = 'genomes,igenomes_base' validationShowHiddenParams = false - validate_params = true } // Load base.config by default for all pipelines @@ -96,7 +98,7 @@ try { System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } -// Load nf-core/rnadeseq custom profiles from different institutions. +// Load qbic-pipelines/rnadeseq custom profiles from different institutions. // Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! // try { // includeConfig "${params.custom_config_base}/pipeline/rnadeseq.config" @@ -105,92 +107,102 @@ try { // } profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - docker.userEmulation = true - conda.enabled = false - singularity.enabled = false - 
podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false 
+ singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } @@ -205,6 +217,13 @@ profiles { test_no_multiqc { includeConfig 'conf/test_no_multiqc.config' } test_custom_gmt { includeConfig 'conf/test_custom_gmt.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' // Load igenomes.config if required if (!params.igenomes_ignore) { @@ -227,6 +246,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -252,7 +274,7 @@ manifest { description = """Differential gene expression analysis and pathway analysis of RNAseq data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.2' + version = '2.3' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index adddbf59..aa6b89a2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -125,7 +125,8 @@ }, "custom_gmt": { "type": "string", - "description": "Path to custom GMT file for gost query, this allows e.g. to query older versions of databases for pathway analysis, not necessary if --skip_pathway_analysis = true." + "default": "NO_FILE3", + "description": "Path to custom GMT file for gost query, this allows e.g. to query older versions of databases for pathway analysis, not necessary if --skip_pathway_analysis = true. If --datasources is set, the GMT file will be filtered for these datasources." }, "set_background": { "type": "boolean", @@ -134,12 +135,27 @@ }, "custom_background": { "type": "string", + "default": "NO_FILE7", "description": "Path to custom background TXT file for gost query containing a gene ID in each line, not necessary if --skip_pathway_analysis = true or --set_background = false." }, "min_DEG_pathway": { "type": "integer", "default": 1, "description": "Integer indicating how many genes in a pathway must be differentially expressed to be considered as enriched, and report these pathways in tables and the final report. The default value is 1." + }, + "datasources": { + "type": "string", + "description": "Which datasources to use for pathway analysis, comma-separated string like 'KEGG,REAC'. See param 'sources' on https://rdrr.io/cran/gprofiler2/man/gost.html for a list of available sources. If not set, will use all sources. 
If set while a --custom_gmt is provided, will filter the GMT for these datasources (will not filter for the GO subtypes like GO:BP, just for GO)." + }, + "heatmaps_cluster_rows": { + "type": "boolean", + "default": true, + "description": "Whether to activate row clustering when generating heatmaps of gene expression in enriched pathways." + }, + "heatmaps_cluster_cols": { + "type": "boolean", + "default": false, + "description": "Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways." } } }, @@ -155,16 +171,28 @@ }, "multiqc": { "type": "string", + "default": "NO_FILE4", "description": "multiqc.zip folder containing the multiQC plots and report." }, "quote": { "type": "string", + "default": "NO_FILE5", "description": "Quote file to link in the report." }, "software_versions": { "type": "string", - "default": "None", + "default": "NO_FILE6", "description": "CSV/YML containing nf-core/rnaseq software versions." + }, + "report_file": { + "type": "string", + "default": "${projectDir}/assets/RNAseq_report.Rmd", + "description": "Report Rmd file, should not be changed." + }, + "references_file": { + "type": "string", + "default": "${projectDir}/assets/references.bibtex", + "description": "Reference bibtex file for the report, should not be changed." } } }, @@ -176,7 +204,6 @@ "properties": { "genome": { "type": "string", - "default": "None", "description": "Which genome to use for analysis, e.g. GRCh37; see /conf/igenomes.config for which genomes are available." }, "igenomes_base": { @@ -211,7 +238,6 @@ "properties": { "publish_dir_mode": { "type": "string", - "default": "copy", "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", @@ -234,8 +260,7 @@ "properties": { "email": { "type": "string", - "description": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits", - "default": "false" + "description": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits" }, "email_on_fail": { "type": "string", @@ -281,19 +306,16 @@ }, "config_profile_description": { "type": "string", - "default": "false", "description": "Institutional config description.", "hidden": true }, "config_profile_contact": { "type": "string", - "default": "false", "description": "Institutional config contact information.", "hidden": true }, "config_profile_url": { "type": "string", - "default": "false", "description": "Institutional config URL link.", "hidden": true }, @@ -304,33 +326,12 @@ "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "default": true, - "fa_icon": "fas fa-check-square", - "hidden": true - }, "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "validationFailUnrecognisedParams": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters fails when an unrecognised parameter is found.", - "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." 
- }, - "validationLenientMode": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters in lenient more.", - "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } } }, @@ -339,7 +340,7 @@ "type": "object", "fa_icon": "fab fa-acquisitions-incorporated", "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. 
See [the nf-core website](https://nf-co.re/usage/configuration) for details.", "properties": { "max_cpus": { "type": "integer", diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 2fdd1e4f..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - software_versions = SAMPLESHEET_CHECK.out.versions // channel: [ software_versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} diff --git a/subworkflows/local/utils_nfcore_rnadeseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnadeseq_pipeline/main.nf new file mode 100644 index 00000000..eb37c27b --- /dev/null +++ b/subworkflows/local/utils_nfcore_rnadeseq_pipeline/main.nf @@ -0,0 +1,16 @@ +/* +======================================================================================== + FUNCTIONS 
+======================================================================================== +*/ +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS 
+======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. 
+ pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert 
workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - 
subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..14558c39 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    $group

    \n" + summary_section += "
    \n" + for (param in group_params.keySet()) { + summary_section += "
    $param
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? 
'' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) 
misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = 
logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the 
JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test 
@@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + 
"green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": 
"\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config 
b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help 
text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. 
"nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + 
schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') 
} }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". 
pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + 
"description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/testdata/QDESQ/new_rsem_multiqc.zip b/testdata/QDESQ/new_rsem_multiqc.zip new file mode 100644 index 00000000..797f3dca Binary files /dev/null and b/testdata/QDESQ/new_rsem_multiqc.zip differ diff --git a/testdata/QDESQ/new_salmon_multiqc.zip b/testdata/QDESQ/new_salmon_multiqc.zip new file mode 100644 index 00000000..0023fd22 Binary files /dev/null and b/testdata/QDESQ/new_salmon_multiqc.zip differ diff --git a/tests/test.yml b/tests/test.yml index 56ff217d..11489eb2 100644 --- a/tests/test.yml +++ b/tests/test.yml @@ -60,35 +60,36 @@ - path: results_test/differential_gene_expression/plots/PCA_plot.pdf - path: results_test/differential_gene_expression/plots/PCA_plot.png - path: 
results_test/differential_gene_expression/plots/PCA_plot.svg - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO_gost_pathway_venn_diagram.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO_gost_pathway_venn_diagram.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO_gost_pathway_venn_diagram.svg - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO_gost_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO_gost_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_pathway_enrichment_results.tsv - md5sum: be0f292a205f2ecd453ee27ecdc62615 - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_KEGG_pathway_enrichment_results.tsv - md5sum: f0e6d8117e3ff05ddabc3abb98f514b7 - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_REAC_pathway_enrichment_results.tsv - md5sum: 3d7616fd0f6cb3aaf9f08252b2bcd223 - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_KEGG_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_REAC_pathway_enrichment_plot.png + - path: results_test/differential_gene_expression/plots/heatmap_gene_list/Heatmap_normalized_counts_gene_list.pdf + - path: 
results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_venn_diagram.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_venn_diagram.png + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_venn_diagram.svg + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.png + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.svg + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/pathway_enrichment_results.tsv + md5sum: 92736a4c802ebd4e644682308ecf46dc + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathway_enrichment_results.tsv + md5sum: 92115e662e0e2489ad05d28f3981fa3a + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/REAC_pathway_enrichment_results.tsv + md5sum: 9077c63139668fd01d3f4086e6acb7bc + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/KEGG_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/KEGG_pathway_enrichment_plot.png + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/REAC_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/REAC_pathway_enrichment_plot.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04360.DE_contrast_condition_genotype_WT_vs_KO.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04360.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04360.xml - path: 
results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04610.DE_contrast_condition_genotype_WT_vs_KO.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04610.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04610.xml - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/DE_contrast_condition_treatment_Treated_vs_Control_pathway_enrichment_results.tsv - md5sum: 0bd6dfd32bad6f5292f6cde91f019844 - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/DE_contrast_condition_treatment_Treated_vs_Control_REAC_pathway_enrichment_results.tsv - md5sum: 452ca5dd8d5f190648988502f85d3ef1 - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control_gost_pathway_venn_diagram.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control_gost_pathway_venn_diagram.png - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control_gost_pathway_venn_diagram.svg - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/DE_contrast_condition_treatment_Treated_vs_Control_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/DE_contrast_condition_treatment_Treated_vs_Control_REAC_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/heatmap_gene_list/Heatmap_normalized_counts_gene_list.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/pathway_enrichment_results.tsv + md5sum: 0a425c10e229bb775597be176c414705 + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/REAC_pathway_enrichment_results.tsv + md5sum: 6594fdb8c0eb591fa131982a8be00877 + - path: 
results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/gost_pathway_venn_diagram.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/gost_pathway_venn_diagram.png + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/gost_pathway_venn_diagram.svg + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/enrichment_plots/REAC_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/enrichment_plots/REAC_pathway_enrichment_plot.png - path: results_test/RNAseq_report.html diff --git a/tests/test_custom_gmt.yml b/tests/test_custom_gmt.yml index 589ccf13..baa32547 100644 --- a/tests/test_custom_gmt.yml +++ b/tests/test_custom_gmt.yml @@ -60,11 +60,15 @@ - path: results_test/differential_gene_expression/plots/PCA_plot.pdf - path: results_test/differential_gene_expression/plots/PCA_plot.png - path: results_test/differential_gene_expression/plots/PCA_plot.svg - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO_gost_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO_gost_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_pathway_enrichment_results.tsv + - path: results_test/differential_gene_expression/plots/heatmap_gene_list/Heatmap_normalized_counts_gene_list.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.png + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.svg + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_venn_diagram.pdf + - path: 
results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_venn_diagram.png + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_venn_diagram.svg + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/pathway_enrichment_results.tsv md5sum: eae154d9c7a526cc8c3fcd1a586fa0ed - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/DE_contrast_condition_treatment_Treated_vs_Control_pathway_enrichment_results.tsv + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/pathway_enrichment_results.tsv md5sum: bd2cbbf85c3e241743e60b9b8263a31c - - path: results_test/pathway_analysis/heatmap_gene_list/Heatmap_normalized_counts_gene_list.pdf - path: results_test/RNAseq_report.html diff --git a/tests/test_full.yml b/tests/test_full.yml index c4477cbf..c44f2338 100644 --- a/tests/test_full.yml +++ b/tests/test_full.yml @@ -60,26 +60,29 @@ - path: results_test/differential_gene_expression/plots/PCA_plot.pdf - path: results_test/differential_gene_expression/plots/PCA_plot.png - path: results_test/differential_gene_expression/plots/PCA_plot.svg - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO_gost_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO_gost_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_pathway_enrichment_results.tsv - md5sum: be0f292a205f2ecd453ee27ecdc62615 - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_KEGG_pathway_enrichment_results.tsv - md5sum: f0e6d8117e3ff05ddabc3abb98f514b7 - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_REAC_pathway_enrichment_results.tsv - md5sum: 
3d7616fd0f6cb3aaf9f08252b2bcd223 - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_KEGG_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/DE_contrast_condition_genotype_WT_vs_KO_REAC_pathway_enrichment_plot.png + - path: results_test/differential_gene_expression/plots/heatmap_gene_list/Heatmap_normalized_counts_gene_list.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/gost_pathway_gostplot.png + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/pathway_enrichment_results.tsv + md5sum: 21b907d52008f66f9a85190400fd211e + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathway_enrichment_results.tsv + md5sum: 92115e662e0e2489ad05d28f3981fa3a + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/REAC_pathway_enrichment_results.tsv + md5sum: 9077c63139668fd01d3f4086e6acb7bc + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/KEGG_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/KEGG_pathway_enrichment_plot.png + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/REAC_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/enrichment_plots/REAC_pathway_enrichment_plot.png - path: 
results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04360.DE_contrast_condition_genotype_WT_vs_KO.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04360.png - path: results_test/pathway_analysis/DE_contrast_condition_genotype_WT_vs_KO/KEGG_pathways/mmu04360.xml - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/DE_contrast_condition_treatment_Treated_vs_Control_pathway_enrichment_results.tsv - md5sum: 0bd6dfd32bad6f5292f6cde91f019844 - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/DE_contrast_condition_treatment_Treated_vs_Control_REAC_pathway_enrichment_results.tsv - md5sum: 452ca5dd8d5f190648988502f85d3ef1 - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/DE_contrast_condition_treatment_Treated_vs_Control_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/DE_contrast_condition_treatment_Treated_vs_Control_REAC_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/heatmap_gene_list/Heatmap_normalized_counts_gene_list.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/pathway_enrichment_results.tsv + md5sum: bdc0167b8b33a0da44e34768eead8376 + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/REAC_pathway_enrichment_results.tsv + md5sum: 6594fdb8c0eb591fa131982a8be00877 + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/enrichment_plots/REAC_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/enrichment_plots/REAC_pathway_enrichment_plot.png + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/gost_pathway_venn_diagram.pdf + - path: 
results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/gost_pathway_venn_diagram.png + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_Treated_vs_Control/gost_pathway_venn_diagram.svg - path: results_test/RNAseq_report.html diff --git a/tests/test_smrnaseq.yml b/tests/test_smrnaseq.yml index 34ff104b..3cac7639 100644 --- a/tests/test_smrnaseq.yml +++ b/tests/test_smrnaseq.yml @@ -72,18 +72,18 @@ - path: results_test/differential_gene_expression/plots/PCA_plot_with_labels.pdf - path: results_test/differential_gene_expression/plots/PCA_plot_with_labels.png - path: results_test/differential_gene_expression/plots/PCA_plot_with_labels.svg - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/DE_contrast_condition_treatment_clone9_vs_clone1_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/DE_contrast_condition_treatment_clone9_vs_clone1_KEGG_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/DE_contrast_condition_treatment_clone9_vs_clone1_KEGG_pathway_enrichment_plot.svg - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/DE_contrast_condition_treatment_clone9_vs_clone1_KEGG_pathway_enrichment_results.tsv - md5sum: 2637f5f6f4e74e7723280449e3ac647b - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/DE_contrast_condition_treatment_clone9_vs_clone1_pathway_enrichment_results.tsv - md5sum: a091e2ae9a678599d9e994082907d646 - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/DE_contrast_condition_treatment_control_vs_clone1_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/DE_contrast_condition_treatment_control_vs_clone1_KEGG_pathway_enrichment_plot.png - - path: 
results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/DE_contrast_condition_treatment_control_vs_clone1_KEGG_pathway_enrichment_plot.svg - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/DE_contrast_condition_treatment_control_vs_clone1_KEGG_pathway_enrichment_results.tsv - md5sum: 497cbe026880c9ea78c5591c7c4d07f7 - - path: results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/DE_contrast_condition_treatment_control_vs_clone1_pathway_enrichment_results.tsv - md5sum: 63a2adf7c4a971dc7443503cf968e455 + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/enrichment_plots/KEGG_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/enrichment_plots/KEGG_pathway_enrichment_plot.png + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/enrichment_plots/KEGG_pathway_enrichment_plot.svg + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/KEGG_pathway_enrichment_results.tsv + md5sum: 326f3495ca42dbc527484cef3ee0e58d + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_clone9_vs_clone1/pathway_enrichment_results.tsv + md5sum: d63d4d96de3c49cc38c91cb863a7da06 + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/enrichment_plots/KEGG_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/enrichment_plots/KEGG_pathway_enrichment_plot.png + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/enrichment_plots/KEGG_pathway_enrichment_plot.svg + - path: results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/KEGG_pathway_enrichment_results.tsv + md5sum: 64b3a3716b779900e99f56688c4b27e8 + - path: 
results_test/pathway_analysis/DE_contrast_condition_treatment_control_vs_clone1/pathway_enrichment_results.tsv + md5sum: e7a051d8623043a3849eae8900b99f71 - path: results_test/RNAseq_report.html diff --git a/tests/test_star_rsem.yml b/tests/test_star_rsem.yml index c9cbdd49..110848e8 100644 --- a/tests/test_star_rsem.yml +++ b/tests/test_star_rsem.yml @@ -75,38 +75,43 @@ - path: results_test/differential_gene_expression/plots/PCA_plot.pdf - path: results_test/differential_gene_expression/plots/PCA_plot.png - path: results_test/differential_gene_expression/plots/PCA_plot.svg - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_KEGG_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_KEGG_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/DE_contrast_condition_cellline_MCF7_vs_GM12878_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/DE_contrast_condition_cellline_MCF7_vs_GM12878_KEGG_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_REAC_pathway_enrichment_plot.png - - path: 
results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_REAC_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/DE_contrast_condition_cellline_MCF7_vs_GM12878_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/DE_contrast_condition_cellline_MCF7_vs_GM12878_REAC_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_pathway_enrichment_results.tsv - md5sum: b1c0a1374c963fff71083a9cdefe8e9b - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_pathway_enrichment_results.tsv - md5sum: 631e89b4de9042936fa2652bde036e7d - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/DE_contrast_condition_cellline_MCF7_vs_GM12878_pathway_enrichment_results.tsv - md5sum: 80123c4ba2c97eaa0217816f41122855 - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_KEGG_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_KEGG_pathway_enrichment_plot.png - - path: 
results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/DE_contrast_condition_cellline_MCF7_vs_GM12878_KEGG_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/DE_contrast_condition_cellline_MCF7_vs_GM12878_KEGG_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_REAC_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_REAC_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/DE_contrast_condition_cellline_MCF7_vs_GM12878_REAC_pathway_enrichment_plot.pdf - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/DE_contrast_condition_cellline_MCF7_vs_GM12878_REAC_pathway_enrichment_plot.png - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_KEGG_pathway_enrichment_results.tsv - md5sum: 276e871c81e85217bbd7eef951676084 - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_REAC_pathway_enrichment_results.tsv - md5sum: 9e6847fe5d631f09305dfa00dd3472d1 - - path: results_test/RNAseq_report.html + - path: results_test/differential_gene_expression/plots/heatmap_gene_list/Heatmap_normalized_counts_gene_list.pdf + - path: 
results_test/differential_gene_expression/plots/heatmap_gene_list/Heatmap_normalized_counts_gene_list.svg + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/enrichment_plots/GO.CC_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/enrichment_plots/GO.CC_pathway_enrichment_plot.png + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/enrichment_plots/GO.CC_pathway_enrichment_plot.svg + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/gost_pathway_gostplot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/gost_pathway_gostplot.png + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/gost_pathway_gostplot.svg + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/gost_pathway_venn_diagram.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/gost_pathway_venn_diagram.png + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/gost_pathway_venn_diagram.svg + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/pathway_enrichment_results.tsv + md5sum: 57e56666c0d561659f5aaec478f04cee + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/GO.CC_pathway_enrichment_results.tsv + md5sum: 098f12540b4cad2ea1c950af1fd304b6 + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/enrichment_plots/GO.CC_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/enrichment_plots/GO.CC_pathway_enrichment_plot.png + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/enrichment_plots/GO.CC_pathway_enrichment_plot.svg + - path: 
results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/gost_pathway_gostplot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/gost_pathway_gostplot.png + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/gost_pathway_gostplot.svg + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/gost_pathway_venn_diagram.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/gost_pathway_venn_diagram.png + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/gost_pathway_venn_diagram.svg + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/pathway_enrichment_results.tsv + md5sum: 73fdb835cede09c13f0ab2684b1cda73 + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/GO.CC_pathway_enrichment_results.tsv + md5sum: 2ebd84e4ed2029659294a09d49b62048 + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/enrichment_plots/GO.CC_pathway_enrichment_plot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/enrichment_plots/GO.CC_pathway_enrichment_plot.png + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/enrichment_plots/GO.CC_pathway_enrichment_plot.svg + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/gost_pathway_gostplot.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/gost_pathway_gostplot.png + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/gost_pathway_gostplot.svg + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/gost_pathway_venn_diagram.pdf + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/gost_pathway_venn_diagram.png + - 
path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/gost_pathway_venn_diagram.svg + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_MCF7_vs_GM12878/pathway_enrichment_results.tsv + md5sum: b7f67bc86475b218965b173b753199b1 + - path: results_test/pathway_analysis/metadata/gprofiler_full_hsapiens.ENSG.gmt diff --git a/tests/test_star_salmon.yml b/tests/test_star_salmon.yml index dbe49c9a..2b111164 100644 --- a/tests/test_star_salmon.yml +++ b/tests/test_star_salmon.yml @@ -69,8 +69,8 @@ - path: results_test/differential_gene_expression/plots/PCA_plot.pdf - path: results_test/differential_gene_expression/plots/PCA_plot.png - path: results_test/differential_gene_expression/plots/PCA_plot.svg - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/DE_contrast_condition_cellline_H1_vs_GM12878_pathway_enrichment_results.tsv - md5sum: cd967cc355045d9dc8c0bb6ffcf40b61 - - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/DE_contrast_condition_cellline_K562_vs_GM12878_pathway_enrichment_results.tsv + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_H1_vs_GM12878/pathway_enrichment_results.tsv + md5sum: 68b329da9893e34099c7d8ad5cb9c940 + - path: results_test/pathway_analysis/DE_contrast_condition_cellline_K562_vs_GM12878/pathway_enrichment_results.tsv md5sum: 68b329da9893e34099c7d8ad5cb9c940 - path: results_test/RNAseq_report.html diff --git a/workflows/rnadeseq.nf b/workflows/rnadeseq.nf index ffbbde2c..ead6be2a 100644 --- a/workflows/rnadeseq.nf +++ b/workflows/rnadeseq.nf @@ -4,14 +4,10 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Validate input parameters -WorkflowRnadeseq.initialise(params, log) - -// TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist def checkPathParamList = [ - params.input, params.model, - 
] + params.input, params.model +] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters @@ -35,41 +31,16 @@ ch_custom_gmt = Channel.fromPath(params.custom_gmt) ch_custom_background = Channel.fromPath(params.custom_background) ch_proj_summary_file = Channel.fromPath(params.project_summary) ch_softwareversions_file = Channel.fromPath(params.software_versions) +ch_report_file = Channel.fromPath(params.report_file) +ch_references_file = Channel.fromPath(params.references_file) -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowRnadeseq.initialise(params, log) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Loaded from modules/local/ -// -include { REPORT } from '../modules/local/report' -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// +include { REPORT } from '../modules/local/report' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -77,9 +48,6 @@ include { REPORT } from '../modules/local/report' 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Installed directly from nf-core/modules -// include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* @@ -88,12 +56,9 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary workflow RNADESEQ { -// -// MODULE: RNAseq Report -// + main: REPORT ( ch_counts_path, @@ -111,7 +76,9 @@ workflow RNADESEQ { ch_softwareversions_file, ch_multiqc_file, ch_custom_gmt, - ch_custom_background + ch_custom_background, + ch_report_file, + ch_references_file ) //TODO: Enable this: @@ -122,25 +89,6 @@ workflow RNADESEQ { } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -//TODO: Have a look at email_on_fail -//Here I have to change the params? - -/* -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } -} -*/ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END