diff --git a/internal/step3/backup.json b/internal/step3/backup.json new file mode 100644 index 00000000..4db744ba --- /dev/null +++ b/internal/step3/backup.json @@ -0,0 +1,19478 @@ +[ + { + "_id": "0b2d6b7b-2e03-41e1-8b10-74ad41686e89", + "_updatedAt": "2024-09-16T07:32:05Z", + "author": { + "_ref": "rob-syme", + "_type": "reference" + }, + "_rev": "Qhrcj1462eoyp9RZGGQNso", + "_type": "blogPost", + "publishedAt": "2024-09-02T07:17:00.000Z", + "meta": { + "_type": "meta", + "description": "Performing interactive analysis is considered one of the most difficult phases in the entire bioinformatics process. User-friendly interactive environments that are adjacent to your data and streamline the end-to end analysis process are critical.\n", + "noIndex": false, + "slug": { + "current": "data-studios-image-segmentation", + "_type": "slug" + } + }, + "body": [ + { + "style": "normal", + "_key": "92fe05ff0537", + "markDefs": [], + "children": [ + { + "_key": "631a6e0767360", + "_type": "span", + "marks": [], + "text": "Scientific research is rarely direct, and workflows commonly require further downstream analyses beyond pipeline runs. While Nextflow excels at batch automation, human interpretation of the generated data is also an essential part of the scientific process. Interactive environments facilitate this process by enabling model refinement and report generation, increasing efficiency and facilitating informed decision-making." 
+ } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "fc43232ac000", + "markDefs": [ + { + "href": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8482564/", + "_key": "1bbb46da0dcc", + "_type": "link" + } + ], + "children": [ + { + "_key": "dbe6c56444880", + "_type": "span", + "marks": [], + "text": "Performing interactive analysis is considered one of the " + }, + { + "_key": "442e2de71eb8", + "_type": "span", + "marks": [ + "1bbb46da0dcc" + ], + "text": "most challenging steps in the entire bioinformatics process" + }, + { + "marks": [], + "text": ". Users face cumbersome, time-consuming, and error-prone manual tasks such as transferring data from the cloud to local storage and navigating various APIs, programming languages, libraries, and tools. ", + "_key": "201701c45af0", + "_type": "span" + }, + { + "marks": [ + "strong" + ], + "text": "User-friendly interactive environments", + "_key": "04f2d036269a", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " that exist adjacent to your data are critical to streamline end-to-end computational analyses.", + "_key": "9aeb813c2016" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_key": "1a4b67c2eec10", + "_type": "span", + "marks": [], + "text": "Seqera’s " + }, + { + "_type": "span", + "marks": [ + "11674253ce1c" + ], + "text": "Data Studios", + "_key": "1a4b67c2eec11" + }, + { + "_key": "1a4b67c2eec12", + "_type": "span", + "marks": [], + "text": " bridges the gap between pipeline outputs and secure interactive analysis environments by bringing " + }, + { + "_key": "1a4b67c2eec13", + "_type": "span", + "marks": [ + "203ac6ca0086" + ], + "text": "reproducible, containerized and interactive analytical notebook environments" + }, + { + "marks": [], + "text": " to your data. In this way, the output of one workflow can be analyzed manually and be used as the input for a subsequent workflow. 
Here, we show how a scientist can use the Seqera Platform’s Runs and Data Studios features to ", + "_key": "1a4b67c2eec14", + "_type": "span" + }, + { + "text": "optimize image segmentation model iteration", + "_key": "1a4b67c2eec15", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "1a4b67c2eec16", + "_type": "span", + "marks": [], + "text": " in the " + }, + { + "_type": "span", + "marks": [ + "2e6de2623899" + ], + "text": "nf-core/molkart", + "_key": "1a4b67c2eec17" + }, + { + "text": " pipeline.", + "_key": "1a4b67c2eec18", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "20397f5fc865", + "markDefs": [ + { + "_type": "link", + "href": "https://docs.seqera.io/platform/latest/data/data-studios", + "_key": "11674253ce1c" + }, + { + "href": "https://seqera.io/blog/data-studios-announcement/", + "_key": "203ac6ca0086", + "_type": "link" + }, + { + "_key": "2e6de2623899", + "_type": "link", + "href": "https://nf-co.re/molkart/1.0.0" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "a3328a6c71f1", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "6fcebc357158", + "_type": "span", + "marks": [] + } + ] + }, + { + "style": "blockquote", + "_key": "915063774d4c", + "markDefs": [], + "children": [ + { + "_key": "3344b7d0b586", + "_type": "span", + "marks": [], + "text": "Watch the full presentation from Nextflow Summit in Boston, May 2024 " + } + ], + "_type": "block" + }, + { + "_type": "youtube", + "id": "sIFL-Pk9Wl4", + "_key": "2c940faebb5f" + }, + { + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "How does image segmentation work?", + "_key": "700ac256fecf" + } + ], + "_type": "block", + "style": "h2", + "_key": "7df84a8fb865", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "A central task in molecular biology is quantifying the abundance of different molecules (often RNAs or proteins) per cell or 
structure. Traditionally, this was done by sampling entire tissues or, in later approaches, using single-cell methods to measure such molecules within each cell. However, both bulk and single-cell omics methods lose information about the spatial organization of cells within a tissue, a key factor during tissue development and a potential driver for diseases like cancer. Spatial omics, which combines imaging with ultra-sensitive assays to measure molecules, now allows the identification of hundreds to thousands of transcripts on tissue sections.", + "_key": "5a978796f4610" + } + ], + "_type": "block", + "style": "normal", + "_key": "6d0963afece2", + "markDefs": [] + }, + { + "_key": "806e4ab36139", + "markDefs": [ + { + "href": "http://nf-core/molkart", + "_key": "b3d4e5d943ce", + "_type": "link" + }, + { + "_key": "f2172ebc7417", + "_type": "link", + "href": "https://resolvebiosciences.com/" + }, + { + "_type": "link", + "href": "https://github.com/MouseLand/cellpose", + "_key": "aa9d7482adbf" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "b3d4e5d943ce" + ], + "text": "nf-core/molkart", + "_key": "707cd6cbb7851" + }, + { + "marks": [], + "text": " is a spatial transcriptomics pipeline for processing ", + "_key": "707cd6cbb7852", + "_type": "span" + }, + { + "_key": "707cd6cbb7853", + "_type": "span", + "marks": [ + "f2172ebc7417" + ], + "text": "Molecular Cartography data by Resolve Bioscience" + }, + { + "_key": "707cd6cbb7854", + "_type": "span", + "marks": [], + "text": ", which measures hundreds of RNA transcripts on a tissue section using single-molecule fluorescent in-situ hybridization (smFISH) (Figure 1). 
This pipeline includes a Nextflow module for the popular segmentation method " + }, + { + "_key": "707cd6cbb7855", + "_type": "span", + "marks": [ + "aa9d7482adbf" + ], + "text": "Cellpose" + }, + { + "_key": "707cd6cbb7856", + "_type": "span", + "marks": [], + "text": ", which allows a human-in-the-loop approach for improving cell segmentation. Conveniently, the nf-core/molkart pipeline includes a workflow branch for generating custom training data from a source data set. Training a performant, custom cellpose model typically requires multiple time consuming human-in-the-loop model iterations within an interactive analysis environment.\n" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "image", + "_key": "ff68a5ff91f0", + "asset": { + "_ref": "image-2bf639c49db818e0ac460c03bf1358c842865511-1600x900-png", + "_type": "reference" + } + }, + { + "_type": "block", + "style": "normal", + "_key": "782b164da2b9", + "markDefs": [ + { + "href": "https://www.biorxiv.org/content/10.1101/2024.02.05.578898v3", + "_key": "71d69b8521c9", + "_type": "link" + } + ], + "children": [ + { + "marks": [ + "strong", + "em" + ], + "text": "Figure 1. ", + "_key": "e38653a66779", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "em" + ], + "text": "Adapted workflow diagram of the nf-core/molkart pipeline for processing molecular cartography data using Nextflow. 
Original image data shown was taken from the literature (", + "_key": "ac613f5dd761" + }, + { + "_type": "span", + "marks": [ + "em", + "71d69b8521c9" + ], + "text": "Perico et al", + "_key": "6de7c14d3d24" + }, + { + "text": ".).", + "_key": "76c52cc20136", + "_type": "span", + "marks": [ + "em" + ] + } + ] + }, + { + "_key": "4d9dddc44892", + "markDefs": [ + { + "_type": "link", + "href": "https://www.biorxiv.org/content/10.1101/2024.02.05.578898v3", + "_key": "e387c2f0dc3e" + } + ], + "children": [ + { + "marks": [], + "text": "We used Data Studios to bring the tertiary analysis adjacent to the data in cloud storage, using data from ta 2024 preprint by ", + "_key": "d2dab384f2980", + "_type": "span" + }, + { + "text": "Perico et. al", + "_key": "46434dbd743d", + "_type": "span", + "marks": [ + "e387c2f0dc3e" + ] + }, + { + "marks": [], + "text": ". This allows us to iteratively train and improve a custom cellpose model for our specific dataset (Figure 2).", + "_key": "57b73961eea5", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "image", + "_key": "52cd6f4b80c7", + "asset": { + "_ref": "image-2d79c51097a15dfee042f120bbda1bebe4b129a4-1600x900-png", + "_type": "reference" + } + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://www.biorxiv.org/content/10.1101/2024.02.05.578898v3", + "_key": "2c25749c710f" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "strong", + "em" + ], + "text": "Figure 2. ", + "_key": "dfba0db9702b0" + }, + { + "text": "Adapted workflow diagram of the nf-core/molkart pipeline using Data Studios (highlighted in gray) to iteratively train a custom cellpose model to use as input for cell segmentation. 
Original image data shown was taken from the literature (", + "_key": "fdbbdf7dd8a9", + "_type": "span", + "marks": [ + "em" + ] + }, + { + "text": "Perico et al", + "_key": "862bb7f81cd3", + "_type": "span", + "marks": [ + "em", + "2c25749c710f" + ] + }, + { + "_key": "a88e1550c228", + "_type": "span", + "marks": [ + "em" + ], + "text": ".).\n" + } + ], + "_type": "block", + "style": "normal", + "_key": "7a444b4d08d3" + }, + { + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Adding Data Studios to the workflow", + "_key": "ebdcac6fc010" + } + ], + "_type": "block", + "style": "h2", + "_key": "3b0ef9de8028", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "text": "Using Data Studios as part of an adapted workflow was extremely beneficial:", + "_key": "f286f6dec6a60", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "f7eb2c08e442" + }, + { + "children": [ + { + "_key": "797be56fead20", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "458e3394230e", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "ff0a1dded277", + "listItem": "number", + "markDefs": [ + { + "_key": "33bd89a47469", + "_type": "link", + "href": "https://napari.org/stable/" + }, + { + "href": "https://qupath.github.io/", + "_key": "7ddd78939bfc", + "_type": "link" + }, + { + "_type": "link", + "href": "https://imagej.net/software/fiji/", + "_key": "a8f46f9aa40a" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Rapid review of image training data", + "_key": "b9b6ceaf6cc30" + }, + { + "text": " –", + "_key": "b9b6ceaf6cc31", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": " ", + "_key": "b9b6ceaf6cc32" + }, + { + "_type": "span", + "marks": [], + "text": "Images can be quickly reviewed directly in the cloud-hosted Data Studio analysis 
environment using common tools such as ", + "_key": "b9b6ceaf6cc33" + }, + { + "_key": "b9b6ceaf6cc34", + "_type": "span", + "marks": [ + "33bd89a47469" + ], + "text": "napari" + }, + { + "marks": [], + "text": ", ", + "_key": "b9b6ceaf6cc35", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "7ddd78939bfc" + ], + "text": "QuPath", + "_key": "b9b6ceaf6cc36" + }, + { + "_type": "span", + "marks": [], + "text": ", or ", + "_key": "b9b6ceaf6cc37" + }, + { + "text": "Fiji", + "_key": "b9b6ceaf6cc38", + "_type": "span", + "marks": [ + "a8f46f9aa40a" + ] + }, + { + "text": ". Prior to Data Studios, bioinformaticians would typically download the images, review, and re-upload to blob storage.", + "_key": "b9b6ceaf6cc39", + "_type": "span", + "marks": [] + } + ], + "level": 1 + }, + { + "style": "normal", + "_key": "2ea3f4af5025", + "listItem": "number", + "markDefs": [], + "children": [ + { + "text": "Collaboratively train a custom model in-situ ", + "_key": "1774f85b64fa0", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "text": "– Using a GPU-enabled compute environment for the Data Studios session, we used cellpose to train a new custom model on-the-fly using the previously generated image crops. Using a shareable URL, Data Studios enables seamless collaboration between data scientists and bench scientists with domain expertise in a single location.", + "_key": "1774f85b64fa1", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block" + }, + { + "style": "normal", + "_key": "9aaefadfd9da", + "listItem": "number", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Apply the new model to the original data ", + "_key": "5596d4c9c4d70" + }, + { + "_type": "span", + "marks": [], + "text": "– The new, manually-trained model was then applied to the original, full size image dataset. 
The cell segmentation results of the custom model can be inspected in the same Data Studios instance using any standard tool.\n", + "_key": "3e9f21a0c9d6" + } + ], + "level": 1, + "_type": "block" + }, + { + "_type": "image", + "_key": "0b1b42edce43", + "asset": { + "_ref": "image-481b2d03aee5622987b731a4819945bdced48291-1920x1080-png", + "_type": "reference" + } + }, + { + "markDefs": [ + { + "_key": "b3bd36addbaf", + "_type": "link", + "href": "https://www.biorxiv.org/content/10.1101/2024.02.05.578898v3" + } + ], + "children": [ + { + "_key": "bc888466590a", + "_type": "span", + "marks": [ + "strong", + "em" + ], + "text": "Figure 3. " + }, + { + "text": "Schematic workflow of image segmentation using nf-core/molkart with (bottom) and without (top) Data Studios. Original image data shown was taken from the literature (", + "_key": "94887d38f64d", + "_type": "span", + "marks": [ + "em" + ] + }, + { + "marks": [ + "b3bd36addbaf", + "em" + ], + "text": "Perico et al", + "_key": "9bf7d5bb618e", + "_type": "span" + }, + { + "text": ".).", + "_key": "c8a5c75c8854", + "_type": "span", + "marks": [ + "em" + ] + } + ], + "_type": "block", + "style": "normal", + "_key": "164ca6145027" + }, + { + "style": "h2", + "_key": "bf6040867323", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "The benefits of Data Studios", + "_key": "ce0ed88dbf6a0" + } + ], + "_type": "block" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/fusion/", + "_key": "4c1738586069" + } + ], + "children": [ + { + "marks": [ + "strong" + ], + "text": "Data remains in-situ", + "_key": "c727cb86c6ac0", + "_type": "span" + }, + { + "marks": [], + "text": " – No shuttling large volumes of data back and forth between your cloud storage and local analysis environments, which can quickly become expensive with ingress and egress charges, is extremely inefficient, and can result in data loss. 
Using the ", + "_key": "c727cb86c6ac1", + "_type": "span" + }, + { + "_key": "c727cb86c6ac2", + "_type": "span", + "marks": [ + "4c1738586069" + ], + "text": "Fusion file system" + }, + { + "_key": "c727cb86c6ac3", + "_type": "span", + "marks": [], + "text": ", Data Studios enables direct file access to cloud blob storage and is incredibly performant." + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "0e2b6a59dd98", + "listItem": "bullet" + }, + { + "children": [ + { + "marks": [], + "text": "", + "_key": "28efa7b3f7af0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "b8155228df9a", + "markDefs": [] + }, + { + "style": "normal", + "_key": "9ca108cbb51b", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "text": "Stable, containerized analysis environments", + "_key": "d389e047db4e0", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_type": "span", + "marks": [], + "text": " – Data Studio sessions are checkpointed, and can be rolled back to any previous state each time the session is stopped and restarted. Each checkpoint preserves the state of the running machine at a point in time, ensuring consistency and reproducibility of the environment, the software used, and data worked with.", + "_key": "d389e047db4e1" + } + ], + "level": 1, + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_key": "6f1fd4d1251a0", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "ca582579a6fd" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "1415c744c5c3", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "text": "Provision only the resources you need", + "_key": "52f988cd7a9d0", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "text": " – Data Studio sessions are fully customizable. 
Based on the analysis task(s) at hand, they can be provisioned as lean or as fully-featured as required, for example, making them GPU-enabled or adding hundreds of cores.", + "_key": "52f988cd7a9d1", + "_type": "span", + "marks": [] + } + ] + }, + { + "_key": "7e36cc508b12", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "36a4f06d680e0" + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "c086ca066584", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Permissions are centrally managed", + "_key": "745473d296350" + }, + { + "_type": "span", + "marks": [], + "text": " – Organization and workspace credentials are centrally managed by your organization administrators, ensuring only authenticated users with the appropriate permissions can connect to the data and analysis environment(s). Bioinformaticians and data scientists shouldn’t spend time managing infrastructure and permissions.", + "_key": "745473d296351" + } + ], + "level": 1, + "_type": "block" + }, + { + "children": [ + { + "text": "", + "_key": "6320ec1510da0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "d6ed9176d77a", + "markDefs": [] + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Secure, real time collaboration –", + "_key": "b08f197315d30" + }, + { + "_type": "span", + "marks": [], + "text": " The shareable URL feature ensures safe collaboration within, or across, bioinformatician and data science teams.", + "_key": "b08f197315d31" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "5af126e9be9c" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "375ea8c072cb" + } + ], + "_type": "block", + "style": "normal", + "_key": "47ce6afd2c6e", + "markDefs": [] + }, + { + 
"markDefs": [], + "children": [ + { + "text": "Streamline the entire data lifecycle", + "_key": "caf7f2d160530", + "_type": "span", + "marks": [ + "strong" + ] + } + ], + "_type": "block", + "style": "h2", + "_key": "ef8ae7eca4d6" + }, + { + "markDefs": [], + "children": [ + { + "text": "Data Studios can ", + "_key": "c08a98a9fb88", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "strong" + ], + "text": "streamline the entire end-to-end scientific data lifecycle", + "_key": "0ed1694aa9ad", + "_type": "span" + }, + { + "text": " by bringing reproducible, containerized and interactive analytical notebook environments to your data in real-time. This allows you to seamlessly transition from Nextflow pipeline outputs to secure interactive environments, consolidating data and analytics into one unified location.", + "_key": "a3942ac3f18e", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "84b24ceaf43a" + }, + { + "_key": "564b3312ce1a", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "befe51b498150" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "text": "“Data Studios enables the creation of the needed package environment for any project quickly, expediting the project start-up process. 
This allows us to promptly focus on data analysis and efficiently share the environment with the team”\n\n- ", + "_key": "b7d2c5cc7e600", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "strong" + ], + "text": "Lorena Pantano, PhD\nDirector of Bioinformatics Platform, Harvard Chan Bioinformatics Core", + "_key": "6fb6b1456ec5", + "_type": "span" + } + ], + "_type": "block", + "style": "blockquote", + "_key": "fbdc3a7ad46c" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "270b32efa102" + } + ], + "_type": "block", + "style": "normal", + "_key": "8c0662e7f397", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "View Data Studios in the Seqera Platform ", + "_key": "76dd999d4e8f0" + }, + { + "text": "Community Showcase workspace", + "_key": "76dd999d4e8f1", + "_type": "span", + "marks": [ + "98aa74b74492" + ] + }, + { + "_type": "span", + "marks": [], + "text": " or start a ", + "_key": "76dd999d4e8f2" + }, + { + "text": "free trial today", + "_key": "76dd999d4e8f3", + "_type": "span", + "marks": [ + "cf07f789df27" + ] + }, + { + "text": "!", + "_key": "76dd999d4e8f4", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "0fa70ffe4664", + "markDefs": [ + { + "_type": "link", + "href": "https://hubs.la/Q02Nhk4y0", + "_key": "98aa74b74492" + }, + { + "href": "https://hubs.la/Q02NhjDZ0", + "_key": "cf07f789df27", + "_type": "link" + } + ] + } + ], + "title": "Optimizing image segmentation modeling using Seqera Platform", + "tags": [ + { + "_ref": "82fd60f1-c6d0-4b8a-9c5d-f971c622f341", + "_type": "reference", + "_key": "07cea8dc5caa" + }, + { + "_ref": "f1d61674-9374-4d2c-97c2-55778db7c922", + "_type": "reference", + "_key": "5b7351bdac98" + }, + { + "_ref": "32377094-ace0-4f1e-bb48-b47f02d3849e", + "_type": "reference", + "_key": "cb6a3c4b282b" + }, + { + "_ref": "b70b4c8b-10e9-4630-b43f-e11b33f14daf", + "_type": "reference", + "_key": 
"48f31265ebdd" + }, + { + "_ref": "8c6a46a2-4653-49fb-a5c3-ddf572a75381", + "_type": "reference", + "_key": "231a068aa82a" + }, + { + "_ref": "2b5c9a56-b491-42aa-b291-86611d77ccec", + "_type": "reference", + "_key": "28d742bb8a84" + } + ], + "_createdAt": "2024-08-27T08:23:51Z" + }, + { + "author": { + "_ref": "109f0c7b-3d40-42a9-af77-3844f0e031c0", + "_type": "reference" + }, + "_createdAt": "2024-05-13T11:54:28Z", + "publishedAt": "2024-05-15T13:59:00.000Z", + "title": "nf-core/riboseq: A collaboration between Altos Labs and Seqera", + "_type": "blogPost", + "_updatedAt": "2024-05-15T10:12:46Z", + "tags": [ + { + "_key": "f30d3e591314", + "_ref": "b6511053-299b-4aa5-8957-94fb9ebc9493", + "_type": "reference" + }, + { + "_type": "reference", + "_key": "4525d8907a1f", + "_ref": "1b55a117-18fe-40cf-8873-6efd157a6058" + }, + { + "_key": "7c9827906277", + "_ref": "ab59634e-a349-468d-8f99-cb9fe4c38228", + "_type": "reference" + } + ], + "meta": { + "description": "nf-core/riboseq: A collaboration between Altos Labs and Seqera", + "noIndex": false, + "slug": { + "current": "nf-core-riboseq", + "_type": "slug" + }, + "_type": "meta", + "shareImage": { + "_type": "image", + "asset": { + "_ref": "image-10399aee1fa48e4250f2e7ab3c7fb76ca3aa1ac4-1200x628-png", + "_type": "reference" + } + } + }, + "body": [ + { + "markDefs": [], + "children": [ + { + "text": "This is a joint article contributed to the Seqera blog by Jon Manning of Seqera and Felix Krueger of Altos Labs describing the new nf-core/riboseq pipeline.", + "_key": "8c2ee84cdf5e0", + "_type": "span", + "marks": [ + "em" + ] + } + ], + "_type": "block", + "style": "normal", + "_key": "fc11d5317163" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://nf-co.re/", + "_key": "39d86b09469d" + }, + { + "_key": "f22304d582ae", + "_type": "link", + "href": "https://nf-co.re/riboseq" + }, + { + "_key": "23797f8146f8", + "_type": "link", + "href": "https://en.wikipedia.org/wiki/Ribosome_profiling" + } + ], + 
"children": [ + { + "_type": "span", + "marks": [], + "text": "In April 2024, the bioinformatics community welcomed a significant addition to the ", + "_key": "5355407782e60" + }, + { + "text": "nf-core", + "_key": "5355407782e61", + "_type": "span", + "marks": [ + "39d86b09469d" + ] + }, + { + "_type": "span", + "marks": [], + "text": " suite: the ", + "_key": "5355407782e62" + }, + { + "_type": "span", + "marks": [ + "f22304d582ae" + ], + "text": "nf-core/riboseq", + "_key": "5355407782e63" + }, + { + "_type": "span", + "marks": [], + "text": " pipeline. This new tool, born from a collaboration between Altos Labs and Seqera, underscores the potential of strategic partnerships to advance scientific research. In this article, we provide some background on the project, offer details on the pipeline, and explain how readers can get started with ", + "_key": "5355407782e64" + }, + { + "_key": "5355407782e65", + "_type": "span", + "marks": [ + "23797f8146f8" + ], + "text": "Ribo-seq" + }, + { + "text": " analysis.", + "_key": "5355407782e66", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "a96b84f9b665" + }, + { + "_type": "block", + "style": "h2", + "_key": "ff2e29964409", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "A Fruitful Collaboration", + "_key": "06511e51fc0b" + } + ] + }, + { + "_key": "212704cdad6c", + "markDefs": [], + "children": [ + { + "text": "Altos Labs is known for its ambitious efforts in harnessing cellular rejuvenation to reverse disease, injury, and disabilities that can occur throughout life. Their scientific strategy heavily relies on understanding cellular mechanisms via advanced technologies. Ribo-seq provides insights into the real-time translation of proteins, a core process often dysregulated during aging and disease. Altos Labs needed a way to ensure reliable, reproducible Ribo-seq analysis that its research teams could use. 
While a Ribo-seq pipeline had been started in nf-core, limited progress had been made. Seqera seemed the ideal partner to help build one!", + "_key": "ef4460f305a4", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Seqera, known for creating and developing the ", + "_key": "402551d96a99" + }, + { + "_key": "a11895ee51be", + "_type": "span", + "marks": [ + "afd8d4976f75" + ], + "text": "Nextflow DSL" + }, + { + "text": " and being an active partner in establishing community standards on nf-core, brought the expertise needed to translate Altos Labs' vision into a viable community pipeline. As part of this collaboration, we formed a working group and also reached out to colleagues at ", + "_key": "206247a437cc", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "8fc76bfd5785" + ], + "text": "ZS", + "_key": "da520fc0d7f3" + }, + { + "text": " and other community members who had done prior work with Ribosome profiling in Nextflow. 
Our goal was not only to enhance Ribo-seq analysis capabilities but also to ensure the pipeline’s sustainability through a community-driven process.", + "_key": "c8e26b5b7392", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "3a4e325a6885", + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/nextflow/", + "_key": "afd8d4976f75" + }, + { + "_type": "link", + "href": "https://www.zs.com/", + "_key": "8fc76bfd5785" + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Development Insights", + "_key": "023772c169b7" + } + ], + "_type": "block", + "style": "h2", + "_key": "110443549dbc", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "1bb6d0dcf94a", + "markDefs": [], + "children": [ + { + "text": "The nf-core/riboseq project was structured into several phases:", + "_key": "fcef7ffc7722", + "_type": "span", + "marks": [] + } + ] + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "04af5c122b050", + "_type": "span", + "marks": [ + "em" + ], + "text": "Initial planning" + }, + { + "_type": "span", + "marks": [], + "text": ": This phase involved detailed discussions between the Scientific Development team at Seqera, Altos Labs, and expert partners to ensure alignment with best practices and effective tool selection.", + "_key": "04af5c122b051" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "9bd450582af3" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "ef189e78f7f5", + "listItem": "bullet", + "markDefs": [ + { + "_key": "3fa0f88295d5", + "_type": "link", + "href": "https://nf-co.re/rnaseq" + } + ], + "children": [ + { + "marks": [ + "em" + ], + "text": "Adapting existing components", + "_key": "4eb6302b38970", + "_type": "span" + }, + { + "text": ": Key pre-processing and alignment functions were adapted from the ", + "_key": "4eb6302b38971", + "_type": "span", + "marks": [] + 
}, + { + "_type": "span", + "marks": [ + "3fa0f88295d5" + ], + "text": "nf-core/rnaseq", + "_key": "4eb6302b38972" + }, + { + "_key": "4eb6302b38973", + "_type": "span", + "marks": [], + "text": " pipeline, allowing for shareability, efficiency, and scalability." + } + ] + }, + { + "style": "normal", + "_key": "dc6acae62561", + "listItem": "bullet", + "markDefs": [ + { + "_key": "6be1a3f37f71", + "_type": "link", + "href": "https://github.com/zhpn1024/ribotish" + }, + { + "_type": "link", + "href": "https://github.com/smithlabcode/ribotricer", + "_key": "67a956a543b0" + }, + { + "_type": "link", + "href": "https://www.bioconductor.org/packages/release/bioc/html/anota2seq.html", + "_key": "5f9cca0d1922" + }, + { + "_type": "link", + "href": "https://biocontainers.pro/", + "_key": "a24a587b6c75" + }, + { + "_key": "d813571ed2e7", + "_type": "link", + "href": "https://github.com/nf-core/modules" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "em" + ], + "text": "New tool integration", + "_key": "f59020155b400" + }, + { + "text": ": Specific tools for Ribo-seq analysis, such as ", + "_key": "f59020155b401", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "6be1a3f37f71" + ], + "text": "Ribo-TISH", + "_key": "f59020155b402", + "_type": "span" + }, + { + "text": ", ", + "_key": "f59020155b403", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "67a956a543b0" + ], + "text": "Ribotricer", + "_key": "f59020155b404" + }, + { + "_type": "span", + "marks": [], + "text": ", and ", + "_key": "f59020155b405" + }, + { + "marks": [ + "5f9cca0d1922" + ], + "text": "anota2seq", + "_key": "f59020155b406", + "_type": "span" + }, + { + "_key": "f59020155b407", + "_type": "span", + "marks": [], + "text": ", were wrapped into modules using " + }, + { + "_type": "span", + "marks": [ + "a24a587b6c75" + ], + "text": "Biocontainers", + "_key": "f59020155b408" + }, + { + "text": ", within comprehensive testing frameworks to prevent regression 
and ensure reliability. These components were contributed to the ", + "_key": "f59020155b409", + "_type": "span", + "marks": [] + }, + { + "_key": "f59020155b4010", + "_type": "span", + "marks": [ + "d813571ed2e7" + ], + "text": "nf-core/modules" + }, + { + "_type": "span", + "marks": [], + "text": " repository, which will now be available for the wider community to reuse, independent of this effort.", + "_key": "f59020155b4011" + } + ], + "level": 1, + "_type": "block" + }, + { + "_key": "06ef45923942", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "text": "Pipeline development", + "_key": "27cae4355e4d0", + "_type": "span", + "marks": [ + "em" + ] + }, + { + "text": ": Individual components were stitched together coherently to create the nf-core/riboseq pipeline, with its own testing framework and user documentation.", + "_key": "27cae4355e4d1", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "_key": "a11532d70cbc", + "markDefs": [], + "children": [ + { + "text": "Technical and Community Challenges", + "_key": "9af59990c0c00", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h2" + }, + { + "_key": "449d8032618a", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Generalizing existing functionality", + "_key": "262d18dad67e0", + "_type": "span" + } + ], + "_type": "block", + "style": "h3" + }, + { + "_type": "block", + "style": "normal", + "_key": "af3382a99d21", + "markDefs": [ + { + "_key": "601d56009a00", + "_type": "link", + "href": "https://nf-co.re/modules" + }, + { + "href": "https://nf-co.re/subworkflows", + "_key": "9d9691a3a5b4", + "_type": "link" + } + ], + "children": [ + { + "marks": [], + "text": "nf-core has become an encyclopedia of components, including ", + "_key": "48bd49dd01300", + "_type": "span" + }, + { + "text": "modules", + "_key": "48bd49dd01301", + "_type": "span", + "marks": [ + "601d56009a00" + ] + }, + { + "marks": [], + 
"text": " and ", + "_key": "48bd49dd01302", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "9d9691a3a5b4" + ], + "text": "subworkflows", + "_key": "48bd49dd01303" + }, + { + "_key": "48bd49dd01304", + "_type": "span", + "marks": [], + "text": " that developers can leverage to build Nextflow pipelines. RNA-seq data analysis, in particular, is well served by the nf-core/rnaseq pipeline, one of the longest-standing and most popular members of the nf-core community. Some of the components used in nf-core/rnaseq were not written with re-use in mind, so the first task in this project was to abstract the commodity components for processes such as preprocessing and quantification so that they could be effectively shared by the nf-core/riboseq pipeline." + } + ] + }, + { + "_key": "5669adb1dcd3", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Test dataset generation", + "_key": "68d04249013b0" + } + ], + "_type": "block", + "style": "h3" + }, + { + "_key": "2767c14b9d80", + "markDefs": [], + "children": [ + { + "_key": "a4da2ac411130", + "_type": "span", + "marks": [], + "text": "Another significant hurdle was generating robust test data capable of supporting the ongoing quality assurance of our software. In Ribo-seq analysis, the basic operation of some tools depends on the quality of input data, so random down-sampling of variable quality input reads, especially at shallow depths may not be useful to generate test data. To overcome this, we implemented a targeted down-sampling strategy, selectively using input reads that meet high-quality standards and are known to align well with a specific chromosome. This method enabled us to produce a concise yet effective test data set, ensuring that our Ribo-seq tools operate reliably under realistic conditions." 
+ } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Tool selection", + "_key": "27bceef25c9e0" + } + ], + "_type": "block", + "style": "h3", + "_key": "2aaebc117fde" + }, + { + "markDefs": [], + "children": [ + { + "text": "A primary challenge in developing the pipeline was the selection of high-quality, sustainable software. In bioinformatics, funding often limits software development, and many tools are poorly maintained. Furthermore, the understanding of what software 'works' can be ambiguous, embedded in the community's shared knowledge rather than documented formally. Our cooperative approach enabled us to make informed decisions and contribute improvements to the underlying software, enhancing utility for users beyond the nf-core community.", + "_key": "42c9c78112020", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "1cb88d14f05e" + }, + { + "children": [ + { + "_key": "b2c37914fe590", + "_type": "span", + "marks": [], + "text": "Parameter selection" + } + ], + "_type": "block", + "style": "h3", + "_key": "d1e0de03d5a1", + "markDefs": [] + }, + { + "style": "normal", + "_key": "2523548b9954", + "markDefs": [], + "children": [ + { + "text": "Selecting the correct parameter settings for optimal operation of bioinformatics tools is a perennial problem in the community. In particular, the settings for the STAR alignment algorithm have very different constraints in Ribo-seq analysis relative to generic RNA-seq analysis. We conducted a series of benchmarks to assess the impact on alignment statistics of various combinations of parameters. 
We settled on a starting set, but this is a subject of continuing discussion with community members to drive further optimizations.", + "_key": "decd6cfc25240", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Pipeline Features", + "_key": "9a31de208e060", + "_type": "span" + } + ], + "_type": "block", + "style": "h2", + "_key": "a8c53464a53f" + }, + { + "_key": "45f1476190e5", + "markDefs": [], + "children": [ + { + "_key": "f51ea64a9e180", + "_type": "span", + "marks": [], + "text": "The nf-core/riboseq pipeline is now a robust framework written using the nf-core pipeline template, and specifically tailored to handle the complexities of Ribo-seq data analysis." + } + ], + "_type": "block", + "style": "normal" + }, + { + "asset": { + "_type": "reference", + "_ref": "image-83f90945d29b41fcdc562789b06f3abbdbfa4d9a-1010x412-png" + }, + "_type": "image", + "_key": "9024177c2c73" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Here is what it offers:", + "_key": "3460577cae3f", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "c4c2c021e47b" + }, + { + "style": "normal", + "_key": "cfb811774489", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "5e7ebc27391f0", + "_type": "span", + "marks": [], + "text": "Baseline read preprocessing using processes adapted from existing nf-core components." 
+ } + ], + "level": 1, + "_type": "block" + }, + { + "_key": "f78073ef3267", + "listItem": "bullet", + "markDefs": [ + { + "_key": "159e3bc6217d", + "_type": "link", + "href": "https://github.com/alexdobin/STAR" + } + ], + "children": [ + { + "marks": [], + "text": "Alignment to references with ", + "_key": "4ce6dc424aed0", + "_type": "span" + }, + { + "marks": [ + "159e3bc6217d" + ], + "text": "STAR", + "_key": "4ce6dc424aed1", + "_type": "span" + }, + { + "_key": "4ce6dc424aed2", + "_type": "span", + "marks": [], + "text": ", producing both transcriptome and genome alignments." + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "3cdb46402566", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "1b345d3fa4f80", + "_type": "span", + "marks": [], + "text": "Analysis of read distribution around protein-coding regions to assess frame bias and P-site offsets. This produces a rich selection of diagnostic plots to assess Ribo-seq data quality." 
+ } + ], + "level": 1 + }, + { + "style": "normal", + "_key": "9e3414d59445", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Prediction and identification of translated open reading frames using tools like Ribo-TISH and Ribotricer.", + "_key": "3299c56efe000", + "_type": "span" + } + ], + "level": 1, + "_type": "block" + }, + { + "style": "normal", + "_key": "9e8c117a96a2", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Assessment of translational efficiency, which requires matched RNA-seq and Ribo-seq data, facilitated by the anota2seq Bioconductor package (see dot plot below).", + "_key": "c39d9d7b14f8" + } + ], + "_type": "block" + }, + { + "asset": { + "_ref": "image-ca5f9967df813470051fcf548e962bdbf4c50ee5-624x624-png", + "_type": "reference" + }, + "_type": "image", + "_key": "7122c68ade88" + }, + { + "markDefs": [], + "children": [ + { + "_key": "57c0e67a28250", + "_type": "span", + "marks": [ + "em" + ], + "text": "An example result from anota2seq, a tool used to study gene expression, shows how transcription and translation are connected. The x-axis shows changes in overall mRNA levels (transcription) between a treated and a control group, while the y-axis displays changes in the rate of protein synthesis (translation) between those groups, as measured by Ribo-seq. Grey points represent genes with no significant change in either metric and most points align near the center of the x-axis, indicating little change in mRNA levels. However, some genes exhibit increased (orange) or decreased (red) protein synthesis, suggesting direct regulation of translation rather than changes driven solely by mRNA abundance." 
+ } + ], + "_type": "block", + "style": "normal", + "_key": "067ad9c9d6d7" + }, + { + "children": [ + { + "marks": [], + "text": "If you are a researcher interested in Ribo-seq data analysis, you can test the pipeline by following the instructions in the ", + "_key": "e5078088e49b0", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "34ab33c4a8e1" + ], + "text": "getting started", + "_key": "e5078088e49b1" + }, + { + "_type": "span", + "marks": [], + "text": " section of the pipeline. Please feel free to submit bugs and feature requests to drive ongoing improvements. You can also become part of the conversation by joining the ", + "_key": "e5078088e49b2" + }, + { + "text": "#riboseq", + "_key": "e5078088e49b3", + "_type": "span", + "marks": [ + "218183b5348d" + ] + }, + { + "_type": "span", + "marks": [], + "text": " channel in the nf-core community Slack workspace. We would love to see you there!", + "_key": "e5078088e49b4" + } + ], + "_type": "block", + "style": "normal", + "_key": "46beba019134", + "markDefs": [ + { + "_type": "link", + "href": "https://nf-co.re/riboseq/#usage", + "_key": "34ab33c4a8e1" + }, + { + "_type": "link", + "href": "https://nfcore.slack.com/channels/riboseq", + "_key": "218183b5348d" + } + ] + }, + { + "style": "h2", + "_key": "515022911e71", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Next Steps", + "_key": "bd13a8c55f6e" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Following this initial phase of work, Seqera and Altos Labs have handed over the nf-core/riboseq pipeline to the nf-core community for ongoing maintenance and development. As members of that community, we will continue to play a part in enhancing the pipeline going forward. 
We hope others will benefit from this effort and continue to improve and refine pipeline functionality.", + "_key": "14a152a9174f0" + } + ], + "_type": "block", + "style": "normal", + "_key": "2d75d51ff270" + }, + { + "style": "normal", + "_key": "09c10fe38376", + "markDefs": [ + { + "href": "https://github.com/iraiosub/riboseq-flow", + "_key": "46fa6099abc2", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Coincidentally, the authors of ", + "_key": "98347010c2330" + }, + { + "text": "riboseq-flow", + "_key": "98347010c2331", + "_type": "span", + "marks": [ + "46fa6099abc2" + ] + }, + { + "_type": "span", + "marks": [], + "text": " published their related work on the same day that nf-core/riboseq was first released. This pipeline has a highly complementary set of steps, and there is already ongoing collaboration to work together to build an even better community resource.", + "_key": "98347010c2332" + } + ], + "_type": "block" + }, + { + "_key": "c566b4d435e3", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Empowering Research and Innovation", + "_key": "e5fdf870848b0" + } + ], + "_type": "block", + "style": "h2" + }, + { + "_key": "99da8271ab0f", + "markDefs": [], + "children": [ + { + "_key": "35352a1b306b0", + "_type": "span", + "marks": [], + "text": "The joint contribution of Seqera and Altos Labs to the nf-core/riboseq pipeline highlights how collaboration between industry and open-source communities can result in tools that push scientific boundaries and foster community engagement and development. By adhering to rigorous code quality and testing standards, nf-core/riboseq ensures researchers access to a dependable, cutting-edge tool." 
+ } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "56719298b452", + "markDefs": [ + { + "href": "mailto:services@seqera.io", + "_key": "ccafa728bca7", + "_type": "link" + } + ], + "children": [ + { + "marks": [], + "text": "We believe this new pipeline is poised to be vital in studying protein synthesis and its implications for aging and health. This is not just a technical achievement - it's a step forward in collaborative, open scientific progress.", + "_key": "53386085eb760", + "_type": "span" + } + ], + "_type": "block" + }, + { + "markDefs": [ + { + "href": "mailto:services@seqera.io", + "_key": "ccafa728bca7", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "If you have a project in mind where Seqera may be able to help with our Professional Services offerings, please contact us at ", + "_key": "cafe02f0755d" + }, + { + "text": "services@seqera.io", + "_key": "53386085eb761", + "_type": "span", + "marks": [ + "ccafa728bca7" + ] + }, + { + "_type": "span", + "marks": [], + "text": ". 
We are the content experts for Nextflow, nf-core, and the Seqera Platform, and can offer tailored solutions and expert guidance to help you fulfill your objectives.", + "_key": "53386085eb762" + } + ], + "_type": "block", + "style": "normal", + "_key": "6e42514da79e" + }, + { + "children": [ + { + "marks": [], + "text": "To learn more about Altos Labs, visit ", + "_key": "3babdea8c79d0", + "_type": "span" + }, + { + "_key": "3babdea8c79d1", + "_type": "span", + "marks": [ + "026178e92bb6" + ], + "text": "https://www.altoslabs.com/" + }, + { + "text": ".", + "_key": "3babdea8c79d2", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "a5dc365dc556", + "markDefs": [ + { + "href": "https://www.altoslabs.com/", + "_key": "026178e92bb6", + "_type": "link" + } + ] + }, + { + "_key": "5b95f381569b", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Acknowledgments", + "_key": "48b61c9282e00" + } + ], + "_type": "block", + "style": "h2" + }, + { + "markDefs": [], + "children": [ + { + "text": "nf-core/riboseq was initially written by Jonathan Manning (Bioinformatics Engineer at Seqera) in collaboration with Felix Krueger and Christel Krueger (Altos Labs). The development work carried out on the pipeline was funded by Altos Labs. 
We thank the following people for their input (", + "_key": "d836d0eff50e0", + "_type": "span", + "marks": [] + }, + { + "_key": "d836d0eff50e1", + "_type": "span", + "marks": [ + "em" + ], + "text": "in alphabetical order" + }, + { + "text": "):", + "_key": "d836d0eff50e2", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "258428890647" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "be9ad649bb8d", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "376c006c20de0", + "_type": "span", + "marks": [], + "text": "Felipe Almeida (ZS)" + } + ] + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Anne Bresciani (ZS)", + "_key": "6046a5e41c110" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "abb0a8d9fba2" + }, + { + "style": "normal", + "_key": "31c2f31a40bc", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Caroline Eastwood (University of Edinburgh)", + "_key": "040c3d125ae60", + "_type": "span" + } + ], + "level": 1, + "_type": "block" + }, + { + "_key": "ce8f076685cf", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "f3c530a930470", + "_type": "span", + "marks": [], + "text": "Maxime U Garcia (Seqera)" + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "7b34ffefab7d", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Mikhail Osipovitch (ZS)", + "_key": "e21649c58e7b0", + "_type": "span" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_key": "1f18a294d9a20", + "_type": "span", + "marks": [], + "text": "Jack Tierney (University College Cork)" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "02884c22d195", + "listItem": "bullet" + }, + { + "listItem": "bullet", + 
"markDefs": [], + "children": [ + { + "text": "Edward Wallace (University of Edinburgh)\n\n", + "_key": "86b2bce07178", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "8f03c90bd810" + }, + { + "children": [ + { + "marks": [], + "text": "", + "_key": "1da880ad30a0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "e59fb1d47363", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "736ce4dde440", + "markDefs": [], + "children": [ + { + "text": "\n\n", + "_key": "1c8d35ffcae9", + "_type": "span", + "marks": [] + } + ] + } + ], + "_id": "0d583937-1d7f-4c31-9e79-d8f1e5f2a2da", + "_rev": "UBGILU345IzqgWYhEN5Di2" + }, + { + "title": "Bioinformatics events you can’t miss in fall 2024 and early 2025", + "_rev": "odsN0KVxadbI50QPUHiVWo", + "publishedAt": "2024-09-24T09:27:00.000Z", + "meta": { + "description": "Get ready to mark your calendars because the fall of 2024 is going to be jam-packed with amazing opportunities to expand your knowledge, make new connections, and stay at the forefront of bioinformatics!", + "noIndex": false, + "slug": { + "current": "bioinformatics-events-2024-2025", + "_type": "slug" + }, + "_type": "meta" + }, + "_id": "15c75021-e091-4854-9aa0-fc04970ec963", + "tags": [ + { + "_type": "reference", + "_key": "851fad916bc4", + "_ref": "1b55a117-18fe-40cf-8873-6efd157a6058" + } + ], + "_createdAt": "2024-09-24T07:27:22Z", + "author": { + "_ref": "irina-silva", + "_type": "reference" + }, + "_type": "blogPost", + "body": [ + { + "markDefs": [], + "children": [ + { + "text": "Bioinformaticians worldwide, get ready to mark your calendars: Fall 2024 is looking jam-packed with amazing opportunities to learn, connect, and stay at the forefront of bioinformatics!", + "_key": "005350c7abb30", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "a4fd9b6dafb3" + }, + { + "markDefs": [], + "children": [ + { + 
"marks": [], + "text": "", + "_key": "83660dc9adba0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "616a0b80c1b7" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "With so many fantastic events happening worldwide, we've handpicked those that are bioinformatics-focused or feature bioinformatics tracks – so you can be sure not to miss the ones most relevant to you. \n\nHere is our curated compilation of some of the best industry events to attend this fall in Europe, North America, and Asia-Pacific, as well as a sneak peak of events coming up in 2025.", + "_key": "b2ce77baf4420" + } + ], + "_type": "block", + "style": "normal", + "_key": "1772499dc381", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "736725c102d0", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "bc91ea5b179f0" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "text": "Top bioinformatics events in Europe", + "_key": "ba883d260fbf0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h2", + "_key": "80b88306ce1f" + }, + { + "_key": "6ef975430bd0", + "markDefs": [ + { + "href": "https://summit.nextflow.io/2024/barcelona/?utm_campaign=Summit%202024&utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024", + "_key": "a58cd46a7162", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "a58cd46a7162", + "strong" + ], + "text": "Nextflow Summit Barcelona", + "_key": "504b028511d50" + } + ], + "_type": "block", + "style": "h3" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "e1cc809749e10", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "7408e5fde66f" + }, + { + "children": [ + { + "text": "Location:", + "_key": "ba521555b8600", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "marks": [], + "text": " Barcelona, Spain", + "_key": 
"e38d045f888b", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "eedc79c932b6", + "markDefs": [] + }, + { + "style": "normal", + "_key": "4a2541ddb2bf", + "markDefs": [], + "children": [ + { + "text": "Dates:", + "_key": "6f4fbc08e04d0", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "3022f367802b", + "_type": "span", + "marks": [], + "text": " October 28 - November 1, 2024" + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "92a0bd002639", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "In-person | Online", + "_key": "b6f470c2fc4a0" + } + ], + "_type": "block" + }, + { + "_key": "81af197467b0", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "7b2910b77cf90" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_key": "f2f63e0ffe44", + "markDefs": [ + { + "_key": "e53a5efb455a", + "_type": "link", + "href": "https://summit.nextflow.io/2024/boston/?utm_campaign=Summit%202024&utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024" + }, + { + "_type": "link", + "href": "https://seqera.io?utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024", + "_key": "d19e6bd20b41" + }, + { + "_type": "link", + "href": "https://seqera.io/nextflow/?utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024", + "_key": "3f0fdb18390e" + }, + { + "_key": "3b66656713f3", + "_type": "link", + "href": "https://summit.nextflow.io/2024/barcelona/training/?utm_campaign=Summit%202024&utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024" + }, + { + "_type": "link", + "href": "https://summit.nextflow.io/2024/barcelona/hackathon/?utm_campaign=Summit%202024&utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024", + "_key": "3a54f51827fc" + } + ], + "children": [ + { + "text": "Did you miss out on the ", + "_key": "4b0e9968a18b0", + "_type": "span", + "marks": [] + }, + { + "_key": 
"4b0e9968a18b1", + "_type": "span", + "marks": [ + "e53a5efb455a" + ], + "text": "Nextflow Summit in Boston" + }, + { + "_key": "4b0e9968a18b2", + "_type": "span", + "marks": [], + "text": " earlier this year? Don’t worry! The premier event in bioinformatics, from " + }, + { + "_type": "span", + "marks": [ + "d19e6bd20b41" + ], + "text": "Seqera ", + "_key": "4b0e9968a18b3" + }, + { + "_type": "span", + "marks": [], + "text": "- the creators of ", + "_key": "4b0e9968a18b4" + }, + { + "_type": "span", + "marks": [ + "3f0fdb18390e" + ], + "text": "Nextflow", + "_key": "4b0e9968a18b5" + }, + { + "_type": "span", + "marks": [], + "text": " - returns to the old continent and will bring together leading experts, innovators, and researchers to showcase the latest breakthroughs in ", + "_key": "4b0e9968a18b6" + }, + { + "text": "bioinformatics workflow management", + "_key": "9d456da5cb10", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "text": ". Whether you are new to Nextflow or a seasoned pro, the Nextflow Summit offers something for everyone. The ", + "_key": "373cbfba2986", + "_type": "span", + "marks": [] + }, + { + "_key": "4b0e9968a18b7", + "_type": "span", + "marks": [ + "3b66656713f3" + ], + "text": "foundational training" + }, + { + "marks": [], + "text": " is perfect for newcomers, while experienced users can dive into advanced topics during the ", + "_key": "4b0e9968a18b8", + "_type": "span" + }, + { + "_key": "4b0e9968a18b9", + "_type": "span", + "marks": [ + "3a54f51827fc" + ], + "text": "nf-core hackathon" + }, + { + "text": ". 
The event concludes with three days of talks where attendees can learn about the latest developments from the Nextflow world.", + "_key": "4b0e9968a18b10", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "_key": "35a1087e0cc40", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "8116e6ce1ea3" + }, + { + "children": [ + { + "text": "Register by October 11 for the in-person event or by October 21 for the online event — don’t miss your chance to join! ", + "_key": "de39ddb7f24d0", + "_type": "span", + "marks": [] + }, + { + "_key": "b1db89a2a9460", + "_type": "span", + "marks": [ + "5bfcc95e707c" + ], + "text": "Secure your spot now" + } + ], + "_type": "block", + "style": "blockquote", + "_key": "ac094e244fb8", + "markDefs": [ + { + "_key": "5bfcc95e707c", + "_type": "link", + "href": "https://summit.nextflow.io/2024/barcelona/?utm_campaign=Summit%202024&utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024" + } + ] + }, + { + "children": [ + { + "_key": "20a3f7967a050", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "3e5fcc90ac56", + "markDefs": [] + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://www.terrapinn.com/conference/biotechx/index.stm", + "_key": "b7d0470bb0a0" + } + ], + "children": [ + { + "marks": [ + "b7d0470bb0a0", + "strong" + ], + "text": "BiotechX Europe", + "_key": "9a11618984d80", + "_type": "span" + } + ], + "_type": "block", + "style": "h3", + "_key": "e416b76c45cc" + }, + { + "_type": "block", + "style": "normal", + "_key": "25078f3c087a", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "eb507cbfd26d0", + "_type": "span", + "marks": [] + } + ] + }, + { + "children": [ + { + "marks": [ + "strong" + ], + "text": "Location", + "_key": "e089ddce37050", + "_type": "span" + }, + { + "marks": [], + "text": ": 
Basel, Switzerland", + "_key": "6f54ca8682e0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "56111676db9c", + "markDefs": [] + }, + { + "_key": "97e0f9f38fc5", + "markDefs": [], + "children": [ + { + "_key": "b16b44c054520", + "_type": "span", + "marks": [ + "strong" + ], + "text": "Dates:" + }, + { + "_type": "span", + "marks": [], + "text": " October 9-10, 2024", + "_key": "a05dff9d821a" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "In-person", + "_key": "f8f8a4a1a7570", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "a5c447814d2e" + }, + { + "children": [ + { + "text": "", + "_key": "8a9758c9fea70", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "40fa33cf6eb4", + "markDefs": [] + }, + { + "children": [ + { + "_key": "2e13ccf9b0c60", + "_type": "span", + "marks": [], + "text": "If you work in pharmaceutical development and healthcare, this is the event for you to attend. As Europe’s largest conference in the industry, BiotechX Europe will welcome more than 400 speakers, 3,500 attendees, and 150 exhibitors. 
Aiming to foster collaboration between research and industry, the event features 16 tracks covering a wide range of topics, including " + }, + { + "_key": "9383f9cb63d8", + "_type": "span", + "marks": [ + "strong" + ], + "text": "bioinformatics, multi-omics data management, AI, and computational genomics" + }, + { + "marks": [], + "text": ".", + "_key": "88656d154026", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "d3885f9b73c9", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "a4d22394e97d0" + } + ], + "_type": "block", + "style": "normal", + "_key": "b0c44d155f57", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "c31bb3fd6482", + "markDefs": [ + { + "href": "https://seqera.io/seqera-at-biotechx-eu-2024/?utm_campaign=Summit%202024&utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024", + "_key": "5bddbaea2256", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "5bddbaea2256" + ], + "text": "Seqera will be at the event", + "_key": "fa7beca9b4170" + }, + { + "_key": "fa7beca9b4171", + "_type": "span", + "marks": [], + "text": " for two full days of networking and discussion with the life sciences community from around the world. We'll also deliver a talk as part of the bioinformatics track, so be sure to stop by. Can’t make it? No worries–we'll send you the recording afterward." 
+ } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "d20d7581ffe7", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "fb0c1c420266" + } + ] + }, + { + "_type": "block", + "style": "blockquote", + "_key": "c34556d8796c", + "markDefs": [ + { + "href": "https://seqera.io/seqera-at-biotechx-eu-2024/?utm_campaign=BiotechX%20Europe%20October%202024&utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024", + "_key": "abc21a447095", + "_type": "link" + } + ], + "children": [ + { + "text": "Send me the recording!", + "_key": "75425c19bb920", + "_type": "span", + "marks": [ + "abc21a447095" + ] + } + ] + }, + { + "style": "normal", + "_key": "9b2c991e6bc8", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "ccffff4a3a840", + "_type": "span" + } + ], + "_type": "block" + }, + { + "style": "h2", + "_key": "ad59bb4d3c92", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Top bioinformatics events in North America", + "_key": "dee5fb2705be0" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_key": "54f53931838a", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "b2f8fdbbacd3", + "markDefs": [] + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://www.ashg.org/meetings/2024meeting/", + "_key": "1de576a95b41" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "1de576a95b41", + "strong" + ], + "text": "American Society of Human Genetics (ASHG) 2024 Annual Meeting", + "_key": "f121c91a01960" + } + ], + "_type": "block", + "style": "h3", + "_key": "b82701a347b9" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "270a0b0c93520" + } + ], + "_type": "block", + "style": "normal", + "_key": "9791344b1601", + "markDefs": [] + }, + { + "children": [ + { + "text": "Location", + "_key": "5f83a2c703a00", + "_type": "span", + "marks": 
[ + "strong" + ] + }, + { + "_key": "34f611874891", + "_type": "span", + "marks": [], + "text": ": Denver, CO" + } + ], + "_type": "block", + "style": "normal", + "_key": "6cafccd72b27", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Dates:", + "_key": "200e5ac4ecbf0" + }, + { + "_key": "1c818d12c54a", + "_type": "span", + "marks": [], + "text": " November 5-9, 2024" + } + ], + "_type": "block", + "style": "normal", + "_key": "6dd5d782c459" + }, + { + "children": [ + { + "text": "In-person", + "_key": "48854f5dda260", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "f2486648036e", + "markDefs": [] + }, + { + "style": "normal", + "_key": "736a1293e25e", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "2d6ee098f3950" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "ASHG 2024 will welcome more than 8,000 scientists from around the world for five days of talks, exhibits, and networking events focused on ", + "_key": "bbcbfde31c4b0" + }, + { + "text": "genetics and genomics science", + "_key": "cacd04d5d23c", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "712fc74e16b6", + "_type": "span", + "marks": [], + "text": ". The conference will feature many sessions and workshops dedicated to bioinformatics, big data analysis, and computational biology, making it one of the most anticipated events this year for bioinformaticians and computational biologists." 
+ } + ], + "_type": "block", + "style": "normal", + "_key": "886e182edc4d" + }, + { + "style": "normal", + "_key": "18ad39abbcf6", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "0414fa03d6dd", + "_type": "span" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "e5ca7f24e4c2", + "markDefs": [], + "children": [ + { + "text": "Seqera will exhibit at the event and lead an industry session on November 6 at 12:00 pm. More information will be available soon.", + "_key": "3e19b04852a00", + "_type": "span", + "marks": [] + } + ] + }, + { + "style": "normal", + "_key": "ef22f2d30f12", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "f2faa13a66e9" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_key": "c8edc7ead73f0", + "_type": "span", + "marks": [], + "text": "If you'd like to join ASHG, make sure to register by October 1 – time is running out!" + } + ], + "_type": "block", + "style": "blockquote", + "_key": "a75336029a69" + }, + { + "style": "normal", + "_key": "406e5aa50374", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "97a5825ce0bd0" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "h3", + "_key": "c1609723d99c", + "markDefs": [ + { + "_type": "link", + "href": "https://sc24.supercomputing.org/", + "_key": "aad3372eb124" + } + ], + "children": [ + { + "_key": "7f49ea4d4a120", + "_type": "span", + "marks": [ + "aad3372eb124", + "strong" + ], + "text": "Supercomputing Conference (SC) 2024" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "4aa3272646850" + } + ], + "_type": "block", + "style": "normal", + "_key": "f80b88664b26" + }, + { + "children": [ + { + "_key": "2aa2e076c3e00", + "_type": "span", + "marks": [ + "strong" + ], + "text": "Location" + }, + { + "marks": [], + "text": ": Atlanta, 
GA", + "_key": "26f9d1914085", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "8856171a422b", + "markDefs": [] + }, + { + "_key": "8e494c697046", + "markDefs": [], + "children": [ + { + "marks": [ + "strong" + ], + "text": "Dates", + "_key": "430211bfc8fd0", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": ": November 17-22, 2024", + "_key": "4b57e300353f" + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "In-person", + "_key": "d02b7076e2730" + } + ], + "_type": "block", + "style": "normal", + "_key": "d6687f77180c", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "0784e7e7b650", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "1786737407f60", + "_type": "span", + "marks": [] + } + ] + }, + { + "_key": "b05914a7f5f4", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "SC 2024 is an essential event for professionals and students in the high-performance computing (HPC) community. It is heavily oriented towards bioinformaticians involved in the computational aspects of bioinformatics and will tackle topics including ", + "_key": "a6f68a9bee6f0" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "AI, machine learning, and cloud computing", + "_key": "4c799f8dcbf5" + }, + { + "text": ". 
The six-day event will also allow attendees to attend tutorials and workshops, giving them the chance to learn from leading experts in the most popular areas of HPC.", + "_key": "07dfe2450254", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_key": "0972cb55b5a20", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "709058259fc0", + "markDefs": [] + }, + { + "_key": "613b9498e172", + "markDefs": [], + "children": [ + { + "text": "Top bioinformatics event in Asia-Pacific", + "_key": "c59da1af8df20", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h2" + }, + { + "_key": "d645227f6012", + "markDefs": [ + { + "_key": "48fc94ab104d", + "_type": "link", + "href": "https://www.abacbs.org/conference2024/home" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "48fc94ab104d", + "strong" + ], + "text": "Australian Bioinformatics and Computational Biology Society (ABACBS)", + "_key": "cdbc4313d6e40" + } + ], + "_type": "block", + "style": "h3" + }, + { + "_type": "block", + "style": "normal", + "_key": "b7bf73f58eec", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "08ac1af2bef90" + } + ] + }, + { + "_key": "8c082012ba0e", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Location", + "_key": "74cbc34e6fc00" + }, + { + "_type": "span", + "marks": [], + "text": ": Sydney, Australia", + "_key": "6c5ae628c782" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "text": "Dates", + "_key": "3ad60b7ef0f70", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "591dd93801c2", + "_type": "span", + "marks": [], + "text": ": November 4-6, 2024" + } + ], + "_type": "block", + "style": "normal", + "_key": "aa5f5d785b87" + }, + { + "style": "normal", + "_key": "51210721b576", + 
"markDefs": [], + "children": [ + { + "text": "In-person", + "_key": "70aeae7c0a1c0", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "084892f223c3", + "markDefs": [], + "children": [ + { + "_key": "dc51128dbf810", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "9d1c6ab024b3", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Back for its 9th edition, the Australian Bioinformatics and Computational Biology Society conference (ABACBS) is an exciting event for bioinformatics professionals and students in APAC, serving as the central hub for bioinformatics and computational biology in the region. In addition to highlighting international developments in the field, the conference focuses on regional bioinformatics innovations across central themes such as ", + "_key": "0b7cb42aa9110" + }, + { + "_key": "0290cc74565c", + "_type": "span", + "marks": [ + "strong" + ], + "text": "AI, statistical bioinformatics, genomics, proteomics, and single-cell and spatial technologies" + }, + { + "marks": [], + "text": ".", + "_key": "aca568628005", + "_type": "span" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "99b177a43c3c", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "733d749612cf0", + "_type": "span" + } + ] + }, + { + "_key": "00d0e23d734f", + "markDefs": [ + { + "_key": "dae3da921ce9", + "_type": "link", + "href": "https://www.combine.org.au/symp/" + } + ], + "children": [ + { + "text": "If you’re a student in the field, you should consider attending the event, which will be held in conjunction with the ", + "_key": "9513b42b74790", + "_type": "span", + "marks": [] + }, + { + "_key": "9513b42b74791", + "_type": "span", + "marks": [ + "dae3da921ce9" + ], + "text": "COMBINE" + }, + { + "_type": "span", + "marks": [], + "text": " student symposium.", + 
"_key": "9513b42b74792" + } + ], + "_type": "block", + "style": "blockquote" + }, + { + "markDefs": [], + "children": [ + { + "text": "", + "_key": "fbe1769338e30", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "d7f7b49e0840" + }, + { + "_type": "block", + "style": "h2", + "_key": "4560ed2f3628", + "markDefs": [], + "children": [ + { + "_key": "138d633d7ef70", + "_type": "span", + "marks": [], + "text": "Upcoming bioinformatics events in 2025" + } + ] + }, + { + "_key": "f33391676310", + "markDefs": [], + "children": [ + { + "_key": "bb6eb50c89060", + "_type": "span", + "marks": [], + "text": "For those of you already planning for next year's conference season, we’ve highlighted events that are already confirmed for 2025. While their programs are yet to be released, you can count on these events taking place." + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "text": "", + "_key": "cbc69b8eee9f", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "8575b2b8ad7c", + "markDefs": [] + }, + { + "style": "h3", + "_key": "9527133dd59c", + "markDefs": [ + { + "href": "https://summit.nextflow.io/preregister-2025/", + "_key": "68b913f586a0", + "_type": "link" + } + ], + "children": [ + { + "_key": "facd674d64600", + "_type": "span", + "marks": [ + "68b913f586a0", + "strong" + ], + "text": "Nextflow Summit 2025" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "f7a2358dce0a0" + } + ], + "_type": "block", + "style": "normal", + "_key": "1755b2902f51", + "markDefs": [] + }, + { + "style": "normal", + "_key": "efae1385cf87", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Location", + "_key": "f026d6e6cc2d0" + }, + { + "marks": [], + "text": ": Boston & Barcelona", + "_key": "35978ba96f8b", + "_type": "span" + } + ], + "_type": "block" + }, + { + 
"style": "normal", + "_key": "a08e77d4c18c", + "markDefs": [], + "children": [ + { + "_key": "f09ea28e89f20", + "_type": "span", + "marks": [ + "strong" + ], + "text": "Dates:" + }, + { + "marks": [], + "text": " May 13-16, 2025, Boston | Fall 2025, Barcelona", + "_key": "16d8512f2f29", + "_type": "span" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "In-person | Online", + "_key": "234a304394e60" + } + ], + "_type": "block", + "style": "normal", + "_key": "a62cf4f06de9" + }, + { + "_type": "block", + "style": "normal", + "_key": "38bdbbe0ea31", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "e2bba980e95a0" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "If you missed earlier editions of the Boston and Barcelona Nextflow Summits, this is your chance to take part. The Nextflow Summit will be back in Boston during the Spring of 2025 and to Barcelona in the Fall. 
While the full agenda is yet to be released, you can already pre-register to be the first to know when tickets go on sale.", + "_key": "949ff83df9750" + } + ], + "_type": "block", + "style": "normal", + "_key": "3b9cca4be7c4" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "808678869018", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "174f3cf7650b" + }, + { + "style": "blockquote", + "_key": "ee129ec0b1f1", + "markDefs": [ + { + "_type": "link", + "href": "https://summit.nextflow.io/preregister-2025/?utm_campaign=Summit%202024&utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024", + "_key": "5b563ada5eeb" + } + ], + "children": [ + { + "marks": [ + "5b563ada5eeb" + ], + "text": "Pre-register", + "_key": "00e54ab70ebd0", + "_type": "span" + } + ], + "_type": "block" + }, + { + "style": "h3", + "_key": "d2097b2976a1", + "markDefs": [ + { + "href": "https://festivalofgenomics.com/london/en/page/home", + "_key": "ae10362c6b73", + "_type": "link" + } + ], + "children": [ + { + "_key": "56dc071f15870", + "_type": "span", + "marks": [], + "text": "\n" + }, + { + "_type": "span", + "marks": [ + "ae10362c6b73", + "strong" + ], + "text": "The Festival of Genomics & Biodata", + "_key": "5ac46e126d80" + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "857eb01550dd", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "51ff3f3d71470" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_key": "3c23340b964e0", + "_type": "span", + "marks": [ + "strong" + ], + "text": "Location" + }, + { + "marks": [], + "text": ": London, UK", + "_key": "dc560c599223", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "e7da00284d71" + }, + { + "markDefs": [], + "children": [ + { + "marks": [ + "strong" + ], + "text": "Dates", + "_key": "679432d9beff0", + "_type": "span" + }, + { + "text": ": January 
29-30, 2025", + "_key": "dd4fd5fccef2", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "5554a518940e" + }, + { + "style": "normal", + "_key": "b00fc59f223a", + "markDefs": [], + "children": [ + { + "_key": "706f90cb61650", + "_type": "span", + "marks": [], + "text": "In-person" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "e6d4529eb25f0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "82b182d551db" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Established as the UK’s largest annual life sciences event, the Festival of Genomics & Biodata is particularly relevant for ", + "_key": "4e96f23e3acc0" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "bioinformaticians in the genomics community", + "_key": "ba05da7be364" + }, + { + "_key": "72271b05856e", + "_type": "span", + "marks": [], + "text": ". The 2025 edition is expected to gather more than 7000 attendees and 300 speakers. The full agenda will be released on October 15, 2024, but you can already express interest in registering to be the first to know when tickets go on sale!" + } + ], + "_type": "block", + "style": "normal", + "_key": "5e3ffe169818" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "c4dd4d1cba7f" + } + ], + "_type": "block", + "style": "normal", + "_key": "9e05733ec461", + "markDefs": [] + }, + { + "style": "blockquote", + "_key": "64a0959f1585", + "markDefs": [ + { + "href": "https://seqera.io/events/?utm_source=seqera&utm_medium=blog&utm_content=top_events_fall_2024", + "_key": "511e6294d45b", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Seqera will be attending the Festival for the third year in a row! We’ll share more information about our participation soon–stay tuned! 
To make sure you don’t miss out on any announcements, follow us on social media or check out our ", + "_key": "a552c6b5f6c50" + }, + { + "_type": "span", + "marks": [ + "511e6294d45b" + ], + "text": "events page", + "_key": "a552c6b5f6c51" + }, + { + "text": ".", + "_key": "a552c6b5f6c52", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "text": "", + "_key": "938fc2309ea1", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "da2fe42732c0" + }, + { + "children": [ + { + "_type": "span", + "marks": [ + "3af3eac54fd9", + "strong" + ], + "text": "Bio-IT World Conference & Expo", + "_key": "f95dcb9530220" + } + ], + "_type": "block", + "style": "h3", + "_key": "e878b99f1723", + "markDefs": [ + { + "_type": "link", + "href": "https://www.bio-itworldexpo.com/", + "_key": "3af3eac54fd9" + } + ] + }, + { + "_key": "54ddb92cf29e", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "465133b3815c0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "54792387f148", + "markDefs": [], + "children": [ + { + "marks": [ + "strong" + ], + "text": "Location", + "_key": "f593685fd4f20", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": ": Boston, MA", + "_key": "1b69f8ea855f" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Dates", + "_key": "e6746c9c63d90" + }, + { + "_key": "369508d1c229", + "_type": "span", + "marks": [], + "text": ": April 2-4, 2025" + } + ], + "_type": "block", + "style": "normal", + "_key": "9e746b424dfe" + }, + { + "markDefs": [], + "children": [ + { + "text": "In-person | Online", + "_key": "d3d807db73a90", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "10af5b1aaee8" + }, + { + "style": "normal", + "_key": "d17fe67cf1d4", + 
"markDefs": [], + "children": [ + { + "_key": "69afc5921d850", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "d98d6ce09255", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "The Annual Conference and Expo focuses on the intersection of ", + "_key": "c570a28e8bac0" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "life sciences, data sciences, and technology", + "_key": "ec0b70541836" + }, + { + "_key": "a28d11d5fc55", + "_type": "span", + "marks": [], + "text": " and is particularly suited to bioinformaticians and computational biologists with a strong interest in data and technology. The event includes plenary keynotes, over 200 educational and technical presentations across 11 tracks, interactive discussions, and exhibits on the latest technologies in the life sciences. Those of you who can’t attend in person can follow a live virtual stream. Registrations are already open, and you can benefit from a discounted rate until November 15, 2024!" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "6f9ff5363f1a", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "7b614ade8e38", + "_type": "span" + } + ] + }, + { + "_key": "b061a380bfe9", + "markDefs": [], + "children": [ + { + "text": "Seqera will be a Platinum sponsor of Bio-IT World. Visit our booth on the tradeshow floor and listen to our presentation on the Cloud Computing track on Thursday, April 3. 
More information will be available earlier in the year.", + "_key": "46425ee3e2460", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "blockquote" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "4ec1d8fa8969", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "94fff51924ce" + }, + { + "_key": "9b2834fdcebf", + "markDefs": [], + "children": [ + { + "_key": "b4d6c1726cd40", + "_type": "span", + "marks": [], + "text": "Why these events matter: learn, innovate, connect" + } + ], + "_type": "block", + "style": "h2" + }, + { + "_key": "32dd265b0c7b", + "markDefs": [], + "children": [ + { + "text": "The events we’ve highlighted are all well-established and represent a unique opportunity to keep up with the latest research, build strong industry connections, and learn new skills. Throughout the wide range of topics and specialties covered, data scientists and bioinformaticians can keep up with how the field is advancing, both at the regional and international levels.", + "_key": "3a6692daa6b20", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "91d71b7afa4b", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "The hands-on workshops and tutorials will also help develop practical skills that you can apply to your research or work.", + "_key": "02f1d633eef70" + } + ] + }, + { + "children": [ + { + "text": "Whether you’re just starting or a seasoned expert, these events represent an excellent opportunity for professional growth and to remain at the forefront of bioinformatics.", + "_key": "5e231565ed770", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "d814e73f13d4", + "markDefs": [] + }, + { + "style": "normal", + "_key": "c77e333c8e1d", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "a388e852748e", + 
"_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "_type": "image", + "_key": "737760a444b6", + "asset": { + "_ref": "image-54912048f85a1aa655553391b6d0e62fa57e82de-1200x628-png", + "_type": "reference" + } + }, + { + "style": "normal", + "_key": "e4638b15e2df", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "\n", + "_key": "1ced84af1e7a0", + "_type": "span" + } + ], + "_type": "block" + } + ], + "_updatedAt": "2024-09-24T09:27:24Z" + }, + { + "title": "MultiQC: Grouped samples and custom scripts", + "_updatedAt": "2024-10-16T13:31:10Z", + "_createdAt": "2024-05-23T06:21:37Z", + "publishedAt": "2024-10-16T06:00:00.000Z", + "body": [ + { + "_key": "825c0af35887", + "markDefs": [ + { + "_type": "link", + "href": "https://summit.nextflow.io/2024/barcelona/agenda/10-31--multiqc-new-features-and-flexible/", + "_key": "85a288678e95" + } + ], + "children": [ + { + "_key": "f07cbc53bd370", + "_type": "span", + "marks": [], + "text": "It’s been an exciting year for the MultiQC team at Seqera, with developments aimed at modernizing the codebase and expanding functionality. In this blog post we’ll recap the big features, such as long-awaited " + }, + { + "text": "Sample Grouping", + "_key": "f07cbc53bd371", + "_type": "span", + "marks": [ + "em" + ] + }, + { + "marks": [], + "text": " to simplify report tables, as well as the ability to use MultiQC as a Python library, enabling custom scripts and dynamic report generation. 
And there’s even more to come – stay tuned for the upcoming ", + "_key": "f07cbc53bd372", + "_type": "span" + }, + { + "text": "MultiQC talk", + "_key": "f07cbc53bd373", + "_type": "span", + "marks": [ + "85a288678e95" + ] + }, + { + "_type": "span", + "marks": [], + "text": " at the Nextflow Summit in Barcelona, excitement guaranteed!", + "_key": "f07cbc53bd374" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "_key": "05dc46b2aa760", + "_type": "span", + "marks": [], + "text": "Sample grouping 🫂" + } + ], + "_type": "block", + "style": "h2", + "_key": "a2253fab8405" + }, + { + "_type": "block", + "style": "normal", + "_key": "e597c2f9c4b4", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Many of you who are used to reading MultiQC reports will be familiar with seeing ", + "_key": "8808adba7b870" + }, + { + "marks": [ + "em" + ], + "text": "General Statistics", + "_key": "8808adba7b871", + "_type": "span" + }, + { + "text": " tables that have “gaps” in rows like this:", + "_key": "8808adba7b872", + "_type": "span", + "marks": [] + } + ] + }, + { + "asset": { + "_ref": "image-0bc6a5e44bd0449bf63fa8a3fe9380e10fcaed01-3482x2064-png", + "_type": "reference" + }, + "_type": "image", + "_key": "f111f647ef2a" + }, + { + "markDefs": [], + "children": [ + { + "text": "This happens because MultiQC finds sample names from input data filenames. In the case of FastQC, paired-end sequencing data will have two FASTQ files and generate two separate FastQC reports. 
This means each sample name has a ", + "_key": "fe85e8a0b3450", + "_type": "span", + "marks": [] + }, + { + "text": "_R1", + "_key": "5c683f75103f", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "text": " or ", + "_key": "387b4dbd0c21", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "code" + ], + "text": "_R2", + "_key": "be0566f07b2f", + "_type": "span" + }, + { + "text": " suffix and cannot be merged with outputs from downstream analysis, where these are collapsed into a single sample identifier. Until now, the best advice we’ve been able to give is to either throw half of the data away or put up with the ugly tables - neither are good options!", + "_key": "dc7b7eb36c44", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "0acde0d59fb6" + }, + { + "_key": "a5338993f69c", + "markDefs": [ + { + "_key": "09acce0b2b3f", + "_type": "link", + "href": "https://github.com/MultiQC/MultiQC/issues/542" + } + ], + "children": [ + { + "marks": [], + "text": "One of the oldest open issues in the MultiQC repo (", + "_key": "8eef787f80ea0", + "_type": "span" + }, + { + "_key": "8eef787f80ea1", + "_type": "span", + "marks": [ + "09acce0b2b3f" + ], + "text": "#542" + }, + { + "_key": "8eef787f80ea2", + "_type": "span", + "marks": [], + "text": ", from 2017) is about introducing a new technique to group samples. Phil started a branch to work on the problem but hit a wall, leaving the comment " + }, + { + "text": "“This got really complicated. 
Need to think about how to improve it.”", + "_key": "8eef787f80ea3", + "_type": "span", + "marks": [ + "em" + ] + }, + { + "text": " There it sat, racking up occasional comments and requests for updates.", + "_key": "8eef787f80ea4", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "_key": "cd6951d8332a0", + "_type": "span", + "marks": [], + "text": "Finally in MultiQC v1.25, seven years after this issue was created, we’re delighted to introduce – " + }, + { + "text": "Sample grouping", + "_key": "cd6951d8332a1", + "_type": "span", + "marks": [ + "em" + ] + }, + { + "_type": "span", + "marks": [], + "text": ":", + "_key": "cd6951d8332a2" + } + ], + "_type": "block", + "style": "normal", + "_key": "3423abf00059" + }, + { + "_key": "48911ef9fb45", + "asset": { + "_type": "reference", + "_ref": "image-c12d430eb8dc05b871a48add5e8f8e22c2ff6028-1640x720-gif" + }, + "_type": "image" + }, + { + "style": "normal", + "_key": "5f7071610ad9", + "markDefs": [ + { + "href": "https://docs.seqera.io/multiqc/reports/customisation#sample-grouping", + "_key": "48b4eec2b409", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "This new ", + "_key": "f9dd723187bd0" + }, + { + "_key": "988338ff4a02", + "_type": "span", + "marks": [ + "code" + ], + "text": "table_sample_merge" + }, + { + "_key": "d921626516f4", + "_type": "span", + "marks": [], + "text": " config option allows you to specify sample name suffixes to group into a single row (see " + }, + { + "marks": [ + "48b4eec2b409" + ], + "text": "docs", + "_key": "f9dd723187bd1", + "_type": "span" + }, + { + "_key": "f9dd723187bd2", + "_type": "span", + "marks": [], + "text": "). When set, MultiQC will group samples in supported modules under a common prefix. Any component sample statistics can be shown by toggling the caret in the row header, with summary statistics on the main row. 
This allows a compressed yet accurate overview of all samples, whilst still allowing readers of the report to dig in and see the underlying data for each input sample." + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "8a074f0efc68", + "markDefs": [], + "children": [ + { + "_key": "edc4f643059d0", + "_type": "span", + "marks": [], + "text": "For now, the new config option is opt-in, but we hope to soon set some common suffixes such as " + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "_R1", + "_key": "c67dcad3c755" + }, + { + "_key": "18b26913d3a7", + "_type": "span", + "marks": [], + "text": " and " + }, + { + "_key": "7a569be866d9", + "_type": "span", + "marks": [ + "code" + ], + "text": "_R2" + }, + { + "_key": "4811b985f605", + "_type": "span", + "marks": [], + "text": " as defaults for all users. Some modules have the concept of sub-samples within parsed data (e.g., flow cells → lanes) and use sample grouping without needing additional configuration. The sample grouping implementation is entirely bespoke to each MultiQC module: each column needs consideration as to whether it should be averaged, summed, or something else. We’ve added support to key modules such as FastQC, Cutadapt, and BCLConvert, and plan to add support to more modules over time." + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "MultiQC as a library 📜", + "_key": "e2bf849132580" + } + ], + "_type": "block", + "style": "h2", + "_key": "32587d324a17" + }, + { + "markDefs": [], + "children": [ + { + "_key": "91f6511e64730", + "_type": "span", + "marks": [], + "text": "Version 1.22 brought some major behind-the-scenes refactoring to MultiQC. These changes enable MultiQC to be used as a library within scripts. 
It adds another way to customize report content beyond “Custom Content” and MultiQC Plugins, as you can now dynamically inject data, filter, and customize report content within a script. Ideal for use within analysis pipelines!" + } + ], + "_type": "block", + "style": "normal", + "_key": "0f9ae21494d4" + }, + { + "_type": "block", + "style": "normal", + "_key": "6049bf0f935f", + "markDefs": [ + { + "_key": "f089f7503fa5", + "_type": "link", + "href": "https://github.com/OpenGene/fastp" + } + ], + "children": [ + { + "marks": [], + "text": "Let's look at a very basic example to give a feel for how this could be used. Here, we have a Python script that imports MultiQC, parses report data from ", + "_key": "3bb10cbfcadb0", + "_type": "span" + }, + { + "_key": "3bb10cbfcadb1", + "_type": "span", + "marks": [ + "f089f7503fa5" + ], + "text": "fastp" + }, + { + "_type": "span", + "marks": [], + "text": ", adds a custom report section and table, and then generates a report.", + "_key": "3bb10cbfcadb2" + } + ] + }, + { + "code": "import multiqc\nfrom multiqc.plots import table\n\n# Parse logs from fastp\nmultiqc.parse_logs('./data/fastp')\n\n# Add a custom table\nmodule = multiqc.BaseMultiqcModule()\nmodule.add_section(\n plot=table.plot(\n data={\n \"sample 1\": {\"aligned\": 23542, \"not_aligned\": 343},\n \"sample 2\": {\"aligned\": 1275, \"not_aligned\": 7328},\n },\n pconfig={\n \"id\": \"my_metrics_table\",\n \"title\": \"My metrics\"\n }\n )\n)\nmultiqc.report.modules.append(module)\n\n# Generate the report\nmultiqc.write_report()", + "_type": "code", + "language": "python", + "_key": "2fc6817b3c84" + }, + { + "markDefs": [ + { + "_key": "07b92ff1c1c2", + "_type": "link", + "href": "https://docs.seqera.io/multiqc/development/plugins" + } + ], + "children": [ + { + "text": "Scripts like this can be written to do any number of things. We hope it removes the need to run MultiQC multiple times to report on secondary statistics. 
It can also enable customization of things like table columns, custom data injection, and most other things you can think of! Best of all, unlike ", + "_key": "90677f2e0a320", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "07b92ff1c1c2" + ], + "text": "MultiQC plugins", + "_key": "90677f2e0a321", + "_type": "span" + }, + { + "text": ", no special installation is needed. This will be hugely powerful for custom analysis and reporting. It also means that MultiQC becomes a first-class citizen for explorative analysis within notebooks and analysis apps.", + "_key": "90677f2e0a322", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "0d4aca5310f8" + }, + { + "_key": "4bf457962649", + "markDefs": [ + { + "_type": "link", + "href": "https://multiqc.info/docs/usage/interactive/", + "_key": "8135552fbdf7" + }, + { + "href": "https://community.seqera.io/multiqc", + "_key": "ec397c4c15d7", + "_type": "link" + } + ], + "children": [ + { + "marks": [], + "text": "See the new ", + "_key": "98837a87371a0", + "_type": "span" + }, + { + "text": "Using MultiQC in interactive environments", + "_key": "98837a87371a1", + "_type": "span", + "marks": [ + "8135552fbdf7" + ] + }, + { + "_type": "span", + "marks": [], + "text": " page to learn more about MultiQC Python functions. 
", + "_key": "98837a87371a2" + }, + { + "text": "Let us know", + "_key": "98837a87371a3", + "_type": "span", + "marks": [ + "ec397c4c15d7" + ] + }, + { + "marks": [], + "text": " how you get on with this functionality - we’d love to see what you build!", + "_key": "98837a87371a4", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Major performance improvements 🚅", + "_key": "343f5cc92fe80" + } + ], + "_type": "block", + "style": "h2", + "_key": "31398f87eeaf" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/rhpvorderman", + "_key": "fd09f4c005cf" + } + ], + "children": [ + { + "text": "In MultiQC v1.22 we’ve had a number of high-impact pull requests from ", + "_key": "17f6d6a138610", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "fd09f4c005cf" + ], + "text": "@rhpvorderman", + "_key": "17f6d6a138611", + "_type": "span" + }, + { + "marks": [], + "text": ". 
He did a deep-dive on the compression that MultiQC uses for embedding data within the HTML reports, switching the old ", + "_key": "17f6d6a138612", + "_type": "span" + }, + { + "_key": "19801d576faf", + "_type": "span", + "marks": [ + "code" + ], + "text": "lzstring" + }, + { + "text": " compression for a more up-to-date ", + "_key": "45969e812380", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "code" + ], + "text": "gzip", + "_key": "c5f9f1fce5cc", + "_type": "span" + }, + { + "text": " implementation, which made writing reports ", + "_key": "fa6d2402801f", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "4x faster", + "_key": "17f6d6a138613" + }, + { + "marks": [], + "text": ".", + "_key": "17f6d6a138614", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "5817aa826cf0" + }, + { + "children": [ + { + "marks": [], + "text": "He also significantly optimized the file search, making it ", + "_key": "d3ed30f558930", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "54% faster", + "_key": "d3ed30f558931" + }, + { + "text": " on our benchmarks, and key modules. 
For example, ", + "_key": "d3ed30f558932", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "FastQC got 6x faster and uses 10x less memory", + "_key": "d3ed30f558933" + }, + { + "_type": "span", + "marks": [], + "text": ".", + "_key": "d3ed30f558934" + } + ], + "_type": "block", + "style": "normal", + "_key": "e355a0723a10", + "markDefs": [] + }, + { + "_type": "block", + "style": "blockquote", + "_key": "688e13eb74b5", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Taken together, comparing a typical v1.22 run against v1.21 shows that MultiQC is ", + "_key": "ca2a7d2cc9ea0" + }, + { + "text": "53% faster", + "_key": "ca2a7d2cc9ea1", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "ca2a7d2cc9ea2", + "_type": "span", + "marks": [], + "text": " and has a " + }, + { + "marks": [ + "strong" + ], + "text": "6x smaller peak-memory footprint", + "_key": "ca2a7d2cc9ea3", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": ". It’s well worth updating!", + "_key": "ca2a7d2cc9ea4" + } + ] + }, + { + "_key": "27a4647d4af0", + "markDefs": [ + { + "_type": "link", + "href": "https://www.science.org/doi/full/10.1126/sciadv.aba1190", + "_key": "98689cf6da09" + } + ], + "children": [ + { + "marks": [], + "text": "To get these numbers for real-world scenarios, we tested some huge input datasets (many thanks to Felix Krueger for helping with these). For example, from ", + "_key": "bd242e39c19c0", + "_type": "span" + }, + { + "text": "Xing et al. 
2020", + "_key": "bd242e39c19c1", + "_type": "span", + "marks": [ + "98689cf6da09" + ] + }, + { + "marks": [], + "text": ":", + "_key": "bd242e39c19c2", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_key": "187c0ee80e97", + "asset": { + "_ref": "image-d8472852383a8de068c60e4b67cccb9401fda6e8-2202x1206-svg", + "_type": "reference" + }, + "_type": "image" + }, + { + "children": [ + { + "_key": "26881f3f445b0", + "_type": "span", + "marks": [], + "text": "These three runs were run with identical inputs and generated essentially identical reports." + } + ], + "_type": "block", + "style": "normal", + "_key": "4c2e62b1a6ce", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "c5778edba7c9", + "markDefs": [], + "children": [ + { + "_key": "29c455e60d230", + "_type": "span", + "marks": [], + "text": "These improvements will be especially noticeable with large runs. Improvements are also especially significant in certain MultiQC modules, including FastQC (10x less peak memory), Mosdepth, and Kraken (~20x improvement in memory and CPU in MultiQC v1.24, larger improvements with more samples)." 
+ } + ] + }, + { + "markDefs": [], + "children": [ + { + "text": "We hope that this makes MultiQC more usable at scale and makes your analysis pipelines run a little smoother!", + "_key": "0299272ee77e0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "16f4ee957baa" + }, + { + "_key": "03fd047a042d", + "markDefs": [], + "children": [ + { + "_key": "fe15ec5a699d0", + "_type": "span", + "marks": [], + "text": "Unit tests 🧪" + } + ], + "_type": "block", + "style": "h2" + }, + { + "_type": "block", + "style": "normal", + "_key": "9a1dcc7171ea", + "markDefs": [], + "children": [ + { + "_key": "8dac686af5890", + "_type": "span", + "marks": [], + "text": "Until now, MultiQC only had rudimentary end-to-end testing - each continuous integration test simply runs MultiQC on a range of test data and checks that it doesn’t crash (there are a few more bells and whistles, but that’s the essence of it). These CI tests have worked remarkably well, considering. However - they do not catch unintentional changes to data outputs and are limited in their scope." + } + ] + }, + { + "markDefs": [ + { + "_key": "8c10e89e187d", + "_type": "link", + "href": "https://docs.pytest.org/" + }, + { + "_type": "link", + "href": "https://docs.seqera.io/multiqc/development/modules#tests", + "_key": "49e1086b5601" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Version 1.23 of MultiQC introduced unit tests. These small, isolated tests are a cornerstone of modern software development. A suite of ", + "_key": "69fec00fe0b70" + }, + { + "marks": [ + "8c10e89e187d" + ], + "text": "pytest", + "_key": "69fec00fe0b71", + "_type": "span" + }, + { + "_key": "69fec00fe0b72", + "_type": "span", + "marks": [], + "text": " tests now cover most of the core library code. 
Pytest is also used to “just run” modules as before (with 90% code coverage!), but going forward we will require module authors to include a tests directory with custom detailed unit tests. See " + }, + { + "_key": "69fec00fe0b73", + "_type": "span", + "marks": [ + "49e1086b5601" + ], + "text": "Tests" + }, + { + "_type": "span", + "marks": [], + "text": " for more information.", + "_key": "69fec00fe0b74" + } + ], + "_type": "block", + "style": "normal", + "_key": "ea3631bb639a" + }, + { + "markDefs": [], + "children": [ + { + "text": "It’s a lot of work to add useful test coverage to such a large codebase, and anyone familiar with the topic will know that it’s a job that’s never done. However, now that we have a framework and pattern in place we’re hopeful that test coverage will steadily increase and code quality with it.", + "_key": "154f52b5a7560", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "e632a2c061ef" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Refactoring and static typing 📐", + "_key": "751fd31f3f260" + } + ], + "_type": "block", + "style": "h2", + "_key": "62766583b084", + "markDefs": [] + }, + { + "style": "normal", + "_key": "a6ef8f12773a", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "MultiQC v1.22 refactoring brings with it the first wave of Pydantic models in the back end. This unlocks run-time validation of plot config attributes - we found and fixed a lot of bugs with this already! The code looks very similar, but the Pydantic models use classes that allow most code IDEs to highlight errors as you write. 
Validation at run time also means that you catch typos right away, instead of wondering why your configuration is not being applied.", + "_key": "ddace25f43050" + } + ], + "_type": "block" + }, + { + "_key": "26d505ad4426", + "asset": { + "_ref": "image-fd6b80f3dcb5eaf7c243180d8f926e59b013795c-1175x545-svg", + "_type": "reference" + }, + "_type": "image" + }, + { + "_key": "135a4646454f", + "asset": { + "_ref": "image-2cec52b9fc023cf0214e91ae653af17ab68d8631-2237x634-png", + "_type": "reference" + }, + "_type": "image" + }, + { + "children": [ + { + "_key": "aaf630994d490", + "_type": "span", + "marks": [], + "text": "Along similar lines, the core MultiQC library and test suite has had type annotations added throughout, complete with CI testing using " + }, + { + "marks": [ + "de93e9dea13b" + ], + "text": "mypy", + "_key": "aaf630994d491", + "_type": "span" + }, + { + "text": ". We will progressively add typing to all MultiQC modules over time. Typing also helps the MultiQC developer experience, with rich IDE integrations and earlier bug-catching.", + "_key": "aaf630994d492", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "91e612186bc9", + "markDefs": [ + { + "href": "https://mypy-lang.org/", + "_key": "de93e9dea13b", + "_type": "link" + } + ] + }, + { + "_type": "block", + "style": "h2", + "_key": "5369acf21e63", + "markDefs": [], + "children": [ + { + "_key": "5e015b0762a50", + "_type": "span", + "marks": [], + "text": "HighCharts removed 🗑" + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "In v1.20 we added support for using Plotly instead of HighCharts for graphs in MultiQC reports. We left the HighCharts code in place whilst we transitioned to the new library, in case people hit any major issues with Plotly. 
As of v1.22 the HighCharts support (via ", + "_key": "20b3965936730" + }, + { + "marks": [ + "code" + ], + "text": "--template highcharts", + "_key": "ffa5eceee420", + "_type": "span" + }, + { + "text": ") has been removed completely. See the ", + "_key": "5c85500d9de2", + "_type": "span", + "marks": [] + }, + { + "_key": "20b3965936731", + "_type": "span", + "marks": [ + "85116740be7f" + ], + "text": "MultiQC: A fresh coat of paint" + }, + { + "marks": [], + "text": " blog to find out more about this topic.", + "_key": "20b3965936732", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "a8c352ae0daf", + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/blog/multiqc-plotly/", + "_key": "85116740be7f" + } + ] + }, + { + "children": [ + { + "text": "Moving to seqera.io", + "_key": "330cd974225b0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h2", + "_key": "f7684112839e", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "3fb7ceae5313", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Since MultiQC joined the Seqera family in 2022, we’ve been steadily improving integration with other Seqera tools and websites. Last year, we launched the Seqera Community Forum with a dedicated MultiQC section, which has been a valuable resource for users. Recently, we’ve continued this effort by moving all MultiQC documentation to Seqera.io, providing a single, streamlined location for accessing information and searching across all Seqera tools. 
Old links will still redirect, ensuring a smooth transition.", + "_key": "e79f1010d2450", + "_type": "span" + } + ] + }, + { + "_key": "d4f6fd75df99", + "markDefs": [ + { + "href": "https://seqera.io/multiqc/", + "_key": "04cd3809c790", + "_type": "link" + }, + { + "_type": "link", + "href": "https://multiqc.info", + "_key": "205fbd181003" + } + ], + "children": [ + { + "_key": "44fc010597ec0", + "_type": "span", + "marks": [], + "text": "We’re also excited to announce that we’re launching a new MultiQC product page at " + }, + { + "text": "https://seqera.io/multiqc/", + "_key": "44fc010597ec1", + "_type": "span", + "marks": [ + "04cd3809c790" + ] + }, + { + "_key": "44fc010597ec2", + "_type": "span", + "marks": [], + "text": " with an updated design, which will replace " + }, + { + "_type": "span", + "marks": [ + "205fbd181003" + ], + "text": "https://multiqc.info", + "_key": "44fc010597ec3" + }, + { + "marks": [], + "text": ". This fresh look aligns with the rest of the Seqera ecosystem, making it easier to explore MultiQC’s features and stay up to date with future developments.", + "_key": "44fc010597ec4", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + } + ], + "tags": [ + { + "_ref": "ea6c309b-154f-45c3-9fda-650d7764b260", + "_type": "reference", + "_key": "933aa64152ba" + }, + { + "_type": "reference", + "_key": "0e00c52955ba", + "_ref": "be8b298c-af12-4b5f-89cd-d2e208580926" + } + ], + "author": { + "_ref": "phil-ewels", + "_type": "reference" + }, + "_type": "blogPost", + "_rev": "mvya9zzDXWakVjnX4hBcNe", + "_id": "28fbd463-3640-4195-8c8f-82cf183846f9", + "meta": { + "_type": "meta", + "shareImage": { + "asset": { + "_ref": "image-d7dd7dfbf392ebb35e2f6a2be71934efc944ccc4-1200x1200-png", + "_type": "reference" + }, + "_type": "image" + }, + "description": "Introducing grouped table rows with collapsed sub-samples! 
Also big performance improvements and a new ability to work as a Python library within scripts, notebooks and Python apps.", + "noIndex": false, + "slug": { + "current": "multiqc-grouped-samples", + "_type": "slug" + } + } + }, + { + "author": { + "_ref": "evan-floden", + "_type": "reference" + }, + "meta": { + "description": "Today marks a major milestone in that journey as we release two new free and open resources for the community: Seqera Pipelines and Seqera Containers.", + "noIndex": false, + "slug": { + "current": "introducing-seqera-pipelines-containers", + "_type": "slug" + }, + "_type": "meta", + "shareImage": { + "asset": { + "_ref": "image-85ca91b4138fbab39962965a2ac2eec7e49514bf-4800x2700-png", + "_type": "reference" + }, + "_type": "image" + } + }, + "body": [ + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Seqera is built on the promise that modern tooling and open software can improve scientists’ daily lives. We believe in empowering scientists and developers to focus on what they do best: groundbreaking research. Today marks a major milestone in that journey as we release two new free and open resources for the community: Seqera Pipelines and Seqera Containers.", + "_key": "a8a33347272f0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "a558c16e7d96" + }, + { + "style": "normal", + "_key": "a5a7c42890d3", + "markDefs": [], + "children": [ + { + "_key": "1f5f2a98e9c80", + "_type": "span", + "marks": [], + "text": "These projects bring together the components bioinformaticians need into a simple interface, making it easy to find open-source pipelines to run or build a software container combining virtually any tools. By streamlining access to resources and fostering collaboration, we improve the velocity, quality, and reproducibility of your research." 
+ } + ], + "_type": "block" + }, + { + "children": [ + { + "marks": [], + "text": "Seqera Pipelines: Guiding Your Research Journey", + "_key": "5d6bde0200f20", + "_type": "span" + } + ], + "_type": "block", + "style": "h2", + "_key": "ee4ea263d7a6", + "markDefs": [] + }, + { + "style": "normal", + "_key": "0ee406b47b21", + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/nextflow-io/awesome-nextflow", + "_key": "0bac7a322b6a" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "In the early days of Nextflow, the ", + "_key": "6a2a8f59d99f0" + }, + { + "_key": "6a2a8f59d99f1", + "_type": "span", + "marks": [ + "0bac7a322b6a" + ], + "text": "“awesome-nextflow” GitHub repository" + }, + { + "_type": "span", + "marks": [], + "text": " was the go-to place to find pipelines. People would list their open-source workflows so that others could find one to match their data. Over time, the Nextflow community grew, and this particular resource became unmanageable. Projects such as nf-core have emerged with collections of workflows, but there are very many other high-quality Nextflow pipelines beyond nf-core that can be difficult to find.", + "_key": "6a2a8f59d99f2" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_key": "4b279492ce7a0", + "_type": "span", + "marks": [], + "text": "Seqera Pipelines is the modern replacement for the " + }, + { + "_type": "span", + "marks": [ + "em" + ], + "text": "“awesome-nextflow”", + "_key": "4b279492ce7a1" + }, + { + "_type": "span", + "marks": [], + "text": " repo. We’ve put together a list of the best open-source workflows for you to search. We know from experience that finding high-quality pipelines is critical, so we’re using a tightly curated list of the very best workflows to begin with. 
Every pipeline comes with curated test data, so you can import into Seqera Platform and launch a test run in just a few clicks:", + "_key": "4b279492ce7a2" + } + ], + "_type": "block", + "style": "normal", + "_key": "b91d8e210a02", + "markDefs": [] + }, + { + "_type": "youtube", + "id": "KWw0NP-CT_s", + "_key": "659e5fb9c13f" + }, + { + "markDefs": [], + "children": [ + { + "text": "", + "_key": "3aed490d3739", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "3f97cf65f113" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://nextflow.io/", + "_key": "58d9d8012ab0" + }, + { + "_key": "9a41867bc689", + "_type": "link", + "href": "https://github.com/seqeralabs/tower-cli" + }, + { + "_type": "link", + "href": "https://nf-co.re/docs/nf-core-tools/pipelines/launch", + "_key": "92b93540f2b1" + } + ], + "children": [ + { + "text": "Once you’ve found an interesting pipeline, you can easily dive into the details. We show key information on the pipeline details page and provide a one-click experience to add pipelines to your launchpad within Seqera Platform. 
If you’re more at home in the terminal, you can use the launch box to grab commands for ", + "_key": "72ba431d1b440", + "_type": "span", + "marks": [] + }, + { + "_key": "72ba431d1b441", + "_type": "span", + "marks": [ + "58d9d8012ab0" + ], + "text": "Nextflow" + }, + { + "_key": "72ba431d1b442", + "_type": "span", + "marks": [], + "text": ", " + }, + { + "_type": "span", + "marks": [ + "9a41867bc689" + ], + "text": "Seqera Platform CLI", + "_key": "72ba431d1b443" + }, + { + "_key": "72ba431d1b444", + "_type": "span", + "marks": [], + "text": ", and " + }, + { + "text": "nf-core/tools", + "_key": "72ba431d1b445", + "_type": "span", + "marks": [ + "92b93540f2b1" + ] + }, + { + "marks": [], + "text": ".", + "_key": "72ba431d1b446", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "82628c058ed8" + }, + { + "style": "normal", + "_key": "414a3516da55", + "markDefs": [], + "children": [ + { + "_key": "4e2e0870666b0", + "_type": "span", + "marks": [], + "text": "We have big plans for Seqera Pipelines. By prioritizing actively maintained pipelines that adhere to industry standards, we minimize the risk of researchers encountering obsolete or malfunctioning pipelines. As we improve our accuracy, we will open up the catalog to include greater numbers of workflows." + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_key": "7064064535a60", + "_type": "span", + "marks": [], + "text": "Discovering a workflow is only the first step of a journey. In the future, we will extend Seqera Pipelines with additional features, such as the ability to create collections of your favorite pipelines and discuss their usage – both to get help and to help others in the community. Seqera Pipelines is already the best place to find your next workflow, and it’s only going to get better." 
+ } + ], + "_type": "block", + "style": "normal", + "_key": "1c4ddb148e2c" + }, + { + "markDefs": [], + "children": [ + { + "text": "Seqera Containers: The Magic of Reproducibility", + "_key": "8a75a73cb2f80", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h2", + "_key": "dd0249c1bab7" + }, + { + "_key": "b5ae89a0a5a4", + "markDefs": [ + { + "_type": "link", + "href": "https://nextflow.io/podcast/2023/ep13_nextflow_10_years.html", + "_key": "122f56122a7d" + } + ], + "children": [ + { + "text": "Containers have transformed the research landscape, providing portable environments that encapsulate software, dependencies, and libraries – eliminating compatibility issues across various computing environments. Nextflow was a ", + "_key": "0052d9e9bd970", + "_type": "span", + "marks": [] + }, + { + "text": "very early adopter", + "_key": "0052d9e9bd971", + "_type": "span", + "marks": [ + "122f56122a7d" + ] + }, + { + "text": " of Docker and has provided first-class support for software containers for nearly a decade.", + "_key": "0052d9e9bd972", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "47493cbada63", + "markDefs": [ + { + "href": "https://biocontainers.pro/", + "_key": "618378cd6cb4", + "_type": "link" + }, + { + "href": "https://bioconda.github.io/", + "_key": "e82d6ce5c752", + "_type": "link" + } + ], + "children": [ + { + "text": "However, using containers isn’t entirely without friction. Pipeline developers need to write Dockerfile scripts for each step in their workflow. 
Projects such as ", + "_key": "cddca7666cf60", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "618378cd6cb4" + ], + "text": "BioContainers", + "_key": "cddca7666cf61" + }, + { + "_type": "span", + "marks": [], + "text": " have greatly simplified this process with pre-built images for ", + "_key": "cddca7666cf62" + }, + { + "marks": [ + "e82d6ce5c752" + ], + "text": "Bioconda", + "_key": "cddca7666cf63", + "_type": "span" + }, + { + "marks": [], + "text": " tools but are somewhat limited, especially when multiple tools are needed in a single container. We set out to improve this experience with Wave: our open-source on-demand container provisioning service. Wave allows Nextflow developers to simply reference a set of conda packages or a bundled Dockerfile. When the pipeline runs, the container is built on the fly and can be targeted for the specific local environment that the workflow is running in.", + "_key": "cddca7666cf64", + "_type": "span" + } + ] + }, + { + "markDefs": [ + { + "_key": "a60f91c08427", + "_type": "link", + "href": "https://seqera.io/containers" + } + ], + "children": [ + { + "marks": [], + "text": "With ", + "_key": "19330cec0f8c0", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "a60f91c08427" + ], + "text": "Seqera Containers", + "_key": "19330cec0f8c1" + }, + { + "text": ", we’re taking the experience of Wave one step further. Instead of browsing available images as you would with a traditional container registry, just type in the names of the tools you want to use. Clicking “Get container” returns a container URI instantly, which you can use for anything - Nextflow pipeline or not. The key difference with Seqera Containers is that the image is also stored in an image cache, with infrastructure provided by our friends at AWS. Subsequent requests for the same package set will return the same image, ensuring reproducibility across runs. 
The cache has no expiry date, so those images will still be there if you need to rerun your analysis in the future.", + "_key": "19330cec0f8c2", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "86a0cd5ff0e7" + }, + { + "id": "mk67PjOIp8o", + "_key": "c6c73031246e", + "_type": "youtube" + }, + { + "style": "normal", + "_key": "ea5a4d906865", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Not only can you request any combination of packages, but you can also select architecture and image format. Builds with linux/arm64 architecture promise to open up analysis to new, more efficient compute platforms. Choosing Singularity leads to a native Singularity / Apptainer build with an OCI-compliant architecture and even a URL to download the ", + "_key": "867e4c63d88b0", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": ".sif", + "_key": "867e4c63d88b1" + }, + { + "marks": [], + "text": " file directly.", + "_key": "867e4c63d88b2", + "_type": "span" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Clicking “View build details” for the container shows the full information of the Dockerfile, conda environment file, and build settings, as well as the complete build logs. 
Every container includes results from a security scan using ", + "_key": "2c3f04b3ed850" + }, + { + "_type": "span", + "marks": [ + "5dc792b284e9" + ], + "text": "Trivy", + "_key": "2c3f04b3ed851" + }, + { + "marks": [], + "text": " attached.", + "_key": "2c3f04b3ed852", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "3518d0cbbefc", + "markDefs": [ + { + "_type": "link", + "href": "https://trivy.dev/", + "_key": "5dc792b284e9" + } + ] + }, + { + "style": "normal", + "_key": "8fadea99d6d1", + "markDefs": [], + "children": [ + { + "_key": "60ca627f3ac10", + "_type": "span", + "marks": [], + "text": "While the web interface is the easiest way to get started with Seqera Containers, it doesn’t end there. The same functionality extends to Nextflow and the Wave CLI. Just tell Wave to “freeze” with a set of conda packages, and the resulting image will be cached in the public Seqera Containers registry." + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "845b89f38040", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Seqera Containers is a free service provided by Seqera and AWS. It does not require authentication of any kind to use, and is configured with very high rate limits so that nothing stops your pipeline from pulling 50 images all at once! We can’t wait to see how the entire bioinformatics community uses it, both Nextflow users and beyond.", + "_key": "060de8d675b40", + "_type": "span" + } + ] + }, + { + "_type": "block", + "style": "h2", + "_key": "1f7bbdd1a7fb", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "What lies ahead", + "_key": "a5f3661cee520", + "_type": "span" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_key": "5139413842540", + "_type": "span", + "marks": [], + "text": "Pipelines and Containers represent just the beginning of Seqera’s vision to be the home of open science. 
We think that these two resources can have a real impact on researchers around the globe, and we’re excited to continue working with them to extend their functionality. We’re committed to collaborating with the community to focus on the features that you need, so do let us know what you think and what you want next!" + } + ], + "_type": "block", + "style": "normal", + "_key": "9bb85bc9c54e" + }, + { + "_type": "block", + "style": "normal", + "_key": "cb6fe74411f4", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "\n", + "_key": "f64f44ea540a0", + "_type": "span" + } + ] + } + ], + "_rev": "mAO9W5hBo57qoxiglmBcPn", + "_createdAt": "2024-05-23T07:01:07Z", + "title": "Empowering scientists with seamless access to bioinformatics resources", + "publishedAt": "2024-05-23T12:00:00.000Z", + "tags": [ + { + "_ref": "ea6c309b-154f-45c3-9fda-650d7764b260", + "_type": "reference", + "_key": "ef12481e08d5" + }, + { + "_ref": "1b55a117-18fe-40cf-8873-6efd157a6058", + "_type": "reference", + "_key": "508790ebf0f9" + } + ], + "_type": "blogPost", + "_id": "35e0b13e-aa5a-4018-88c5-6a175d477f1d", + "_updatedAt": "2024-05-28T14:18:22Z" + }, + { + "meta": { + "_type": "meta", + "description": "Call for grants 2021 aimed at R&D Projects in AI and other digital technologies and their integration into value chains", + "noIndex": false, + "slug": { + "current": "optimization-computation-resources-ML-AI", + "_type": "slug" + } + }, + "body": [ + { + "asset": { + "_ref": "image-22a6d646f122e9df55c154735882a2cb56ae7d87-1600x225-jpg", + "_type": "reference" + }, + "_type": "image", + "_key": "2ca9d274f836" + }, + { + "style": "h3", + "_key": "f01b00d90f54", + "markDefs": [], + "children": [ + { + "text": "Call for grants 2021 aimed at R&D projects in AI and other digital technologies and their integration into value chains", + "_key": "958028fa63b50", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "6155ddde2c0a", + 
"markDefs": [], + "children": [ + { + "text": "", + "_key": "9af02a5030750", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://www.red.es/es", + "_key": "64b2513c3ed1" + }, + { + "href": "https://commission.europa.eu/funding-tenders/find-funding/eu-funding-programmes/european-regional-development-fund-erdf_en#:~:text=The%20European%20Regional%20Development%20Fund,dedicated%20national%20or%20regional%20programmes.", + "_key": "19a9e11e0b53", + "_type": "link" + }, + { + "_type": "link", + "href": "https://next-generation-eu.europa.eu/index_en", + "_key": "47482c371588" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "The project 'Optimization of computational resources for HPC workloads in the cloud through ML/AI' by Seqera Labs S.L. has been funded by the ", + "_key": "de8345dee2320" + }, + { + "_type": "span", + "marks": [ + "19a9e11e0b53" + ], + "text": "European Regional Development Fund (ERDF) ", + "_key": "fda11d32da9b" + }, + { + "_type": "span", + "marks": [], + "text": "of the ", + "_key": "7ee991ead9e1" + }, + { + "marks": [ + "47482c371588" + ], + "text": "European Union", + "_key": "34fa535e4601", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": ", coordinated and managed by ", + "_key": "b620d2b78c0b" + }, + { + "text": "red.es", + "_key": "41eb06c9ae78", + "_type": "span", + "marks": [ + "64b2513c3ed1" + ] + }, + { + "_type": "span", + "marks": [], + "text": ", aiming to carry out the development of technological entrepreneurship and technological demand within the framework of the Strategic Action of Digital Economy and Society of the State R&D&I Program oriented towards societal challenges.", + "_key": "7eae1ca7a7b9" + } + ], + "_type": "block", + "style": "normal", + "_key": "09aebbc4262f" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "4c8c550d6dea" + } + ], + "_type": 
"block", + "style": "normal", + "_key": "e210c007ebd3" + }, + { + "style": "h3", + "_key": "b0e67983a64e", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Project Description", + "_key": "78e9e7fb0ba20" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "b608cc1805b8", + "markDefs": [], + "children": [ + { + "text": "The project aims to develop a machine learning model to optimize workflow execution in the cloud, ensuring efficient use of resources. This enables users to control execution costs and achieve significant savings. Through this project's implementation, it is expected that the application of this technology will not only reduce costs and execution time but also minimize the environmental impact of computing tasks. Seqera Labs plays a key role in advancing personalized medicine and the discovery of new drugs.", + "_key": "71c829c8b959", + "_type": "span", + "marks": [] + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "eaac793c2e93", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "File number: 2021/C005/00149902", + "_key": "96acc3bfbd660", + "_type": "span" + } + ], + "level": 1 + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "de5b39af36aa", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "8b0400c9463d", + "_type": "span", + "marks": [], + "text": "Total investment: €1,165,466.66" + } + ] + }, + { + "_key": "0c274b82d229", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Amount of aid: €669,279.99", + "_key": "034a4c2dc3db" + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "\nConvocatoria de ayudas 2021 destinadas a proyectos de investigación y desarrollo en IA y otras technologías digitales y su integración en las cadenas de valor", + 
"_key": "a0489e395fa60" + } + ], + "_type": "block", + "style": "h3", + "_key": "39cdd31e17dd", + "markDefs": [] + }, + { + "_key": "a59731faab3f", + "markDefs": [ + { + "_type": "link", + "href": "https://www.red.es/es", + "_key": "44d9a8fb6c5c" + }, + { + "href": "https://commission.europa.eu/funding-tenders/find-funding/eu-funding-programmes/european-regional-development-fund-erdf_en#:~:text=The%20European%20Regional%20Development%20Fund,dedicated%20national%20or%20regional%20programmes.", + "_key": "5444a1f98c6a", + "_type": "link" + }, + { + "_key": "a8150942df91", + "_type": "link", + "href": "https://next-generation-eu.europa.eu/index_en" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "El proyecto de ‘Optimización de los recursos computacionales para las cargas de trabajo de HPC en la nube mediante ML/AI’ de Seqera Labs S.L. ha sido financiado por el ", + "_key": "21a068641a3c0" + }, + { + "text": "Fondo Europeo de Desarrollo Regional (FEDER)", + "_key": "40aa3b3d0ecc", + "_type": "span", + "marks": [ + "5444a1f98c6a" + ] + }, + { + "text": " de la ", + "_key": "78ce1222a23b", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "a8150942df91" + ], + "text": "Unión Europea", + "_key": "e9cd306a59f1" + }, + { + "text": ", coordinada y gestionada por ", + "_key": "1ea233b7e3bc", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "44d9a8fb6c5c" + ], + "text": "red.es", + "_key": "7045f57ce29d" + }, + { + "_type": "span", + "marks": [], + "text": ", con el objetivo de llevar a cabo el desarrollo del emprendimiento tecnológico y la demanda tecnológica, en el marco de la Acción Estratégica de Economía y Sociedad Digital del Programa Estatal de I+D+i orientada a retos de la sociedad.", + "_key": "e3e2efd75bba" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "5f02caf6ec9e", + "_type": "span" + } + ], + "_type":
"block", + "style": "normal", + "_key": "a0b3a9584258" + }, + { + "style": "h3", + "_key": "7c937551c3d3", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Descripción del proyecto", + "_key": "8916706675c00", + "_type": "span" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "El proyecto busca desarrollar un modelo de machine learning para optimizar la ejecución de flujos de trabajo en la nube, garantizando el uso eficiente de recursos. Esto permite a los usuarios controlar los costes de ejecución y lograr ahorros significativos. Con la presente ejecución del proyecto se espera que la aplicación de esta tecnología no solo reduzca los costes y el tiempo de ejecución, sino que también minimice el impacto ambiental de los trabajos de computación. Seqera Labs desempeña un papel fundamental en el avance de la medicina personalizada y el descubrimiento de nuevos medicamentos.", + "_key": "5b5110cae217", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "51fede68e0f0" + }, + { + "children": [ + { + "text": "Expediente nº: 2021/C005/00149902", + "_key": "ad6851bdbc700", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "d7bc90b57c58", + "listItem": "bullet", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "f01aac1dae9b", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Inversión total: 1.165.466,66 €", + "_key": "aaf0c99b85540" + } + ], + "level": 1 + }, + { + "markDefs": [], + "children": [ + { + "_key": "5e32e917e71e0", + "_type": "span", + "marks": [], + "text": "Importe de la ayuda: 669.279,99 €" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "c724b1e64798", + "listItem": "bullet" + } + ], + "tags": [ + { + "_type": "reference", + "_key": "ce64efeb3685", + "_ref": "1b55a117-18fe-40cf-8873-6efd157a6058" + }, 
+ { + "_key": "40689d831034", + "_ref": "d356a4d5-06c1-40c2-b655-4cb21cf74df1", + "_type": "reference" + } + ], + "_createdAt": "2024-07-26T11:11:53Z", + "_rev": "0HV4XeadlxB19r3p3EDEa1", + "title": "Seqera's project on the optimization of computational resources for HPC workloads in the cloud through ML/AI has been funded by the European Union", + "publishedAt": "2024-06-05T13:38:00.000Z", + "_type": "blogPost", + "author": { + "_ref": "a7e6fb2d-94cb-4bcd-bcbd-120e379b2298", + "_type": "reference" + }, + "_id": "38329391-8e62-4aba-b4fa-32c658e33b13", + "_updatedAt": "2024-08-23T14:06:31Z" + }, + { + "_updatedAt": "2024-10-11T07:26:17Z", + "_id": "4ec4b56d-7cc0-4395-bb84-83f0e70b3f65", + "body": [ + { + "style": "normal", + "_key": "f9560979d244", + "markDefs": [], + "children": [ + { + "text": "We are excited to launch our new ", + "_key": "2416087f7fc20", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Step-by-Step blog series ", + "_key": "b54b44c628b4" + }, + { + "_key": "c48b795647da", + "_type": "span", + "marks": [], + "text": "on running Nextflow pipelines in Seqera Platform. With accompanying technical guides, the series also demonstrates how to create and configure environments for flexible tertiary analysis and troubleshooting with Data Studios." + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "8df044584c5b", + "markDefs": [ + { + "href": "https://nf-co.re/rnaseq/3.14.0/", + "_key": "89cc513e49f2", + "_type": "link" + } + ], + "children": [ + { + "marks": [], + "text": "First up: bulk RNA sequencing (RNA-Seq) analysis with the popular ", + "_key": "9e78ccaa040f0", + "_type": "span" + }, + { + "text": "nf-core/rnaseq pipeline", + "_key": "9e78ccaa040f1", + "_type": "span", + "marks": [ + "89cc513e49f2" + ] + }, + { + "_key": "9e78ccaa040f2", + "_type": "span", + "marks": [], + "text": "." 
+ } + ] + }, + { + "_key": "9db3354d0bc0", + "asset": { + "_ref": "image-86381b024e7fed16914933c27bbe38ccfd8e1218-2265x946-png", + "_type": "reference" + }, + "_type": "image" + }, + { + "children": [ + { + "text": "The challenge of bulk RNA-Seq analysis", + "_key": "0a14112491e40", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h2", + "_key": "86b1f5b85f80", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "fd6999649d8e", + "markDefs": [ + { + "_key": "42ba36bdabd3", + "_type": "link", + "href": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4406561/" + }, + { + "_key": "46970291c5f9", + "_type": "link", + "href": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9718390/#pone.0278609.ref002" + } + ], + "children": [ + { + "marks": [], + "text": "A single RNA-Seq experiment can generate ", + "_key": "b7221a3c3d190", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "42ba36bdabd3" + ], + "text": "gigabytes, or even terabytes", + "_key": "b7221a3c3d191" + }, + { + "text": ", of raw data. Translating this data into meaningful scientific results demands ", + "_key": "b7221a3c3d192", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "46970291c5f9" + ], + "text": "substantial computational power, automation, and storage", + "_key": "b7221a3c3d193" + }, + { + "text": ".", + "_key": "b7221a3c3d194", + "_type": "span", + "marks": [] + } + ] + }, + { + "_key": "b5e33c75a3a6", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "197efa67c9e40", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "9d6e316ffb52", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "As data volumes continue to grow, analysis becomes increasingly complex, especially when leveraging public resources while maintaining full sovereignty over your data. 
The solution?", + "_key": "66e0cb26931d0" + }, + { + "marks": [ + "strong" + ], + "text": " Seqera — a centralized bio data stack", + "_key": "66e0cb26931d1", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " for bulk RNA-Seq analysis.", + "_key": "66e0cb26931d2" + } + ] + }, + { + "style": "normal", + "_key": "8191c9a75257", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "60fff7fa9e5d0", + "_type": "span" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "In this blog post, we provide a step-by-step guide to analyze RNA-Seq data with Seqera, from quality control to differential expression analysis. We also demonstrate how to perform downstream analysis and visualize your data in a unified location.", + "_key": "99674b059d630" + } + ], + "_type": "block", + "style": "normal", + "_key": "1f23b3e52023", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "536666ed7688", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "93b49ab6c33f0", + "_type": "span", + "marks": [] + } + ] + }, + { + "style": "blockquote", + "_key": "f50c329381fa", + "markDefs": [ + { + "href": "https://hubs.la/Q02T26c10", + "_key": "c560b9e28fb8", + "_type": "link" + } + ], + "children": [ + { + "text": "Check out the full", + "_key": "b3a027e204c0", + "_type": "span", + "marks": [] + }, + { + "text": " ", + "_key": "8f1f21426b6b", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "81f13e2f0aa9", + "_type": "span", + "marks": [ + "strong", + "c560b9e28fb8" + ], + "text": "RNA-Seq guide" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": " ", + "_key": "6659f0e1d752" + }, + { + "_type": "span", + "marks": [], + "text": "now", + "_key": "4c954b7253b7" + } + ], + "_type": "block" + }, + { + "children": [ + { + "marks": [], + "text": "", + "_key": "bb9706570246", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", 
+ "_key": "1f530ea7976a", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Perform bulk RNA-Seq analysis in Seqera ", + "_key": "e0c48b21920f0" + } + ], + "_type": "block", + "style": "h2", + "_key": "c9caa0f35d6d" + }, + { + "markDefs": [], + "children": [ + { + "_key": "af84e8b35d8a", + "_type": "span", + "marks": [], + "text": "\n" + } + ], + "_type": "block", + "style": "normal", + "_key": "063186af7ec2" + }, + { + "children": [ + { + "marks": [], + "text": "1. Add a compute environment", + "_key": "4d8c8e640876", + "_type": "span" + } + ], + "_type": "block", + "style": "h3", + "_key": "9e533abca7ae", + "markDefs": [] + }, + { + "children": [ + { + "_key": "d55935c16025", + "_type": "span", + "marks": [], + "text": "In Seqera, you are not limited to hosted compute solutions. Add and configure your choice of cloud or HPC compute environments tailored to your analysis needs in your organization workspace.\n" + } + ], + "_type": "block", + "style": "normal", + "_key": "fdb30e0a53c3", + "markDefs": [] + }, + { + "markDefs": [ + { + "href": "https://deploy-preview-131--seqera-docs.netlify.app/platform/24.1.1/getting-started/rnaseq#rna-seq-data-and-requirements", + "_key": "231c3c1f5d6e", + "_type": "link" + } + ], + "children": [ + { + "_key": "6630bd2611cb0", + "_type": "span", + "marks": [], + "text": "💡 " + }, + { + "text": "Hint: ", + "_key": "d5d5e50eb8af", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "e70a96b76c37", + "_type": "span", + "marks": [], + "text": "Depending on the number of samples and the sequencing depth of your input data, select the desired " + }, + { + "_key": "6630bd2611cb1", + "_type": "span", + "marks": [ + "231c3c1f5d6e" + ], + "text": "compute and storage recommendations" + }, + { + "_key": "6630bd2611cb2", + "_type": "span", + "marks": [], + "text": " for your RNA-Seq analysis." 
+ } + ], + "_type": "block", + "style": "blockquote", + "_key": "d343783891dd" + }, + { + "_type": "image", + "_key": "7c3d10af89b6", + "asset": { + "_type": "reference", + "_ref": "image-11f5e2e5a1fdf1554329af5843be890dcf7f60b0-2452x1080-gif" + } + }, + { + "children": [ + { + "marks": [], + "text": "See the ", + "_key": "3340851e8d700", + "_type": "span" + }, + { + "marks": [ + "92d7b15144de", + "strong" + ], + "text": "full RNASeq guide", + "_key": "c394ca815f2b", + "_type": "span" + }, + { + "_key": "e01af32724dd", + "_type": "span", + "marks": [], + "text": " for AWS Batch compute environment configuration steps." + } + ], + "_type": "block", + "style": "blockquote", + "_key": "4f47a0ac33d5", + "markDefs": [ + { + "href": "https://hubs.la/Q02T26c10", + "_key": "92d7b15144de", + "_type": "link" + } + ] + }, + { + "_key": "b82d885bb30e", + "markDefs": [], + "children": [ + { + "_key": "338b9773274b", + "_type": "span", + "marks": [], + "text": "\n" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "h3", + "_key": "3277fa6e7a1e", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "2. 
Add the nf-core/rnaseq pipeline to your workspace", + "_key": "f09af77e58640" + } + ] + }, + { + "_key": "8589cc2e751f", + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/pipelines/", + "_key": "9dabd7634c9e" + } + ], + "children": [ + { + "_key": "fff87197a1c90", + "_type": "span", + "marks": [], + "text": "Quickly locate and import the nf-core/rnaseq pipeline from " + }, + { + "text": "Seqera Pipelines", + "_key": "ee81bf6bc3d1", + "_type": "span", + "marks": [ + "9dabd7634c9e" + ] + }, + { + "text": ", the largest curated open source repository of Nextflow pipelines.\n", + "_key": "5490456192c1", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "image", + "_key": "af17be5a781e", + "asset": { + "_type": "reference", + "_ref": "image-cbd868250d3235cc42d5d2b9afed55cf4a51afc4-2452x1080-gif" + } + }, + { + "style": "normal", + "_key": "9097e2b50848", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "\n", + "_key": "c923b38b50c7" + } + ], + "_type": "block" + }, + { + "style": "h3", + "_key": "b828d3ebe44c", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "3. 
Add your input data", + "_key": "fef4b73d0a460", + "_type": "span" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "d3e46de119d6" + } + ], + "_type": "block", + "style": "normal", + "_key": "29ca750385d3" + }, + { + "markDefs": [ + { + "_key": "0b4ee73ccb98", + "_type": "link", + "href": "https://docs.seqera.io/platform/23.3/data/data-explorer" + }, + { + "_key": "3d81dbad7494", + "_type": "link", + "href": "https://docs.seqera.io/platform/23.2/datasets/overview" + } + ], + "children": [ + { + "marks": [], + "text": "Easily access your RNA-Seq data directly from cloud storage with ", + "_key": "9219b4d669940", + "_type": "span" + }, + { + "marks": [ + "0b4ee73ccb98" + ], + "text": "Data Explorer", + "_key": "66f2139acb5c", + "_type": "span" + }, + { + "marks": [], + "text": ", or upload your samplesheets as CSV or TSV files with ", + "_key": "93a5a85286cc", + "_type": "span" + }, + { + "_key": "d7ebdb1b9163", + "_type": "span", + "marks": [ + "3d81dbad7494" + ], + "text": "Seqera Datasets" + }, + { + "marks": [], + "text": ".", + "_key": "174bdaa61b05", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "d3e6240f6a5b" + }, + { + "style": "normal", + "_key": "2e919cf9fcfe", + "markDefs": [], + "children": [ + { + "_key": "220e46a63df8", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block" + }, + { + "asset": { + "_ref": "image-c4001e1a1358d7824560347d93e5f73380c2ecbc-2842x1430-gif", + "_type": "reference" + }, + "_type": "image", + "_key": "014b6b9185b0" + }, + { + "style": "normal", + "_key": "b670539c67cd", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "76b62d63229a", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "blockquote", + "_key": "14b3055d38ba", + "markDefs": [ + { + "href": "https://docs.seqera.io/platform/24.1/getting-started/quickstart-demo/add-data", + 
"_key": "ee3e29b1836a", + "_type": "link" + } + ], + "children": [ + { + "text": "For more information on how to add samplesheets or other data to your workspace, see ", + "_key": "34d2e93139c10", + "_type": "span", + "marks": [] + }, + { + "text": "Add data", + "_key": "34d2e93139c11", + "_type": "span", + "marks": [ + "ee3e29b1836a", + "strong" + ] + }, + { + "text": ".", + "_key": "34d2e93139c12", + "_type": "span", + "marks": [] + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "\n4. Launch your RNA-Seq analysis", + "_key": "d8ffc77eb2eb0" + } + ], + "_type": "block", + "style": "h3", + "_key": "200f421da11f" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "140e7fde60d1" + } + ], + "_type": "block", + "style": "normal", + "_key": "7b5a1dd73392" + }, + { + "_key": "297cf7a52db2", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "So far, you have:\n", + "_key": "7817985b2a6f0" + }, + { + "_key": "7817985b2a6f1", + "_type": "span", + "marks": [], + "text": "✔ Created a compute environment\n✔ Added a pipeline to your workspace\n✔ Made your RNA-Seq data accessible" + } + ], + "_type": "block", + "style": "blockquote" + }, + { + "_type": "block", + "style": "normal", + "_key": "66b8edceceae", + "markDefs": [], + "children": [ + { + "_key": "b6ded3b5950b", + "_type": "span", + "marks": [], + "text": "" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "7372b7e9b188", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "With your compute environment, pipeline, and data all accessible in your Seqera workspace, you are now ready to launch your analysis.", + "_key": "327b97a200040", + "_type": "span" + } + ] + }, + { + "_key": "9e9e369ea4ea", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "6640e6b2d8ad", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": 
"normal" + }, + { + "_type": "image", + "_key": "4e5386177db6", + "asset": { + "_type": "reference", + "_ref": "image-ec22f1a3f3bf30daa89a6e2299af6d90e324f5f1-2452x1080-gif" + } + }, + { + "markDefs": [], + "children": [ + { + "_key": "464c6a3ffbbd", + "_type": "span", + "marks": [], + "text": "\n" + } + ], + "_type": "block", + "style": "normal", + "_key": "feeede0ec4b6" + }, + { + "_key": "182c2347f18d", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "5. Monitor your pipeline run", + "_key": "020fdd0a93170", + "_type": "span" + } + ], + "_type": "block", + "style": "h3" + }, + { + "markDefs": [], + "children": [ + { + "_key": "554f6ea184ee", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "9a7c4161c7b5" + }, + { + "_key": "d59004f41672", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Monitor your RNA-Seq analysis in real-time with aggregated statistics, workflow metrics, execution logs, and task details.", + "_key": "120b0356a9ef0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "image", + "_key": "4912e62172b9", + "asset": { + "_type": "reference", + "_ref": "image-9fd15d225aeb54b8c2841bc74a54e42a5c8bf410-2844x1390-gif" + } + }, + { + "style": "normal", + "_key": "4aec37e32e6f", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "\n", + "_key": "394eff86aae5" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "h3", + "_key": "562e0b16234f", + "markDefs": [], + "children": [ + { + "_key": "00861713cf910", + "_type": "span", + "marks": [], + "text": "6. 
Visualize results in a single, shareable report" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_key": "46601358193a", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "ea18e816a43b" + }, + { + "style": "normal", + "_key": "59e41678413f", + "markDefs": [], + "children": [ + { + "text": "Generate a single HTML report with MultiQC for your RNA-Seq analysis to assess the integrity of your results, including statistics, alignment scores, and quality control metrics. Easily share your findings with collaborators via the report URL.", + "_key": "c4666f11149d0", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "a8cf58fd9ac7", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "b19eb6571ffe" + }, + { + "asset": { + "_ref": "image-1adf78a2589c3429a67b2d2935dc62ac0139e06c-2452x1080-gif", + "_type": "reference" + }, + "_type": "image", + "_key": "ce897e818b5d" + }, + { + "children": [ + { + "text": "", + "_key": "8bd8a33eaca0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "bc8b22dc8bcb", + "markDefs": [] + }, + { + "_type": "block", + "style": "blockquote", + "_key": "86269ae87f15", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "💡", + "_key": "836844ce5743" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Hint:", + "_key": "f46f32f1d643" + }, + { + "text": " Easily share your findings with collaborators via the report URL.", + "_key": "57892c4eb147", + "_type": "span", + "marks": [] + } + ] + }, + { + "_type": "block", + "style": "h3", + "_key": "bddab7973f06", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "\n7. 
Perform interactive downstream analysis adjacent to your pipeline outputs", + "_key": "b25adfd756ce0", + "_type": "span" + } + ] + }, + { + "style": "normal", + "_key": "73f5877cf28c", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "bb7ed0bd126d", + "_type": "span" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_key": "d20a88decf2d", + "_type": "span", + "marks": [], + "text": "RNA-Seq analysis often requires human interpretation or further downstream analysis of pipeline outputs. For example, using " + }, + { + "marks": [ + "strong" + ], + "text": "DESeq2", + "_key": "ee61aae39e891", + "_type": "span" + }, + { + "marks": [], + "text": " for differential gene expression analysis.", + "_key": "ee61aae39e892", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "0fd3c84dff27", + "markDefs": [] + }, + { + "style": "normal", + "_key": "f200cfbaf920", + "markDefs": [ + { + "_type": "link", + "href": "https://docs.seqera.io/platform/24.1/data/data-studios", + "_key": "43bb4dfea049" + } + ], + "children": [ + { + "marks": [], + "text": "Bring interactive analytical notebook environments (RStudio, Jupyter, VSCode) adjacent to your data with ", + "_key": "764a091e4b5c0", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "43bb4dfea049" + ], + "text": "Seqera’s Data Studios", + "_key": "764a091e4b5c1" + }, + { + "marks": [], + "text": " and perform downstream analysis as if you were running locally.", + "_key": "764a091e4b5c2", + "_type": "span" + } + ], + "_type": "block" + }, + { + "_type": "image", + "_key": "fc4e3a11cc58", + "asset": { + "_type": "reference", + "_ref": "image-9fed530cfba0aa3bd72f477449603e8bded83f09-2452x1080-gif" + } + }, + { + "_type": "block", + "style": "normal", + "_key": "af81eb9ab77f", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "d56aeedbf5a4", + "_type": "span", + "marks": [] + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + 
"text": "Check out the ", + "_key": "153ee960e8ee0" + }, + { + "_type": "span", + "marks": [ + "c027a3adceef", + "strong" + ], + "text": "full RNASeq guide", + "_key": "f2f0efceb629" + }, + { + "text": " ", + "_key": "abc753492ace", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_type": "span", + "marks": [], + "text": "now", + "_key": "cba517eb8e9c" + } + ], + "_type": "block", + "style": "blockquote", + "_key": "73600369d6a7", + "markDefs": [ + { + "_type": "link", + "href": "https://hubs.la/Q02T26c10", + "_key": "c027a3adceef" + } + ] + }, + { + "_type": "block", + "style": "h2", + "_key": "ca0db6380e4d", + "markDefs": [], + "children": [ + { + "_key": "422d6784ced7", + "_type": "span", + "marks": [ + "strong" + ], + "text": "\nTry Seqera for free" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "e30a2b48e8c5" + } + ], + "_type": "block", + "style": "normal", + "_key": "814ff460cdd6" + }, + { + "_type": "block", + "style": "normal", + "_key": "f8ca9ce7dfd1", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "By leveraging cloud-native technology, Seqera bridges the gap between experimental data and computational analysis, allowing you to accelerate the time from data generation to meaningful scientific insights.", + "_key": "f115a7417898", + "_type": "span" + } + ] + }, + { + "markDefs": [ + { + "_key": "e87779c2247d", + "_type": "link", + "href": "https://hubs.la/Q02T26TB0" + } + ], + "children": [ + { + "text": "Sign-up", + "_key": "8e509cdc34581", + "_type": "span", + "marks": [ + "e87779c2247d", + "strong" + ] + }, + { + "text": " for free", + "_key": "22644e6e6b12", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "blockquote", + "_key": "d6ccd91918ea" + } + ], + "_createdAt": "2024-10-02T07:26:47Z", + "author": { + "_type": "reference", + "_ref": "7691d57c-16a2-4ca7-a29a-fa5d9b158a3b" + }, + "tags": [ + { + "_key": "e4630a226ba3", + "_ref": 
"1b55a117-18fe-40cf-8873-6efd157a6058", + "_type": "reference" + } + ], + "meta": { + "noIndex": false, + "slug": { + "_type": "slug", + "current": "step-by-step-rna-seq" + }, + "_type": "meta", + "description": "We are excited to launch our new Step-by-Step blog series on running Nextflow pipelines in Seqera Platform. With accompanying technical guides, the series also demonstrates how to create and configure environments for flexible tertiary analysis and troubleshooting with Data Studios." + }, + "_type": "blogPost", + "title": "Step-by-Step Series: RNA-Seq analysis in Seqera", + "_rev": "hf9hwMPb7ybAE3bqEITLMZ", + "publishedAt": "2024-10-11T07:54:00.000Z" + }, + { + "_id": "561ca06ac707", + "body": [ + { + "children": [ + { + "marks": [ + "em" + ], + "text": "Below is a step-by-step guide for creating [Docker](http://www.docker.io) images for use with [Nextflow](http://www.nextflow.io) pipelines. This post was inspired by recent experiences and written with the hope that it may encourage others to join in the virtualization revolution.", + "_key": "aa74c907fb89", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "5de644223001", + "markDefs": [] + }, + { + "style": "normal", + "_key": "fba2c75d251d", + "children": [ + { + "_type": "span", + "text": "", + "_key": "1e58c8a15fb2" + } + ], + "_type": "block" + }, + { + "_key": "50833a8d465d", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Modern science is built on collaboration. Recently I became involved with one such venture between several groups across Europe. The aim was to annotate long non-coding RNA (lncRNA) in farm animals and I agreed to help with the annotation based on RNA-Seq data. 
The basic procedure relies on mapping short read data from many different tissues to a genome, generating transcripts and then determining if they are likely to be lncRNA or protein coding genes.", + "_key": "5ad57d04cb9d", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "f171be4200cf" + } + ], + "_type": "block", + "style": "normal", + "_key": "df4dbb73e883" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "During several successful 'hackathon' meetings the best approach was decided and implemented in a joint effort. I undertook the task of wrapping the procedure up into a Nextflow pipeline with a view to replicating the results across our different institutions and to allow the easy execution of the pipeline by researchers anywhere.", + "_key": "85b35fa626c4" + } + ], + "_type": "block", + "style": "normal", + "_key": "84ce0feaea47", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "d043f09e00b4" + } + ], + "_type": "block", + "style": "normal", + "_key": "ca94bc941408" + }, + { + "style": "normal", + "_key": "974f1a1cdfa3", + "markDefs": [ + { + "_key": "99165958e6b5", + "_type": "link", + "href": "http://www.github.com/cbcrg/lncrna-annotation-nf" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Creating the Nextflow pipeline (", + "_key": "155a8a08d8cd" + }, + { + "_key": "357c4685588b", + "_type": "span", + "marks": [ + "99165958e6b5" + ], + "text": "here" + }, + { + "_key": "f3317867e3c0", + "_type": "span", + "marks": [], + "text": ") in itself was not a difficult task. My collaborators had documented their work well and were on hand if anything was not clear. However installing and keeping aligned all the pipeline dependencies across the different data centers was still a challenging task."
+ } + ], + "_type": "block" + }, + { + "children": [ + { + "_key": "2a5c98bf3a96", + "_type": "span", + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "ab6f59d351cb" + }, + { + "markDefs": [ + { + "_key": "905a8bc500ad", + "_type": "link", + "href": "https://www.docker.com/" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "The pipeline is typical of many in bioinformatics, consisting of binary executions, BASH scripting, R, Perl, BioPerl and some custom Perl modules. We found the BioPerl modules in particular were very sensitive to the various versions in the ", + "_key": "8390ee0ee4e6" + }, + { + "_type": "span", + "marks": [ + "em" + ], + "text": "long", + "_key": "c58b7dc20cce" + }, + { + "text": " dependency tree. The solution was to turn to ", + "_key": "e384258a5c3f", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "905a8bc500ad" + ], + "text": "Docker", + "_key": "48755f8b6d14" + }, + { + "_key": "236e84a2092d", + "_type": "span", + "marks": [], + "text": " containers."
+ } + ], + "_type": "block", + "style": "normal", + "_key": "004440881a96" + }, + { + "style": "normal", + "_key": "55e482405e7c", + "children": [ + { + "_type": "span", + "text": "", + "_key": "f4792876a9aa" + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "df983b305d4f", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "I have taken this opportunity to document the process of developing the Docker side of a Nextflow + Docker pipeline in a step-by-step manner.", + "_key": "8fe4f707201e" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "649e13290a13" + } + ], + "_type": "block", + "style": "normal", + "_key": "64ccdad0c58d" + }, + { + "children": [ + { + "_key": "f8e4f2418ada", + "_type": "span", + "marks": [], + "text": "###Docker Installation" + } + ], + "_type": "block", + "style": "normal", + "_key": "9daaf61343a0", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "7dbae6fbfa16", + "children": [ + { + "_type": "span", + "text": "", + "_key": "22f03df3d9b5" + } + ] + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://docs.docker.com/engine/installation", + "_key": "b39b383b61e5" + }, + { + "_type": "link", + "href": "https://blog.docker.com/2016/02/docker-engine-1-10-security/", + "_key": "1664943865ae" + } + ], + "children": [ + { + "_key": "3af57ef1c497", + "_type": "span", + "marks": [], + "text": "By far the most challenging issue is the installation of Docker. For local installations, the " + }, + { + "text": "process is relatively straight forward", + "_key": "29497e07ff62", + "_type": "span", + "marks": [ + "b39b383b61e5" + ] + }, + { + "text": ". However difficulties arise as computing moves to a cluster. Owing to security concerns, many HPC administrators have been reluctant to install Docker system-wide. 
This is changing and Docker developers have been responding to many of these concerns with ", + "_key": "bac8833f273e", + "_type": "span", + "marks": [] + }, + { + "text": "updates addressing these issues", + "_key": "f4e68c0049e2", + "_type": "span", + "marks": [ + "1664943865ae" + ] + }, + { + "text": ".", + "_key": "ebdcec8ebe01", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "a438411f6220" + }, + { + "children": [ + { + "text": "", + "_key": "6ea9c938cc17", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "369a356018e0" + }, + { + "style": "normal", + "_key": "9c82fe0136e7", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "That being the case, local installations are usually perfectly fine for development. One of the golden rules in Nextflow development is to have a small test dataset that can run the full pipeline in minutes with few computational resources, ie can run on a laptop.", + "_key": "f06c6b5ed104" + } + ], + "_type": "block" + }, + { + "children": [ + { + "text": "", + "_key": "9f5f313834ae", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "11b1347afcab" + }, + { + "_type": "block", + "style": "normal", + "_key": "3640fc87e1c5", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "If you have Docker and Nextflow installed and you wish to view the working pipeline, you can perform the following commands to obtain everything you need and run the full lncrna annotation pipeline on a test dataset.", + "_key": "0b77a23d5bf7" + } + ] + }, + { + "_key": "9edd5abef435", + "children": [ + { + "_type": "span", + "text": "", + "_key": "b0ad6ffae120" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_key": "e04747c2e377", + "code": "docker pull cbcrg/lncrna_annotation\nnextflow run cbcrg/lncrna-annotation-nf -profile test", + "_type": "code" + }, + { + "children": [ + { + "text": 
"[If the following does not work, there could be a problem with your Docker installation.]", + "_key": "fb8752c7e000", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "0fc16192bebe", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "0af154258b87" + } + ], + "_type": "block", + "style": "normal", + "_key": "e0523eff522a" + }, + { + "style": "normal", + "_key": "773b9de99fad", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "The first command will download the required Docker image in your computer, while the second will launch Nextflow which automatically download the pipeline repository and run it using the test data included with it.", + "_key": "36689d3a632c", + "_type": "span" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "0973e8d341fc" + } + ], + "_type": "block", + "style": "normal", + "_key": "6ba84ebe36e1" + }, + { + "_key": "1f30f62bc089", + "markDefs": [], + "children": [ + { + "text": "###The Dockerfile", + "_key": "3a33f2cb54af", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "50364dafcb96" + } + ], + "_type": "block", + "style": "normal", + "_key": "4cb45a2ade99" + }, + { + "style": "normal", + "_key": "3f1d99c7b705", + "markDefs": [], + "children": [ + { + "text": "The ", + "_key": "5f45a4596c7c", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "code" + ], + "text": "Dockerfile", + "_key": "6e92add363fc", + "_type": "span" + }, + { + "_key": "908e792d54df", + "_type": "span", + "marks": [], + "text": " contains all the instructions required by Docker to build the Docker image. It provides a transparent and consistent way to specify the base operating system and installation of all software, libraries and modules." 
+ } + ], + "_type": "block" + }, + { + "children": [ + { + "text": "", + "_key": "eedc860980f3", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "eb6597312e37" + }, + { + "children": [ + { + "_key": "b0b033a77a83", + "_type": "span", + "marks": [], + "text": "We begin by creating a file " + }, + { + "text": "Dockerfile", + "_key": "69aa3263d8b0", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "marks": [], + "text": " in the Nextflow project directory. The Dockerfile begins with:", + "_key": "ea7e45e2295a", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "d4223ee66e84", + "markDefs": [] + }, + { + "style": "normal", + "_key": "b6aef4e4bff6", + "children": [ + { + "text": "", + "_key": "dd2d05610c5b", + "_type": "span" + } + ], + "_type": "block" + }, + { + "_type": "code", + "_key": "c95932bd73bd", + "code": "# Set the base image to debian jessie\nFROM debian:jessie\n\n# File Author / Maintainer\nMAINTAINER Evan Floden " + }, + { + "markDefs": [], + "children": [ + { + "text": "This sets the base distribution for our Docker image to be Debian v8.4, a lightweight Linux distribution that is ideally suited for the task. 
We must also specify the maintainer of the Docker image.", + "_key": "dbd6ec0da776", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "dd72b4cb8f73" + }, + { + "children": [ + { + "_key": "8ce23ba404a7", + "_type": "span", + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "e3e22b6493fa" + }, + { + "_key": "24d492f7dd06", + "markDefs": [], + "children": [ + { + "text": "Next we update the repository sources and install some essential tools such as ", + "_key": "883b5be27cf1", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "wget", + "_key": "de93d23dcc24" + }, + { + "_type": "span", + "marks": [], + "text": " and ", + "_key": "a75f0d48042f" + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "perl", + "_key": "b8f1f6977f76" + }, + { + "_key": "3d2e30dbd5be", + "_type": "span", + "marks": [], + "text": "." + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "0b388a47cc16", + "children": [ + { + "_type": "span", + "text": "", + "_key": "74087f39767c" + } + ], + "_type": "block" + }, + { + "_type": "code", + "_key": "0781f0913220", + "code": "RUN apt-get update && apt-get install --yes --no-install-recommends \\\n wget \\\n locales \\\n vim-tiny \\\n git \\\n cmake \\\n build-essential \\\n gcc-multilib \\\n perl \\\n python ..." + }, + { + "_type": "block", + "style": "normal", + "_key": "3ca70fafd6b8", + "markDefs": [], + "children": [ + { + "_key": "82c7900bb435", + "_type": "span", + "marks": [], + "text": "Notice that we use the command " + }, + { + "text": "RUN", + "_key": "7029c2127e5e", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "_type": "span", + "marks": [], + "text": " before each line. 
The ", + "_key": "cc075c2808b5" + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "RUN", + "_key": "5372b2fbc07e" + }, + { + "marks": [], + "text": " instruction executes commands as if they are performed from the Linux shell.", + "_key": "54c24028d590", + "_type": "span" + } + ] + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "2f4253d9b870" + } + ], + "_type": "block", + "style": "normal", + "_key": "24e6cf4eeaad" + }, + { + "_key": "ac0cc4e414e7", + "markDefs": [ + { + "_type": "link", + "href": "https://blog.replicated.com/2016/02/05/refactoring-a-dockerfile-for-image-size/", + "_key": "3b99f1c6e0d0" + }, + { + "_type": "link", + "href": "https://docs.docker.com/engine/userguide/eng-image/dockerfile_best-practices/", + "_key": "ee681c47a630" + } + ], + "children": [ + { + "marks": [], + "text": "Also is good practice to group as many as possible commands in the same ", + "_key": "a715e201a410", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "RUN", + "_key": "4c0542b30503" + }, + { + "_type": "span", + "marks": [], + "text": " statement. This reduces the size of the final Docker image. See ", + "_key": "cd0129fc2cb4" + }, + { + "_type": "span", + "marks": [ + "3b99f1c6e0d0" + ], + "text": "here", + "_key": "95753b3703a7" + }, + { + "marks": [], + "text": " for these details and ", + "_key": "b3d6166d7b40", + "_type": "span" + }, + { + "marks": [ + "ee681c47a630" + ], + "text": "here", + "_key": "ea9f63a37e2f", + "_type": "span" + }, + { + "_key": "fec090986d03", + "_type": "span", + "marks": [], + "text": " for more best practices." 
+ } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "24659e48c3e7", + "children": [ + { + "_type": "span", + "text": "", + "_key": "9f35046732fb" + } + ], + "_type": "block" + }, + { + "_key": "57e5a413a943", + "markDefs": [ + { + "href": "http://search.cpan.org/~miyagawa/Menlo-1.9003/script/cpanm-menlo", + "_key": "d68e3d739fed", + "_type": "link" + } + ], + "children": [ + { + "_key": "ab9ae2c48fd3", + "_type": "span", + "marks": [], + "text": "Next we can specify the install of the required perl modules using " + }, + { + "text": "cpan minus", + "_key": "376a38ae89cc", + "_type": "span", + "marks": [ + "d68e3d739fed" + ] + }, + { + "text": ":", + "_key": "b82c42d7d1f5", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "a23d9bbf5ef9", + "children": [ + { + "text": "", + "_key": "0b5c9131deb9", + "_type": "span" + } + ] + }, + { + "_type": "code", + "_key": "e7530c3f6dba", + "code": "# Install perl modules\nRUN cpanm --force CPAN::Meta \\\n YAML \\\n Digest::SHA \\\n Module::Build \\\n Data::Stag \\\n Config::Simple \\\n Statistics::Lite ..." 
+ }, + { + "children": [ + { + "_key": "c3ff2167e3c1", + "_type": "span", + "marks": [], + "text": "We can give the instructions to download and install software from GitHub using:" + } + ], + "_type": "block", + "style": "normal", + "_key": "83711b5bfb64", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "6891af5db4de" + } + ], + "_type": "block", + "style": "normal", + "_key": "00fd8f533a9a" + }, + { + "_key": "ac765553f6ad", + "code": "# Install Star Mapper\nRUN wget -qO- https://github.com/alexdobin/STAR/archive/2.5.2a.tar.gz | tar -xz \\\n && cd STAR-2.5.2a \\\n && make STAR", + "_type": "code" + }, + { + "_type": "block", + "style": "normal", + "_key": "5387c5d1aae0", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "We can add custom Perl modules and specify environmental variables such as ", + "_key": "21f01a7dee08" + }, + { + "text": "PERL5LIB", + "_key": "3c35ccd9597e", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "_key": "7edd690d58bf", + "_type": "span", + "marks": [], + "text": " as below:" + } + ] + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "88da8fa38161" + } + ], + "_type": "block", + "style": "normal", + "_key": "95b43e15b080" + }, + { + "code": "# Install FEELnc\nRUN wget -q https://github.com/tderrien/FEELnc/archive/a6146996e06f8a206a0ae6fd59f8ca635c7d9467.zip \\\n && unzip a6146996e06f8a206a0ae6fd59f8ca635c7d9467.zip \\\n && mv FEELnc-a6146996e06f8a206a0ae6fd59f8ca635c7d9467 /FEELnc \\\n && rm a6146996e06f8a206a0ae6fd59f8ca635c7d9467.zip\n\nENV FEELNCPATH /FEELnc\nENV PERL5LIB $PERL5LIB:${FEELNCPATH}/lib/", + "_type": "code", + "_key": "02cae409f036" + }, + { + "_type": "block", + "style": "normal", + "_key": "3db7c8965a0b", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "R and R libraries can be installed as follows:", + "_key": "fab1d01a8d76" + } + ] + }, + { + "style": "normal", + "_key": 
"369cb978dbc9", + "children": [ + { + "_type": "span", + "text": "", + "_key": "7e8b16febe0b" + } + ], + "_type": "block" + }, + { + "_key": "b635cd93fe02", + "code": "# Install R\nRUN echo \"deb http://cran.rstudio.com/bin/linux/debian jessie-cran3/\" >> /etc/apt/sources.list &&\\\napt-key adv --keyserver keys.gnupg.net --recv-key 381BA480 &&\\\napt-get update --fix-missing && \\\napt-get -y install r-base\n\n# Install R libraries\nRUN R -e 'install.packages(\"ROCR\", repos=\"http://cloud.r-project.org/\"); install.packages(\"randomForest\",repos=\"http://cloud.r-project.org/\")'", + "_type": "code" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/cbcrg/lncRNA-Annotation-nf/blob/master/Dockerfile", + "_key": "95d80901751f" + } + ], + "children": [ + { + "_key": "31f01f88d7d4", + "_type": "span", + "marks": [], + "text": "For the complete working Dockerfile of this project see " + }, + { + "text": "here", + "_key": "61cd37841c10", + "_type": "span", + "marks": [ + "95d80901751f" + ] + } + ], + "_type": "block", + "style": "normal", + "_key": "f897e630ac44" + }, + { + "_type": "block", + "style": "normal", + "_key": "34a2ed31ef9a", + "children": [ + { + "_key": "ec0c46f9c3c6", + "_type": "span", + "text": "" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "###Building the Docker Image", + "_key": "99404c3f6b68", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "1abf7c16ad8c" + }, + { + "children": [ + { + "_key": "1d5e4d812566", + "_type": "span", + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "b636fd70f4f5" + }, + { + "style": "normal", + "_key": "5650048a4760", + "markDefs": [], + "children": [ + { + "_key": "5264f09e8e11", + "_type": "span", + "marks": [], + "text": "Once we start working on the Dockerfile, we can build it anytime using:" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "d641e7f6bf5b", + 
"children": [ + { + "_key": "70fcc4126623", + "_type": "span", + "text": "" + } + ] + }, + { + "code": "docker build -t skptic/lncRNA_annotation .", + "_type": "code", + "_key": "e90f06c1b843" + }, + { + "markDefs": [], + "children": [ + { + "text": "This builds the image from the Dockerfile and assigns a tag (i.e. a name) for the image. If there are no errors, the Docker image is now in you local Docker repository ready for use.", + "_key": "fe3388bbb799", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "8ccc8a028371" + }, + { + "children": [ + { + "_key": "23129a90f294", + "_type": "span", + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "7738ce1608b0" + }, + { + "markDefs": [], + "children": [ + { + "text": "###Testing the Docker Image", + "_key": "ac9aefee0790", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "53e684ed0883" + }, + { + "children": [ + { + "text": "", + "_key": "29310a754336", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "f459dd9c7e8f" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "We find it very helpful to test our images as we develop the Docker file. Once built, it is possible to launch the Docker image and test if the desired software was correctly installed. 
For example, we can test if FEELnc and its dependencies were successfully installed by running the following:", + "_key": "0f7532136e6a" + } + ], + "_type": "block", + "style": "normal", + "_key": "995c7f634de1" + }, + { + "_type": "block", + "style": "normal", + "_key": "0f3a99e8f0f9", + "children": [ + { + "text": "", + "_key": "76902143bcad", + "_type": "span" + } + ] + }, + { + "code": "docker run -ti lncrna_annotation\n\ncd FEELnc/test\n\nFEELnc_filter.pl -i transcript_chr38.gtf -a annotation_chr38.gtf \\\n> -b transcript_biotype=protein_coding > candidate_lncRNA.gtf\n\nexit # remember to exit the Docker image", + "_type": "code", + "_key": "8bc163f9f47c" + }, + { + "style": "normal", + "_key": "8a04e5fe54c3", + "markDefs": [], + "children": [ + { + "text": "###Tagging the Docker Image", + "_key": "3c27e7d47f5a", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "376d9185809e", + "children": [ + { + "_type": "span", + "text": "", + "_key": "b58a8fff4134" + } + ], + "_type": "block" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://hub.docker.com/", + "_key": "e8267b213edb" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Once you are confident your image is built correctly, you can tag it, allowing you to push it to ", + "_key": "0e81f997274e" + }, + { + "text": "Dockerhub.io", + "_key": "9f99511c671e", + "_type": "span", + "marks": [ + "e8267b213edb" + ] + }, + { + "marks": [], + "text": ". 
Dockerhub is an online repository for docker images which allows anyone to pull public images and run them.", + "_key": "62279d8d8677", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "1a1035fe3e9e" + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "629916622f88" + } + ], + "_type": "block", + "style": "normal", + "_key": "7ad2329cd8e6" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "You can view the images in your local repository with the ", + "_key": "83a7985ea39e" + }, + { + "_key": "9b9c237f8f87", + "_type": "span", + "marks": [ + "code" + ], + "text": "docker images" + }, + { + "marks": [], + "text": " command and tag using ", + "_key": "6aaa7f1f9459", + "_type": "span" + }, + { + "marks": [ + "code" + ], + "text": "docker tag", + "_key": "56edcf9c0231", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " with the image ID and the name.", + "_key": "fccfd00ea0ef" + } + ], + "_type": "block", + "style": "normal", + "_key": "ab2403070ee6", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "4796d2e24cad" + } + ], + "_type": "block", + "style": "normal", + "_key": "2883293716da" + }, + { + "_type": "code", + "_key": "cb58c9b6a966", + "code": "docker images\n\nREPOSITORY TAG IMAGE ID CREATED SIZE\nlncrna_annotation latest d8ec49cbe3ed 2 minutes ago 821.5 MB\n\ndocker tag d8ec49cbe3ed cbcrg/lncrna_annotation:latest" + }, + { + "children": [ + { + "marks": [], + "text": "Now when we check our local images we can see the updated tag.", + "_key": "efecf9499efc", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "977cb77dafd8", + "markDefs": [] + }, + { + "children": [ + { + "_key": "de27f8c8d34d", + "_type": "span", + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "9e069ba58981" + }, + { + "_type": "code", + "_key": "859c42e5cad8", + "code": "docker images\n\nREPOSITORY TAG IMAGE 
ID CREATED SIZE\ncbcrg/lncrna_annotation latest d8ec49cbe3ed 2 minutes ago 821.5 MB" + }, + { + "style": "normal", + "_key": "36110c0bc0bc", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "###Pushing the Docker Image to Dockerhub", + "_key": "adbb0489873f" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "72eb6aa2d1ff", + "children": [ + { + "text": "", + "_key": "1818ebcbf996", + "_type": "span" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "a7bd5e43df27", + "markDefs": [ + { + "_key": "1cf86a9aeb72", + "_type": "link", + "href": "https://hub.docker.com/" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "If you have not previously, sign up for a Dockerhub account ", + "_key": "d3c68be9bab9" + }, + { + "marks": [ + "1cf86a9aeb72" + ], + "text": "here", + "_key": "73adbe5a767b", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": ". From the command line, login to Dockerhub and push your image.", + "_key": "fdb56fb68fc0" + } + ] + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "74b517dfa3a2" + } + ], + "_type": "block", + "style": "normal", + "_key": "76d11410797f" + }, + { + "code": "docker login --username=cbcrg\ndocker push cbcrg/lncrna_annotation", + "_type": "code", + "_key": "72e018a1b3a7" + }, + { + "style": "normal", + "_key": "4e814562758e", + "markDefs": [], + "children": [ + { + "_key": "603c47308e12", + "_type": "span", + "marks": [], + "text": "You can test if you image has been correctly pushed and is publicly available by removing your local version using the IMAGE ID of the image and pulling the remote:" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "12f3da40fcc2", + "children": [ + { + "_type": "span", + "text": "", + "_key": "25b68c28836e" + } + ] + }, + { + "code": "docker rmi -f d8ec49cbe3ed\n\n# Ensure the local version is not listed.\ndocker 
images\n\ndocker pull cbcrg/lncrna_annotation", + "_type": "code", + "_key": "9c8cc03d66d4" + }, + { + "_key": "67b0d083f1e1", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "We are now almost ready to run our pipeline. The last step is to set up the Nexflow config.", + "_key": "fb18e8ebb6fb", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_key": "851718e1c203", + "children": [ + { + "_key": "7e1f6285672c", + "_type": "span", + "text": "" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "###Nextflow Configuration", + "_key": "ee703ba1a7b8", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "89a8e9b57253" + }, + { + "style": "normal", + "_key": "301b53373abc", + "children": [ + { + "_type": "span", + "text": "", + "_key": "3c12e4f84be6" + } + ], + "_type": "block" + }, + { + "_key": "853618d141bc", + "markDefs": [], + "children": [ + { + "text": "Within the ", + "_key": "e450d4c03687", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "nextflow.config", + "_key": "eb562dcd976e" + }, + { + "_key": "0aada97916c3", + "_type": "span", + "marks": [], + "text": " file in the main project directory we can add the following line which links the Docker image to the Nexflow execution. 
The images can be:" + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "text": "", + "_key": "56eecb336e47", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "bcefea639daa" + }, + { + "style": "normal", + "_key": "b30a67dabb0c", + "listItem": "bullet", + "children": [ + { + "marks": [], + "text": "General (same docker image for all processes):", + "_key": "dc79564414fe", + "_type": "span" + }, + { + "_key": "88c45b92cac0", + "_type": "span", + "text": "\n\n" + }, + { + "_type": "span", + "text": " process {\n container = 'cbcrg/lncrna_annotation'\n }\n", + "_key": "b8e693fb94da" + }, + { + "_type": "span", + "marks": [], + "text": "Specific to a profile (specified by `-profile crg` for example):", + "_key": "dbfdedf028a7" + }, + { + "_type": "span", + "text": "\n\n", + "_key": "93fd2bf6af97" + }, + { + "text": " profile {\n crg {\n container = 'cbcrg/lncrna_annotation'\n }\n }\n", + "_key": "0e604dd4732b", + "_type": "span" + }, + { + "marks": [], + "text": "Specific to a given process within a pipeline:", + "_key": "711cd0470649", + "_type": "span" + }, + { + "_type": "span", + "text": "\n\n", + "_key": "345b60ce2451" + }, + { + "_type": "span", + "text": " $processName.container = 'cbcrg/lncrna_annotation'", + "_key": "96fd3fcfe331" + } + ], + "_type": "block" + }, + { + "_key": "a560b7a67c40", + "children": [ + { + "_type": "span", + "text": "", + "_key": "14aebd1b7468" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "4033d3bebdf9", + "markDefs": [ + { + "_type": "link", + "href": "https://www.nextflow.io/blog/2016/best-practice-for-reproducibility.html", + "_key": "f61aacdb2ef0" + } + ], + "children": [ + { + "marks": [], + "text": "In most cases it is easiest to use the same Docker image for all processes. One further thing to consider is the inclusion of the sha256 hash of the image in the container reference. 
I have ", + "_key": "1a64527f3033", + "_type": "span" + }, + { + "text": "previously written about this", + "_key": "38cf9657683c", + "_type": "span", + "marks": [ + "f61aacdb2ef0" + ] + }, + { + "_type": "span", + "marks": [], + "text": ", but briefly, including a hash ensures that not a single byte of the operating system or software is different.", + "_key": "bc4e97553513" + } + ] + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "441548f75de3" + } + ], + "_type": "block", + "style": "normal", + "_key": "0eaf14f96c05" + }, + { + "code": " process {\n container = 'cbcrg/lncrna_annotation@sha256:9dfe233b...'\n }", + "_type": "code", + "_key": "e986f84b6af5" + }, + { + "markDefs": [], + "children": [ + { + "text": "All that is left now to run the pipeline.", + "_key": "132c729c8d25", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "39e6843958d4" + }, + { + "style": "normal", + "_key": "a4a85a9e7228", + "children": [ + { + "_type": "span", + "text": "", + "_key": "3eba503d4fca" + } + ], + "_type": "block" + }, + { + "_type": "code", + "_key": "a90f3eeed817", + "code": "nextflow run lncRNA-Annotation-nf -profile test" + }, + { + "children": [ + { + "marks": [], + "text": "Whilst I have explained this step-by-step process in a linear, consequential manner, in reality the development process is often more circular with changes in the Docker images reflecting changes in the pipeline.", + "_key": "6bc1b9275274", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "e51c1eda68c5", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "text": "", + "_key": "7ece5b1d69ec" + } + ], + "_type": "block", + "style": "normal", + "_key": "f4ab602e7e18" + }, + { + "children": [ + { + "marks": [], + "text": "###CircleCI and Nextflow", + "_key": "127548bd2ca6", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "3e9640736a34", + "markDefs": [] + }, + { + 
"_key": "8bd12b9a35d5", + "children": [ + { + "_key": "48776ea3a77d", + "_type": "span", + "text": "" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "http://www.circleci.com", + "_key": "52d7d21fec88" + } + ], + "children": [ + { + "_key": "e1a0115c8a63", + "_type": "span", + "marks": [], + "text": "Now that you have a pipeline that successfully runs on a test dataset with Docker, a very useful step is to add a continuous development component to the pipeline. With this, whenever you push a modification of the pipeline to the GitHub repo, the test data set is run on the " + }, + { + "_key": "bf9e5650e51a", + "_type": "span", + "marks": [ + "52d7d21fec88" + ], + "text": "CircleCI" + }, + { + "_type": "span", + "marks": [], + "text": " servers (using Docker).", + "_key": "a7690c9f35e1" + } + ], + "_type": "block", + "style": "normal", + "_key": "006188af7329" + }, + { + "style": "normal", + "_key": "bbb43942df4f", + "children": [ + { + "text": "", + "_key": "565cf1047e08", + "_type": "span" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_key": "f8cea4ca2097", + "_type": "span", + "marks": [], + "text": "To include CircleCI in the Nexflow pipeline, create a file named " + }, + { + "marks": [ + "code" + ], + "text": "circle.yml", + "_key": "fa98c01db045", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " in the project directory. 
We add the following instructions to the file:", + "_key": "2f21b332f3b0" + } + ], + "_type": "block", + "style": "normal", + "_key": "41364a5f63d3", + "markDefs": [] + }, + { + "style": "normal", + "_key": "e2b0b14d0fd2", + "children": [ + { + "_type": "span", + "text": "", + "_key": "25942a6c677a" + } + ], + "_type": "block" + }, + { + "_type": "code", + "_key": "7433acb412d2", + "code": "machine:\n java:\n version: oraclejdk8\n services:\n - docker\n\ndependencies:\n override:\n\ntest:\n override:\n - docker pull cbcrg/lncrna_annotation\n - curl -fsSL get.nextflow.io | bash\n - ./nextflow run . -profile test" + }, + { + "style": "normal", + "_key": "70d6d1859e1d", + "markDefs": [], + "children": [ + { + "_key": "433129d9fd5e", + "_type": "span", + "marks": [], + "text": "Next you can sign up to CircleCI, linking your GitHub account." + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "2f2296b7bb34", + "children": [ + { + "text": "", + "_key": "3a0243c5639e", + "_type": "span" + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "261b716a06a7", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Within the GitHub README.md you can add a badge with the following:", + "_key": "0be2d4a60379", + "_type": "span" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "c227bf5b9089", + "children": [ + { + "text": "", + "_key": "64db719ff8e1", + "_type": "span" + } + ] + }, + { + "_key": "a375b3bed0e9", + "code": "![CircleCI status](https://circleci.com/gh/cbcrg/lncRNA-Annotation-nf.png?style=shield)", + "_type": "code" + }, + { + "style": "normal", + "_key": "1642b961bc5a", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "###Tips and Tricks", + "_key": "46f101c27e69" + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "993ba2832874", + "children": [ + { + "_key": "dd9168b63937", + "_type": "span", + "text": "" + } + ], + "_type": "block" + }, + 
{ + "_key": "d33d746e9473", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "File permissions", + "_key": "a2f6b726c62d" + }, + { + "marks": [], + "text": ": When a process is executed by a Docker container, the UNIX user running the process is not you. Therefore any files that are used as an input should have the appropriate file permissions. For example, I had to change the permissions of all the input data in the test data set with:", + "_key": "287310bd8df1", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "0fd72cb1c652", + "children": [ + { + "_key": "f294eccb09f6", + "_type": "span", + "text": "" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "d03452f5b41c", + "markDefs": [], + "children": [ + { + "text": "find ", + "_key": "d7e384eae7c7", + "_type": "span", + "marks": [] + }, + { + "text": "", + "_key": "28f9a9ea28bf", + "_type": "span" + }, + { + "text": " -type f -exec chmod 644 {} ", + "_key": "d91084971fde", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "text": "\\;", + "_key": "542ab5615352" + }, + { + "_type": "span", + "marks": [], + "text": " find ", + "_key": "64fa506b82ef" + }, + { + "_type": "span", + "text": "", + "_key": "151f315f9c14" + }, + { + "_type": "span", + "marks": [], + "text": " -type d -exec chmod 755 {} ", + "_key": "d588ee4d8fb4" + }, + { + "_type": "span", + "text": "\\;", + "_key": "83813e5d73d2" + } + ] + }, + { + "_key": "5c097f5ad5b2", + "children": [ + { + "text": "", + "_key": "8dc4ce35290d", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "16153769e1e6", + "markDefs": [ + { + "_key": "a645ea709cb2", + "_type": "link", + "href": "mailto:/evanfloden@gmail.com" + } + ], + "children": [ + { + "marks": [], + "text": "###Summary This was my first time building a Docker image and after a bit of trial-and-error 
the process was surprising straight forward. There is a wealth of information available for Docker and the almost seamless integration with Nextflow is fantastic. Our collaboration team is now looking forward to applying the pipeline to different datasets and publishing the work, knowing our results will be completely reproducible across any platform. ", + "_key": "f21d4187558c", + "_type": "span" + }, + { + "text": "", + "_key": "4270a7ace6f5", + "_type": "span" + }, + { + "_type": "span", + "text": "", + "_key": "91edb73c5f75" + }, + { + "_type": "span", + "marks": [ + "a645ea709cb2" + ], + "text": "/evanfloden@gmail.com", + "_key": "38e9df5b660d" + } + ], + "_type": "block" + } + ], + "_rev": "Ot9x7kyGeH5005E3MIo38v", + "title": "Docker for dunces & Nextflow for nunces", + "_updatedAt": "2024-09-26T09:01:26Z", + "_type": "blogPost", + "_createdAt": "2024-09-25T14:15:05Z", + "meta": { + "slug": { + "current": "docker-for-dunces-nextflow-for-nunces" + } + }, + "author": { + "_ref": "evan-floden", + "_type": "reference" + }, + "tags": [ + { + "_ref": "ace8dd2c-eed3-4785-8911-d146a4e84bbb", + "_type": "reference", + "_key": "5edc3ed408ba" + }, + { + "_type": "reference", + "_key": "c2a74b2b2cad", + "_ref": "b6511053-299b-4aa5-8957-94fb9ebc9493" + } + ], + "publishedAt": "2016-06-10T06:00:00.000Z" + }, + { + "_type": "blogPost", + "title": "Introducing the new Pipeline Launch forms: A leap forward in usability and functionality", + "meta": { + "noIndex": false, + "slug": { + "_type": "slug", + "current": "new-pipeline-launch-forms" + }, + "_type": "meta", + "description": "Today, we are excited to introduce the newly redesigned Pipeline launch forms, marking the first phase in a broader initiative to revamp the entire form submission experience across our platform. 
" + }, + "_rev": "y83n3eQxj1PRqzuDdkeW1u", + "publishedAt": "2024-08-15T09:11:00.000Z", + "body": [ + { + "_key": "c3aaafb21d9e", + "markDefs": [ + { + "href": "https://feedback.seqera.io/feature-requests/p/update-forms-user-interface-including-pipeline-launch-relaunch-form-redesign", + "_key": "88ea45bac22e", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "At Seqera, we’re committed to listening to our users feedback and continuously improving the Platform to meet your evolving needs. One of the most ", + "_key": "02fd5be96db90" + }, + { + "_type": "span", + "marks": [ + "88ea45bac22e" + ], + "text": "common feature requests", + "_key": "02fd5be96db91" + }, + { + "text": " has been to enhance the form submission process, specifically the Pipeline Launch and Relaunch forms. Today, we are excited to introduce the newly redesigned Pipeline Launch forms, marking the first phase in a broader initiative to ", + "_key": "02fd5be96db92", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "strong" + ], + "text": "revamp the entire form submission experience", + "_key": "02fd5be96db93", + "_type": "span" + }, + { + "marks": [], + "text": " across our platform. 
This update drastically simplifies interactions in the Seqera Platform, enhancing the day-to-day user experience by addressing known usability issues in our most frequently used forms.", + "_key": "02fd5be96db94", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "youtube", + "id": "_fru3RxBDPY", + "_key": "8957e1a6644c" + }, + { + "_type": "block", + "style": "normal", + "_key": "d013fcd8ab46", + "markDefs": [ + { + "href": "https://nf-co.re/rnaseq", + "_key": "115a2d75cd73", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "em" + ], + "text": "Screen recording of the submission of the popular", + "_key": "ff0559080c5f" + }, + { + "_type": "span", + "marks": [], + "text": " ", + "_key": "4a7b74425853" + }, + { + "text": "nf-core rnaseq", + "_key": "825c7d2a20a2", + "_type": "span", + "marks": [ + "em", + "115a2d75cd73" + ] + }, + { + "_key": "1371156ece2d", + "_type": "span", + "marks": [], + "text": " " + }, + { + "marks": [ + "em" + ], + "text": "pipeline, highlighting several features of the new form.", + "_key": "61f7d7a58d6d", + "_type": "span" + } + ] + }, + { + "style": "blockquote", + "_key": "0c7e71524011", + "markDefs": [ + { + "_key": "d729831b3c3d", + "_type": "link", + "href": "/contact-us/" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "The new Pipeline Launch and Relaunch form will be available to all Cloud users in an upcoming release, but if you are interested in being one of the early adopters, please contact your Seqera Account Executive or ", + "_key": "62c6e4fa4b92" + }, + { + "_type": "span", + "marks": [ + "d729831b3c3d" + ], + "text": "send us an email directly", + "_key": "a327f42f4d58" + }, + { + "_key": "0f90a0416b6a", + "_type": "span", + "marks": [], + "text": "." 
+ } + ], + "_type": "block" + }, + { + "style": "h2", + "_key": "8d2fef8b52c1", + "markDefs": [], + "children": [ + { + "text": "Why the change?", + "_key": "b99fcb056cb80", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "2ab0657a6c5d", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "User experience is at the heart of everything we do. Over time, we've received valuable feedback from our users about the forms on our platform. In particular, we gathered feedback on some of the most frequently used forms: Pipeline Launch, Relaunch and Resume forms. In response, we have made significant enhancements to create a more intuitive, efficient, and user-friendly experience.", + "_key": "e1d6b1a2dfc3" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "h2", + "_key": "839b514875ce", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Key Objectives", + "_key": "eff267db0ffe0", + "_type": "span" + } + ] + }, + { + "_key": "663f9546066e", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "The redesign of the Pipeline Launch and Relaunch forms was guided by four objectives:", + "_key": "7ff0898d0be60" + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "615515ed9874", + "listItem": "number", + "markDefs": [], + "children": [ + { + "text": "Simpler navigation: ", + "_key": "1d26e055f5610", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "5e9912500f67", + "_type": "span", + "marks": [], + "text": "The new multi-step approach ensures that users can easily navigate through pipeline launch form submissions without unnecessary steps. 
Key information is stored and grouped logically, allowing users to focus on the essential steps.\n" + } + ], + "level": 1, + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "text": "Enhanced validation:", + "_key": "29322453600b0", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "38f8ea4cc069", + "_type": "span", + "marks": [], + "text": " We've added robust validation features to ensure the accuracy and completeness of submitted information, reducing errors and helping users avoid common pitfalls during pipeline configuration.\n" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "0a3c9275d682", + "listItem": "number" + }, + { + "listItem": "number", + "markDefs": [], + "children": [ + { + "marks": [ + "strong" + ], + "text": "Improved clarity:", + "_key": "9a36879cad1a0", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " Form content has been updated to be more concise and clear, ensuring users can quickly understand the requirements and options available to them, thus reducing confusion and improving overall efficiency.\n", + "_key": "b59310c7a08f" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "41c43b0e4337" + }, + { + "markDefs": [], + "children": [ + { + "text": "Enhanced key components:", + "_key": "cc7b374d1f3c0", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "0b6abf765bcd", + "_type": "span", + "marks": [], + "text": " Key form components have been redesigned to offer a more intuitive user experience. This includes more dynamic control of the configured parameters, the ability to switch between a UI schema view, and interactive JSON and YAML rendering for full control every time a user launches, relaunches or resumes a pipeline." 
+ } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "62a1887cef00", + "listItem": "number" + }, + { + "_type": "block", + "style": "h2", + "_key": "fa6adc370047", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Enhancements", + "_key": "312d4a3b6aaf0" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "text": "The redesigned Pipeline Launch and Relaunch forms come with a host of new features designed to improve usability and functionality:", + "_key": "61e7239ff41b0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "73b1e280cd30" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "38620088a7ad", + "listItem": "number", + "markDefs": [], + "children": [ + { + "marks": [ + "strong" + ], + "text": "Multi-step approach:", + "_key": "cdb7790f12f30", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " Users can now navigate through forms with a streamlined, multi-step approach. If everything is set up correctly, there's no need to go through all steps –simply run what you know works.", + "_key": "1890588ea99a" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "b97a50ad5378", + "listItem": "number", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Enhanced assistance:", + "_key": "527b2be40ea70" + }, + { + "_key": "e27e6ee2d88f", + "_type": "span", + "marks": [], + "text": " We've improved feedback mechanisms to provide detailed information about errors or missing parameters helping users to quickly identify and rectify issues before launching pipelines." 
+ } + ], + "level": 1 + }, + { + "_type": "block", + "style": "normal", + "_key": "5e86cf51f887", + "listItem": "number", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Developer-friendly:", + "_key": "b73d4356a53b0" + }, + { + "text": " Developers can switch between UI schema views and a more comprehensive parameter view using JSON and YAML interactive rendering. This flexibility allows for dynamic control of form validity and ensures that developers have the tools they need to configure their pipelines effectively.", + "_key": "b02329811097", + "_type": "span", + "marks": [] + } + ], + "level": 1 + }, + { + "_key": "407755155091", + "listItem": "number", + "markDefs": [], + "children": [ + { + "text": "Enhanced rendering:", + "_key": "ea635d07dac20", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "text": " The form now dynamically generates the UI interface for parameter input whenever a compatible schema is defined. This improvement addresses previous limitations where the UI interface was only rendered when launching a saved pipeline, as opposed to relaunching or using Quick Launch. 
With this update, the UI is rendered consistently across all launching scenarios, providing a more convenient and streamlined experience.", + "_key": "34bd6d508e99", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Improved flow and status information:", + "_key": "afcef5fe2e9a0" + }, + { + "_type": "span", + "marks": [], + "text": " The new design offers a smoother flow and more informative status updates, providing a clear view of the submission process at every stage.", + "_key": "45d7a470b0fb" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "fdc5babed53d", + "listItem": "number", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "909bfadb3e3e", + "listItem": "number", + "markDefs": [], + "children": [ + { + "marks": [ + "strong" + ], + "text": "Summary step:", + "_key": "3e527527f45a", + "_type": "span" + }, + { + "text": " A new summary view allows users to review all information at a glance before launching their pipeline.\n", + "_key": "88d8bdfc44f4", + "_type": "span", + "marks": [] + } + ], + "level": 1 + }, + { + "_key": "73fc1879d9a4", + "markDefs": [], + "children": [ + { + "_key": "1a460c4d3b490", + "_type": "span", + "marks": [], + "text": "Summary" + } + ], + "_type": "block", + "style": "h2" + }, + { + "_key": "2f8698228d89", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "By focusing on these key objectives and enhancements, we aimed to improve one of the most commonly performed actions in the Seqera Platform. The redesigned form makes it easier for new and experienced users to run pipelines in the Seqera Platform. This effort is just the beginning of our goal of enhancing the form submission experience across the platform. Moreover, this initial refactor enables us to continue improving and expanding the user experience in the future. 
You can expect more enhancements as we roll out additional features and improvements based on our community feedback.", + "_key": "f7514251e56d0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_key": "0fcc413706ad", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "77091802bb0d" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "dc820fe7dbe3", + "markDefs": [ + { + "_key": "0e8e643720c4", + "_type": "link", + "href": "https://docs.seqera.io/platform/24.1/launch/launchpad#launch-form" + } + ], + "children": [ + { + "marks": [], + "text": "Read the ", + "_key": "9654eadd2b3b0", + "_type": "span" + }, + { + "_key": "bdb86ee0c748", + "_type": "span", + "marks": [ + "0e8e643720c4" + ], + "text": "official documentation" + }, + { + "text": " to find out more.", + "_key": "881341a728c4", + "_type": "span", + "marks": [] + } + ] + } + ], + "author": { + "_ref": "mattia-bosio", + "_type": "reference" + }, + "_updatedAt": "2024-08-16T15:23:20Z", + "_createdAt": "2024-08-15T08:44:07Z", + "_id": "5cf61b02-f036-49f9-850c-72e0bf3d4f35", + "tags": [ + { + "_ref": "82fd60f1-c6d0-4b8a-9c5d-f971c622f341", + "_type": "reference", + "_key": "c2000532faf0" + } + ] + }, + { + "_updatedAt": "2024-09-03T07:58:25Z", + "author": { + "_type": "reference", + "_ref": "evan-floden" + }, + "_id": "5df71356-10dc-422f-bae8-e26491a560dc", + "_createdAt": "2024-08-06T13:37:21Z", + "body": [ + { + "_type": "image", + "_key": "39e47c87a1bd", + "asset": { + "_type": "reference", + "_ref": "image-3d25c202215864675258a5c2c5084d2f656aae73-1200x629-png" + } + }, + { + "_key": "f5d38bed2100", + "markDefs": [ + { + "href": "https://www.tinybio.cloud/", + "_key": "0eadc8ec1fed", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "We are thrilled to announce that Seqera is joining forces with ", + "_key": "e2be5dfe2b940" + }, + { + 
"text": "tinybio", + "_key": "e2be5dfe2b941", + "_type": "span", + "marks": [ + "0eadc8ec1fed" + ] + }, + { + "_key": "e2be5dfe2b942", + "_type": "span", + "marks": [], + "text": ", a NYC-based tech-bio start-up known for its AI-integrated scientific tools focused on executing pipelines and analyses via natural language. We are happy to welcome the tinybio team and community into the Seqera family." + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "h2", + "_key": "28fd4bc59876", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Empowering All Scientists with Advanced Data Tools", + "_key": "e583cc66c5fc0" + } + ] + }, + { + "_key": "9d1bdb6acb67", + "markDefs": [ + { + "_type": "link", + "href": "https://www.nature.com/articles/s41467-024-49777-x", + "_key": "b2a101e1faea" + }, + { + "_type": "link", + "href": "https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-021-01304-y", + "_key": "8e1ad6d30663" + }, + { + "_type": "link", + "href": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8719813/", + "_key": "307be9175f5a" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Scientists spend a ", + "_key": "6a6167e26f940" + }, + { + "marks": [ + "b2a101e1faea" + ], + "text": "significant proportion of their time", + "_key": "6a6167e26f941", + "_type": "span" + }, + { + "_key": "6a6167e26f942", + "_type": "span", + "marks": [], + "text": " transforming and structuring data for analysis. 
In fact, a " + }, + { + "_key": "6a6167e26f943", + "_type": "span", + "marks": [ + "8e1ad6d30663" + ], + "text": "lessons learned piece on the COVID-19 pandemic " + }, + { + "_type": "span", + "marks": [], + "text": "underscored how ", + "_key": "6a6167e26f944" + }, + { + "marks": [ + "strong" + ], + "text": "issues in data analysis ", + "_key": "6a6167e26f945", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": "and study design can ", + "_key": "6a6167e26f946" + }, + { + "_key": "6a6167e26f947", + "_type": "span", + "marks": [ + "strong" + ], + "text": "significantly impact scientific breakthroughs" + }, + { + "_key": "6a6167e26f948", + "_type": "span", + "marks": [], + "text": ". " + }, + { + "_key": "6a6167e26f949", + "_type": "span", + "marks": [ + "307be9175f5a" + ], + "text": "As biological data continues to grow exponentially" + }, + { + "text": ", there is an urgent need to manage large-scale data more rapidly for accelerated scientific breakthroughs. To achieve this, we are partnering with tinybio to ", + "_key": "6a6167e26f9410", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "strong" + ], + "text": "harness the power of GenAI,", + "_key": "6a6167e26f9411", + "_type": "span" + }, + { + "_key": "6a6167e26f9412", + "_type": "span", + "marks": [], + "text": " lowering the barrier for scientists to fully leverage advanced computational tools to achieve their research goals." 
+ } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "marks": [], + "text": "", + "_key": "27ddeccac626", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "45b78a00c7e8", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "tinybio: Specialized ChatGPT for Researchers", + "_key": "a3b9dc23dc9a0" + } + ], + "_type": "block", + "style": "h2", + "_key": "3ab4e5cf5dfe" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://www.tinybio.cloud/", + "_key": "908a8535997e" + }, + { + "_key": "4cc3db069b96", + "_type": "link", + "href": "https://chatgpt.com/" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Motivated by challenges faced as researchers experimenting with different bioinformatics packages, Sasha and Vishal founded ", + "_key": "405ee96282980" + }, + { + "_type": "span", + "marks": [ + "908a8535997e" + ], + "text": "tinybio", + "_key": "405ee96282981" + }, + { + "marks": [], + "text": " in 2022, convinced there had to be a better, easier way to get started with bioinformatics. The initial goal of tinybio was to remove the barrier to entry for running bioinformatics packages, a mission that gained significant momentum with the announcement of ", + "_key": "405ee96282982", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "4cc3db069b96" + ], + "text": "ChatGPT", + "_key": "405ee96282983" + }, + { + "text": " in November 2022. The tinybio co-founders recognized the potential of ", + "_key": "405ee96282984", + "_type": "span", + "marks": [] + }, + { + "_key": "0ba59e2f9664", + "_type": "span", + "marks": [ + "strong" + ], + "text": "leveraging GenAI " + }, + { + "_key": "984fcc760079", + "_type": "span", + "marks": [], + "text": "for empowering all scientists to effectively utilize bioinformatics tools, regardless of their experience or research background. 
Ever since, tinybio have focused on applying GenAI to drive bioinformatics innovation." + } + ], + "_type": "block", + "style": "normal", + "_key": "58b28d00c516" + }, + { + "style": "normal", + "_key": "e289bac08afd", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "9722baf8eb0a", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "_key": "c3ff7a2beda0", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "\"After seeing the amazing traction around our chat-based pipeline execution and analysis tool, Vishal and I knew that we needed to partner with the leader in bioinformatics pipelines to enable our vision for ", + "_key": "bc450efddd08", + "_type": "span" + }, + { + "marks": [ + "strong" + ], + "text": "more open science", + "_key": "d051bfadfe53", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " and to ", + "_key": "f42d1845bb72" + }, + { + "_key": "6b8aae3beb95", + "_type": "span", + "marks": [ + "strong" + ], + "text": "onboard millions more to computational biology." + }, + { + "marks": [], + "text": " We are truly excited to be joining the Seqera team and contributing to advancing science for everyone through their Nextflow, Wave, MultiQC, and Fusion products.\" - ", + "_key": "2f742e542afe", + "_type": "span" + }, + { + "text": "Sasha Dagayev, Co-founder at tinybio", + "_key": "bba720e98780", + "_type": "span", + "marks": [ + "strong" + ] + } + ], + "_type": "block", + "style": "blockquote" + }, + { + "children": [ + { + "_key": "8a74a60c4d9c", + "_type": "span", + "marks": [], + "text": "\ntinybio’s authentic and pragmatic approach to " + }, + { + "_key": "f21eee87a50a", + "_type": "span", + "marks": [ + "strong" + ], + "text": "leveraging LLMs for bioinformatics" + }, + { + "text": " is essential in bridging the gap between scientists and advanced computational capabilities to accelerate scientific discovery. 
By incorporating this technology, we aim to significantly enhance our existing ", + "_key": "1b90bec51a6f", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "95fd0bd0acf6" + ], + "text": "pipelines", + "_key": "d079d983b04a" + }, + { + "text": ",", + "_key": "b1b5f1bfc9ae", + "_type": "span", + "marks": [] + }, + { + "_key": "c42631b1fa90", + "_type": "span", + "marks": [ + "d259679e3355" + ], + "text": " containers" + }, + { + "_type": "span", + "marks": [], + "text": " and web resources, making high-quality, reproducible bioinformatics tools more accessible to researchers worldwide. Our goal is to ", + "_key": "a1530ae5fa4d" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "empower the global scientific community", + "_key": "f5a66b9f8ce7" + }, + { + "_key": "4f48f33b58fc", + "_type": "span", + "marks": [], + "text": " with the resources they need to drive innovation and advance our understanding of complex biological systems." + } + ], + "_type": "block", + "style": "normal", + "_key": "20b81fc022bb", + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/pipelines/", + "_key": "95fd0bd0acf6" + }, + { + "_type": "link", + "href": "https://seqera.io/containers/", + "_key": "d259679e3355" + } + ] + }, + { + "children": [ + { + "marks": [], + "text": "", + "_key": "781c75a3bb98", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "de50ded18161", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "text": "A New Era for AI-enabled Bioinformatics", + "_key": "e0633d49502f0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h2", + "_key": "a35537f824ee" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7998306/", + "_key": "81a2231f5c08" + } + ], + "children": [ + { + "marks": [], + "text": "The biotech and bioinformatics landscape is rapidly evolving, driven in-part by technological advances 
in AI. The ability to analyze massive datasets, identify patterns, and generate predictive models is revolutionizing scientific research. We also believe that AI is a powerful tool to democratize and amplify access to the most sophisticated bioinformatics tools out there. By leveraging ", + "_key": "03d0891e1aa70", + "_type": "span" + }, + { + "marks": [ + "81a2231f5c08" + ], + "text": "human-centric AI", + "_key": "03d0891e1aa71", + "_type": "span" + }, + { + "_key": "03d0891e1aa72", + "_type": "span", + "marks": [], + "text": ", we can " + }, + { + "text": "enable the 10x scientist", + "_key": "0a03364b0e08", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_type": "span", + "marks": [], + "text": " to translate complex biological data into actionable insights, thereby expediting scientific discovery and innovation.", + "_key": "49ef31effc8b" + } + ], + "_type": "block", + "style": "normal", + "_key": "cf91c1dabfd0" + }, + { + "markDefs": [], + "children": [ + { + "_key": "8560ad3cef8e0", + "_type": "span", + "marks": [], + "text": "\n" + }, + { + "marks": [], + "text": "Our partnership with tinybio represents a significant milestone in our journey to ", + "_key": "903ece1a151f", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "advance science for everyone through software", + "_key": "3cc381edad69" + }, + { + "text": ". 
This collaboration will lower the barrier of entry for a broader range of researchers to utilize bioinformatics tools effectively, facilitating groundbreaking innovations and transforming the future of genomics.", + "_key": "9d2e53834331", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "dd017c0c5d56" + }, + { + "children": [ + { + "text": "", + "_key": "32ab8ed1feb7", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "3a554d01a336", + "markDefs": [] + }, + { + "_key": "fd72009a708b", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "\"New interaction models with powerful computational platforms are transforming not just how scientists work but also what they discover. By empowering scientists with modern software engineering practices, we are ", + "_key": "ce5a8009c2f90" + }, + { + "_key": "797c10fef3eb", + "_type": "span", + "marks": [ + "strong" + ], + "text": "enabling the next generation of innovations" + }, + { + "text": " in personalized therapeutics, sustainable materials, better drug delivery methodologies, and green chemical and agricultural production. 
This acquisition marks a significant step towards accelerating scientific discoveries and enabling researchers with better software.\" -", + "_key": "f3cfe9eb53f7", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": " Evan Floden, CEO at Seqera", + "_key": "7c3b3468dfe3" + } + ], + "_type": "block", + "style": "blockquote" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "1bcc2f6d724a" + } + ], + "_type": "block", + "style": "normal", + "_key": "849a938fb79b" + }, + { + "_key": "57dd7a75086e", + "markDefs": [], + "children": [ + { + "_key": "2b06457c0ed50", + "_type": "span", + "marks": [], + "text": "Enhancing our Open Science Core" + } + ], + "_type": "block", + "style": "h2" + }, + { + "children": [ + { + "marks": [], + "text": "Our mission at Seqera is to ", + "_key": "28b149c00eee0", + "_type": "span" + }, + { + "_key": "57dc51e60f83", + "_type": "span", + "marks": [ + "strong" + ], + "text": "make science accessible to everyone through software" + }, + { + "text": ". As research becomes increasingly digitized, there is a critical need to access all available scientific research to make informed R&D decisions and ultimately accelerate the impact on patients. Central to achieving this is Open Science, which ensures reproducibility, validation and transparency across the scientific community. 
With AI, we want to further enhance our Open Science core, by lowering the barrier of adoption of bioinformatics tools for ", + "_key": "f068fe8ef9f4", + "_type": "span", + "marks": [] + }, + { + "text": "millions more researchers worldwide,", + "_key": "28b149c00eee1", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_type": "span", + "marks": [], + "text": " driving more rapid advancements in science and medicine.", + "_key": "28b149c00eee2" + } + ], + "_type": "block", + "style": "normal", + "_key": "6ace1e068755", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_key": "a54ccaf62d6c0", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "83a6b2dc9d92" + }, + { + "style": "h2", + "_key": "5b4fbb778158", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "What’s Next for Seqera and tinybio?", + "_key": "6db8f88cd0a20" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "be45046290e5", + "markDefs": [], + "children": [ + { + "_key": "d1f478e8ca4e0", + "_type": "span", + "marks": [], + "text": "Seqera is excited to collaborate closely with tinybio’s founders Sasha Dagayev and Vishal Patel to further its mission of advancing science for everyone through software. Their expertise will be instrumental in driving the development of community-centric tools on Seqera.io," + }, + { + "text": " empowering scientists worldwide", + "_key": "7df2ef0b8ff2", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "30172707b906", + "_type": "span", + "marks": [], + "text": " to leverage modern software capabilities on demand." 
+ } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "249e21a060a8", + "markDefs": [], + "children": [ + { + "_key": "a38bd6a01e2e0", + "_type": "span", + "marks": [], + "text": "We will first focus on leveraging AI to solve the cold start problem for the next generation of scientists and " + }, + { + "text": "removing barriers to entry to bioinformatics", + "_key": "6970533750e5", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_type": "span", + "marks": [], + "text": ". Existing powerful frameworks and resources, such as Nextflow, nf-core, Seqera Pipelines and Containers, have been significantly enhancing the research productivity of bioinformaticians, but come with a steep learning curve that prevents newcomers from getting started fast.", + "_key": "7f4fec95cdd5" + } + ] + }, + { + "style": "normal", + "_key": "d42b3b57dfac", + "markDefs": [], + "children": [ + { + "_key": "8ee4c6456e1a0", + "_type": "span", + "marks": [], + "text": "We want to free the next generation of scientists from wasting time in the nitty gritty of setting up various bioinformatics packages and infrastructure. We believe future scientists should be able to focus on understanding the “what” and “why” of their analysis, while the “how” is generated for them in an understandable and verifiable way. Our tools and resources provide already powerful building blocks to enable this, and we cannot wait to bring these new updates to users in the coming months. Stay tuned!" 
+ } + ], + "_type": "block" + }, + { + "children": [ + { + "marks": [], + "text": "", + "_key": "7f7a558dfa34", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "6d3fbe9d99a0", + "markDefs": [] + }, + { + "markDefs": [ + { + "href": "https://hubs.la/Q02NlSWM0", + "_key": "eb86d506d8aa", + "_type": "link" + }, + { + "_key": "5f0b731c8d4f", + "_type": "link", + "href": "https://hubs.la/Q02NlVXy0" + } + ], + "children": [ + { + "_key": "fd5a307d6e25", + "_type": "span", + "marks": [], + "text": "Interested in finding out more? Watch the Nextflow Channels podcast on " + }, + { + "marks": [ + "eb86d506d8aa" + ], + "text": "GenAI for bioinformatics", + "_key": "f1a61d9d9482", + "_type": "span" + }, + { + "marks": [], + "text": " or ", + "_key": "8a4c1b94d2cb", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "5f0b731c8d4f" + ], + "text": "subscribe to our newsletter", + "_key": "d505924c7484" + }, + { + "_type": "span", + "marks": [], + "text": " to stay tuned!", + "_key": "09ba811e5ef1" + } + ], + "_type": "block", + "style": "blockquote", + "_key": "902d0053f19e" + }, + { + "_type": "block", + "style": "normal", + "_key": "83248382cacc", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "d1a81dc47aac" + } + ] + }, + { + "style": "h2", + "_key": "cfcca3842875", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "About tinybio", + "_key": "113745debebc0" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "tinybio is a New York City based startup focused on the application of generally available generative AI technologies to help bioinformaticians and researchers. It was started by Sasha Dagayev and Vishal Patel in 2022. 
To date, the company has helped thousands of researchers to resolve hundreds of thousands of bioinformatics issues.", + "_key": "eedbe6e83ab60" + } + ], + "_type": "block", + "style": "normal", + "_key": "0119756f1ecc", + "markDefs": [] + } + ], + "tags": [ + { + "_ref": "d356a4d5-06c1-40c2-b655-4cb21cf74df1", + "_type": "reference", + "_key": "3395edbcdd9d" + }, + { + "_ref": "1b55a117-18fe-40cf-8873-6efd157a6058", + "_type": "reference", + "_key": "c9112db92839" + } + ], + "meta": { + "slug": { + "_type": "slug", + "current": "tinybio-joins-seqera-to-advance-science-for-everyone-now-through-genai" + }, + "_type": "meta", + "shareImage": { + "asset": { + "_type": "reference", + "_ref": "image-3d25c202215864675258a5c2c5084d2f656aae73-1200x629-png" + }, + "_type": "image" + }, + "description": "We are thrilled to announce that Seqera is joining forces with tinybio, a NYC-based tech-bio start-up known for its AI-integrated scientific tools focused on executing pipelines and analyses via natural language. 
\n", + "noIndex": false + }, + "_rev": "Z979U64FXLC2cFZCkgkV9v", + "title": "Seqera acquires tinybio to Advance Science for Everyone - Now Through GenAI!", + "_type": "blogPost", + "publishedAt": "2024-08-06T13:43:00.000Z" + }, + { + "_type": "blogPost", + "_id": "8e1a9fb2-814c-455b-890c-5f3f07e83da4", + "tags": [ + { + "_key": "e3b6edcaea48", + "_ref": "b6511053-299b-4aa5-8957-94fb9ebc9493", + "_type": "reference" + } + ], + "_updatedAt": "2024-07-24T10:52:39Z", + "publishedAt": "2024-07-17T12:39:00.000Z", + "_createdAt": "2024-07-15T13:31:19Z", + "body": [ + { + "_type": "block", + "style": "h2", + "_key": "e4775c3fdbeb", + "markDefs": [], + "children": [ + { + "text": "Understanding the Nextflow User Community", + "_key": "8e229381fa710", + "_type": "span", + "marks": [] + } + ] + }, + { + "_key": "1a4d6ab2b758", + "markDefs": [ + { + "_key": "3025a534b404", + "_type": "link", + "href": "https://nextflow.io/" + }, + { + "href": "https://hubs.la/Q02HMCZ70", + "_key": "c521b2714e5e", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "In April, we conducted our annual ", + "_key": "19c26daec2820" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "State of the Workflow Community survey", + "_key": "eb8144492759" + }, + { + "_type": "span", + "marks": [], + "text": " to gather insights and feedback from the ", + "_key": "cc6ecf47c330" + }, + { + "_key": "19c26daec2823", + "_type": "span", + "marks": [ + "3025a534b404" + ], + "text": "Nextflow" + }, + { + "marks": [], + "text": " user community, and we are excited to share that this year, ", + "_key": "19c26daec2824", + "_type": "span" + }, + { + "_key": "19c26daec2825", + "_type": "span", + "marks": [ + "strong" + ], + "text": "600+ Nextflow users" + }, + { + "marks": [], + "text": " participated - a 21% increase from 2023! 
By sharing these insights, we aim to empower researchers, developers, and organizations to leverage Nextflow effectively, fostering innovation and collaboration amongst the community. Here we share some key findings from the Nextflow user community.\n\n", + "_key": "19c26daec2826", + "_type": "span" + }, + { + "text": "DOWNLOAD THE FULL SURVEY", + "_key": "a71cfed51202", + "_type": "span", + "marks": [ + "c521b2714e5e", + "strong", + "underline" + ] + } + ], + "_type": "block", + "style": "normal" + }, + { + "_key": "eefa4afc5452", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "bc60c34cfbea" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Key Findings At a Glance", + "_key": "1789872287f70" + } + ], + "_type": "block", + "style": "h2", + "_key": "a163a8498996" + }, + { + "children": [ + { + "text": "", + "_key": "1b07495615020", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "3773f2653df9", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "✔ Shift from HPC to public cloud ", + "_key": "159cafaf5df10" + }, + { + "marks": [], + "text": "- Majority of biotech and industrial sectors now favor public clouds for running Nextflow, with 78% indicating plans to migrate in the next two years.\n", + "_key": "5612ee1622a6", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "ab334e5b3812", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "✔ Multi-cloud deployments are on the rise", + "_key": "020a0bd3ea7a0" + }, + { + "_type": "span", + "marks": [], + "text": " - To meet growing computational and data availability needs, 14% of Nextflow users manage workloads across two clouds.", + "_key": "020a0bd3ea7a1" + } + ], + "_type": "block", + "style": "normal", + 
"_key": "65e306b63bb8", + "markDefs": [] + }, + { + "children": [ + { + "marks": [], + "text": "", + "_key": "2fc2e91abdef0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "9660db208e5b", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "✔ Open Science is key for streamlining research", + "_key": "0c05be0b85150" + }, + { + "_type": "span", + "marks": [], + "text": " - 82% of Nextflow users view Open Science as fundamental to research, advancing science for everyone.", + "_key": "0c05be0b85151" + } + ], + "_type": "block", + "style": "normal", + "_key": "529f6f989016", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "03d694b833bd", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "6a2a3e4ecd55", + "_type": "span", + "marks": [] + } + ] + }, + { + "_type": "image", + "_key": "5fd07cc3c181", + "asset": { + "_ref": "image-3fff8e82dc7ab66c289f1c32186e563997af4e7f-1200x836-png", + "_type": "reference" + } + }, + { + "_type": "block", + "style": "normal", + "_key": "3012982ecad0", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "4a6b7ff26854" + } + ] + }, + { + "children": [ + { + "marks": [], + "text": "Bioinformatics Analysis is Moving to Public Clouds", + "_key": "94dd65e5e77d0", + "_type": "span" + } + ], + "_type": "block", + "style": "h3", + "_key": "c100760180b5", + "markDefs": [] + }, + { + "style": "normal", + "_key": "47d223e5d87e", + "markDefs": [], + "children": [ + { + "text": "In recent years, we have witnessed a notable shift in bioinformatics analysis towards public cloud platforms, driven largely by for-profit organizations seeking enhanced reliability, scalability and flexibility in their computational workflows. Our survey found that while on-premises clusters remain the most common for users in general, the prevalence of traditional HPC environments is on a steady decline. 
Specifically, in the biotech industry, nearly ", + "_key": "f848cb5dd3cc0", + "_type": "span", + "marks": [] + }, + { + "text": "three-quarters of firms now favor public clouds, ", + "_key": "f848cb5dd3cc1", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "f848cb5dd3cc2", + "_type": "span", + "marks": [], + "text": "reflecting a broader industry trend toward adaptable and robust computing solutions. " + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "474b62cda575", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "e531fa4ad4dc" + } + ], + "_type": "block" + }, + { + "children": [ + { + "text": "", + "_key": "6a32f4c62759", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "d571f398163a", + "markDefs": [] + }, + { + "_key": "77bfd81b8e7f", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Multi-Cloud Deployments are Rising", + "_key": "cbc09602d1f20" + } + ], + "_type": "block", + "style": "h3" + }, + { + "markDefs": [], + "children": [ + { + "_key": "b8fc76e313c00", + "_type": "span", + "marks": [], + "text": "As the industry continues to scale their workflow, they are increasingly adopting multi-cloud strategies to meet the demands of diverse computational workflows. In 2021, just " + }, + { + "text": "10% of cloud batch service users", + "_key": "b8fc76e313c01", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_type": "span", + "marks": [], + "text": " were running workloads in ", + "_key": "b8fc76e313c02" + }, + { + "marks": [ + "strong" + ], + "text": "two separate clouds", + "_key": "b8fc76e313c03", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": ". By 2024, this figure had ", + "_key": "b8fc76e313c04" + }, + { + "text": "risen to 14%", + "_key": "b8fc76e313c05", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_type": "span", + "marks": [], + "text": ". 
Additionally, 3% of users utilized three different cloud batch services in 2021, which increased to 4% by 2024. This trend highlights the ", + "_key": "b8fc76e313c06" + }, + { + "marks": [ + "strong" + ], + "text": "move towards deploying across multiple cloud providers", + "_key": "b8fc76e313c07", + "_type": "span" + }, + { + "_key": "b8fc76e313c08", + "_type": "span", + "marks": [], + "text": " to address bioinformatics' growing computational and data availability needs across various regions and technical complexities." + } + ], + "_type": "block", + "style": "normal", + "_key": "d8db77c8db24" + }, + { + "style": "normal", + "_key": "7428ac4b77d7", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "fa0bd7669202", + "_type": "span" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "e835fa9dd922" + } + ], + "_type": "block", + "style": "normal", + "_key": "303f6c725740" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Open Science: Advancing Science for Everyone", + "_key": "cf20f469a9910" + } + ], + "_type": "block", + "style": "h3", + "_key": "dff1615fb612", + "markDefs": [] + }, + { + "children": [ + { + "marks": [], + "text": "Open Science has emerged as a transformative approach within the bioinformatics community, significantly enhancing collaboration, efficiency, and cost-effectiveness. 
Around ", + "_key": "957a2c3bdc7a0", + "_type": "span" + }, + { + "marks": [ + "strong" + ], + "text": "82% of survey respondents", + "_key": "957a2c3bdc7a1", + "_type": "span" + }, + { + "marks": [], + "text": " emphasized the", + "_key": "957a2c3bdc7a2", + "_type": "span" + }, + { + "marks": [ + "strong" + ], + "text": " fundamental role of Open Science", + "_key": "957a2c3bdc7a3", + "_type": "span" + }, + { + "_key": "957a2c3bdc7a4", + "_type": "span", + "marks": [], + "text": " in their research practices, reflecting strong community endorsement. Additionally, two-thirds reported " + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "significant time-savings", + "_key": "957a2c3bdc7a5" + }, + { + "_key": "957a2c3bdc7a6", + "_type": "span", + "marks": [], + "text": " through Open Science and 42% acknowledged the " + }, + { + "marks": [ + "strong" + ], + "text": "financial benefits, ", + "_key": "957a2c3bdc7a7", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": "highlighting the value of transparency in research. 
This shift fosters effective knowledge sharing and collaborative advancement, accelerating research outcomes while reinforcing accountability and scientific integrity.", + "_key": "957a2c3bdc7a8" + } + ], + "_type": "block", + "style": "normal", + "_key": "0e370eea37e9", + "markDefs": [] + }, + { + "_key": "a09d247e84e8", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "e803e4047b81" + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Read the Full Report Now", + "_key": "7f3ac0c64cdf" + } + ], + "_type": "block", + "style": "h2", + "_key": "2850ca907c4c", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_key": "b357bf5090ac0", + "_type": "span", + "marks": [], + "text": "Our 2024 State of the Workflow Community Survey provides insights into the evolving landscape of bioinformatics and scientific computing. The shift towards public and multi-cloud platforms, combined with the transformative impact of Open Science, is reshaping the Nextflow ecosystem and revolutionizing computational workflows. Embracing these trends not only drives innovation but also ensures that scientific inquiry remains robust, accountable, and accessible to all, paving the way for continued progress in bioinformatics and beyond." 
+ } + ], + "_type": "block", + "style": "normal", + "_key": "1b90b2084f12" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://nf-co.re/", + "_key": "517a41101c8d" + }, + { + "_type": "link", + "href": "https://hubs.la/Q02HMCZ70", + "_key": "688962534403" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Dive into the ", + "_key": "01a3a713b8430" + }, + { + "marks": [ + "688962534403" + ], + "text": "full report", + "_key": "01a3a713b8431", + "_type": "span" + }, + { + "marks": [], + "text": " to uncover further insights on how bioinformaticians are running pipelines, the pivotal role of the ", + "_key": "01a3a713b8432", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "517a41101c8d" + ], + "text": "nf-core community", + "_key": "01a3a713b8433" + }, + { + "text": ", and other key trends —your glimpse into the future of computational workflows awaits!\n", + "_key": "01a3a713b8434", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "c690c04d6473" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "722484bb3cf0" + } + ], + "_type": "block", + "style": "normal", + "_key": "d7acd64713cf" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://hubs.la/Q02HMCZ70", + "_key": "042ff6244a59" + } + ], + "children": [ + { + "text": "DOWNLOAD THE FULL SURVEY NOW", + "_key": "e3fcb08b34ad0", + "_type": "span", + "marks": [ + "042ff6244a59", + "strong", + "underline" + ] + } + ], + "_type": "block", + "style": "normal", + "_key": "729b6f099a85" + }, + { + "style": "normal", + "_key": "7d81e062e8a9", + "markDefs": [], + "children": [ + { + "text": "\n", + "_key": "d1cf1e278328", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "strong", + "underline" + ], + "text": "\n\n", + "_key": "cbd3cd3cbad9" + } + ], + "_type": "block" + } + ], + "title": "The State of the Workflow 2024: Community Survey 
Results", + "meta": { + "_type": "meta", + "description": "State of the Workflow 2024 Community Survey Results: Insights from 600+ Nextflow users about the state of workflow management for scientific data analysis", + "noIndex": false, + "slug": { + "current": "the-state-of-the-workflow-2024-community-survey-results", + "_type": "slug" + } + }, + "author": { + "_ref": "evan-floden", + "_type": "reference" + }, + "_rev": "c8Y6ejr6xtast8r4qB9SlG" + }, + { + "author": { + "_ref": "paolo-di-tommaso", + "_type": "reference" + }, + "_rev": "n1tMSWxwIdUSjJ5EuKAZgf", + "body": [ + { + "_type": "block", + "style": "h2", + "_key": "5c3efce1d43a", + "markDefs": [], + "children": [ + { + "text": "Streamlining containers lifecycle", + "_key": "9f757a56788c0", + "_type": "span", + "marks": [ + "strong" + ] + } + ] + }, + { + "markDefs": [], + "children": [ + { + "text": "In the bioinformatics landscape, containerized workflows have become crucial for ensuring reproducibility in data analysis. By encapsulating applications and their dependencies into", + "_key": "bbd08134fa220", + "_type": "span", + "marks": [] + }, + { + "text": " portable, self-contained packages", + "_key": "bbd08134fa221", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "text": ", containers enable seamless distribution across diverse computing environments. 
However, this innovation comes with its own set of challenges such as maintaining and validating collections of images, operating private registries and limited tool access.", + "_key": "bbd08134fa222", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "133925eb69a3" + }, + { + "_type": "block", + "style": "normal", + "_key": "5406af305818", + "markDefs": [ + { + "_key": "3af825e880ca", + "_type": "link", + "href": "https://seqera.io/wave/" + }, + { + "href": "https://seqera.io/containers/", + "_key": "df13db0993d9", + "_type": "link" + } + ], + "children": [ + { + "marks": [], + "text": "Seqera’s ", + "_key": "ea38e78f202c0", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "3af825e880ca" + ], + "text": "Wave", + "_key": "ea38e78f202c1" + }, + { + "_key": "ea38e78f202c2", + "_type": "span", + "marks": [], + "text": " tackles these challenges by offering a suite of features designed to simplify the configuration, provisioning and management of software containers for data pipelines at scale. In this blog, we will explore common pitfalls of managing containerized workflows, examine how Wave overcomes these obstacles, and discover how " + }, + { + "_type": "span", + "marks": [ + "df13db0993d9" + ], + "text": "Seqera Containers", + "_key": "ea38e78f202c3" + }, + { + "text": " further enhances the Wave user experience.", + "_key": "ea38e78f202c4", + "_type": "span", + "marks": [] + } + ] + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "d494224541cb", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "ce9241113361" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://hubs.la/Q02P4r9W0", + "_key": "34a3f2d2cbdc" + } + ], + "children": [ + { + "_key": "3d0c7b96ea780", + "_type": "span", + "marks": [ + "34a3f2d2cbdc" + ], + "text": "Read the Whitepaper Now!" 
+ } + ], + "_type": "block", + "style": "blockquote", + "_key": "e2920de8bcf3" + }, + { + "children": [ + { + "_key": "9b968ce154aa", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "95228c8b8d8b", + "markDefs": [] + }, + { + "style": "h2", + "_key": "24eb80288494", + "markDefs": [], + "children": [ + { + "text": "Handling containerized workflows at scale is not easy", + "_key": "1c7b66a371820", + "_type": "span", + "marks": [ + "strong" + ] + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "2057a7ea39aa", + "markDefs": [ + { + "_type": "link", + "href": "https://biocontainers.pro/", + "_key": "d3824d3cdd3a" + } + ], + "children": [ + { + "text": "Software containers have been heavily adopted as a solution to streamline both the configuration and deployment of dependencies in complex data pipelines. However, maintaining containers at scale is not without its difficulties. Building, storing and distributing container images is an error-prone and tedious task that increases the cognitive load on software engineers, ultimately diminishing their productivity. Community-maintained container collections, such as ", + "_key": "d660fe7595e30", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "d3824d3cdd3a" + ], + "text": "BioContainers", + "_key": "d660fe7595e31", + "_type": "span" + }, + { + "_key": "d660fe7595e32", + "_type": "span", + "marks": [], + "text": ", have emerged to mitigate some of these challenges. However, still, several problems remain:" + } + ] + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "marks": [ + "strong" + ], + "text": "Publicly Accessible Container Images", + "_key": "a01a55bd260e0", + "_type": "span" + }, + { + "marks": [], + "text": ": Issues with stability can compromise reliability. 
Typically unsuitable for non-academic organizations due to security and compliance concerns.\n\n", + "_key": "a01a55bd260e1", + "_type": "span" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "3099a4010964" + }, + { + "_key": "592901bbf056", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Limited Tool Access: ", + "_key": "2b4dea072f470" + }, + { + "text": "Access is restricted to only specific tools or collections (e.g. BioConda). Organizations often need the flexibility to assemble and deploy custom containers.\n\n", + "_key": "2b4dea072f471", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "1969250dec82", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "API Rate Limits:", + "_key": "f3612e248c410" + }, + { + "_type": "span", + "marks": [], + "text": " Public registries often impose low API rate limits and afford low-rate or low-quality SLAs, making them unsuitable for production workloads.\n\n", + "_key": "f3612e248c411" + } + ], + "level": 1, + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Egress Costs", + "_key": "5a3fdfc32935" + }, + { + "text": ": Use of private registries can incur outbound data transfer costs, particularly when deploying pipelines at scale across multiple regions or cloud providers.\n\n", + "_key": "dd0bf6ca10b3", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "9ea73e59c666", + "listItem": "bullet" + }, + { + "children": [ + { + "marks": [], + "text": "Seqera’s ", + "_key": "2d0b37d34c330", + "_type": "span" + }, + { + "_key": "2d0b37d34c331", + "_type": "span", + "marks": [ + "f54c460b69c3" + ], + "text": "Wave" + }, + { + "_key": "2d0b37d34c332", 
+ "_type": "span", + "marks": [], + "text": " solves these problems by simplifying the management of containerized bioinformatics workflows by " + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "provisioning containers on-demand during pipeline execution.", + "_key": "2d0b37d34c333" + }, + { + "_type": "span", + "marks": [], + "text": " This approach ensures the delivery of container images that are defined precisely depending on requirements of each pipeline task in terms of dependencies and platform architecture. The process is ", + "_key": "2d0b37d34c334" + }, + { + "text": "completely transparent and fully automated,", + "_key": "2d0b37d34c335", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_key": "2d0b37d34c336", + "_type": "span", + "marks": [], + "text": " eliminating the need to manually create, upload and maintain the numerous container images required for pipeline execution." + } + ], + "_type": "block", + "style": "normal", + "_key": "1bd6e257c774", + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/wave/", + "_key": "f54c460b69c3" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "text": "By integrating containers as ", + "_key": "a75abd67740a0", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "dynamic pipeline components ", + "_key": "a75abd67740a1" + }, + { + "marks": [], + "text": "rather than standalone artifacts, Wave streamlines development, enhances reliability, and reduces maintenance overhead. 
This makes it easier for developers and operations teams to build, deploy, and manage containers efficiently and securely.", + "_key": "a75abd67740a2", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "b7b00332c37f" + }, + { + "_key": "2bcc79444f9d", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "ff8ecd1929ad" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "marks": [ + "strong" + ], + "text": "How does Wave work?", + "_key": "ada86664aefe0", + "_type": "span" + } + ], + "_type": "block", + "style": "h2", + "_key": "bc1396a38011" + }, + { + "markDefs": [ + { + "_key": "0d6219f32dfe", + "_type": "link", + "href": "https://training.nextflow.io/basic_training/containers/#container-directives" + } + ], + "children": [ + { + "_key": "d2a901bcfde60", + "_type": "span", + "marks": [], + "text": "Wave transforms containers and pipeline management by allowing bioinformaticians to specify container requirements directly within their pipeline definitions. Instead of referencing manually created container images in " + }, + { + "marks": [ + "0d6219f32dfe" + ], + "text": "Nextflow’s ", + "_key": "d2a901bcfde61", + "_type": "span" + }, + { + "_key": "d2a901bcfde62", + "_type": "span", + "marks": [ + "0d6219f32dfe", + "em" + ], + "text": "container" + }, + { + "_type": "span", + "marks": [ + "0d6219f32dfe" + ], + "text": " directive", + "_key": "d2a901bcfde63" + }, + { + "_type": "span", + "marks": [], + "text": ", developers can either include a Dockerfile in the directory where the process' module is defined or just instruct Wave to use the Conda package associated with the process definition. 
By using this information, Wave provisions a container on-demand either using an existing container image in the target registry matching the specified requirement or building a new one on-the-fly to fulfill a new request, and returns the container URI pointing to the Wave container for process execution. The built container is then pushed to a destination registry and returned to the pipeline for execution, ensuring seamless integration and optimization across ", + "_key": "d2a901bcfde64" + }, + { + "marks": [ + "strong" + ], + "text": "diverse computational architectures.", + "_key": "d2a901bcfde65", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "86f8349d8dd9" + }, + { + "style": "normal", + "_key": "4bd12760fcd8", + "markDefs": [ + { + "_key": "b0443a516524", + "_type": "link", + "href": "https://www.nextflow.io/docs/latest/config.html" + } + ], + "children": [ + { + "marks": [], + "text": "Wave can also direct containers into a registry specified in the ", + "_key": "201624e167ae0", + "_type": "span" + }, + { + "_key": "201624e167ae1", + "_type": "span", + "marks": [ + "b0443a516524" + ], + "text": "nextflow.config file" + }, + { + "_type": "span", + "marks": [], + "text": ", along with other pipeline settings. This means containers can be served from cloud registries closer to where pipelines are executed, delivering ", + "_key": "201624e167ae2" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "better performance and reducing network traffic", + "_key": "201624e167ae3" + }, + { + "marks": [], + "text": ". Moreover, Wave operates independently, serving as a versatile tool for bioinformaticians across various platforms and workflows. 
By employing ", + "_key": "201624e167ae4", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "multi-level caching,", + "_key": "201624e167ae5" + }, + { + "marks": [], + "text": " Wave ensures that containers are built only once or when the Dockerfile changes, enhancing efficiency and streamlining the management of bioinformatics workflows.", + "_key": "201624e167ae6", + "_type": "span" + } + ], + "_type": "block" + }, + { + "_type": "image", + "_key": "6f161a64aa34", + "asset": { + "_ref": "image-63c1caffc660a4c615ef2551318bc7b8fb8eca7b-2165x680-png", + "_type": "reference" + } + }, + { + "children": [ + { + "_key": "2bb9a056f41c0", + "_type": "span", + "marks": [ + "strong", + "em" + ], + "text": "Figure 1." + }, + { + "_type": "span", + "marks": [ + "em" + ], + "text": " Wave —a smart container provisioning and augmentation service for Nextflow.", + "_key": "2bb9a056f41c1" + } + ], + "_type": "block", + "style": "normal", + "_key": "e33f18dee5bf", + "markDefs": [] + }, + { + "children": [ + { + "_key": "4e5278eb85f4", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "f6e484ee66d6", + "markDefs": [] + }, + { + "_type": "block", + "style": "h2", + "_key": "1f7634971dca", + "markDefs": [], + "children": [ + { + "text": "Key features of Wave", + "_key": "a220fc9a291d0", + "_type": "span", + "marks": [ + "strong" + ] + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "14d7473d0d0f", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "✔ ", + "_key": "5178d679f1fd0" + }, + { + "marks": [ + "strong" + ], + "text": "Access private container repositories", + "_key": "5178d679f1fd1", + "_type": "span" + }, + { + "_key": "5178d679f1fd2", + "_type": "span", + "marks": [], + "text": ": Seamlessly integrate Nextflow pipelines with Seqera Platform to grant access to private container repositories." 
+ } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "ee1c448e55ff", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "✔ ", + "_key": "770c482bc4f10" + }, + { + "_key": "770c482bc4f11", + "_type": "span", + "marks": [ + "strong" + ], + "text": "On-demand container provisioning:" + }, + { + "_type": "span", + "marks": [], + "text": " Automatically provision containers (via Dockerfile or Conda packages) based on dependencies in your Nextflow pipeline, enhancing efficiency, reducing errors, and eliminating the need for separate container builds and maintenance.", + "_key": "770c482bc4f12" + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "✔ ", + "_key": "753a230208440" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Enhanced security", + "_key": "753a230208441" + }, + { + "marks": [], + "text": ": Each new container provisioned by Wave undergoes a security scan to identify potential vulnerabilities.", + "_key": "753a230208442", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "dfc5be5494ed", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "✔", + "_key": "7ec429cf22a90", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": " Create multi-tool and multi-package containers", + "_key": "7ec429cf22a91" + }, + { + "text": ": Easily build and manage containers with diverse tools and packages, streamlining complex workflows with multiple dependencies.", + "_key": "7ec429cf22a92", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "e50b6a0ff82a" + }, + { + "style": "normal", + "_key": "a8c485516400", + "markDefs": [], + "children": [ + { + "_key": "51c15bf17a880", + "_type": "span", + "marks": [], + "text": "✔" + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": " Provision multi-format and multi-platform containers: ", + 
"_key": "51c15bf17a881" + }, + { + "marks": [], + "text": "Automatically provision containers for Docker or Singularity based on your Nextflow pipeline configuration and platform, including ARM64 containers for AWS Graviton if a compatible Dockerfile or Conda package is provided.", + "_key": "51c15bf17a882", + "_type": "span" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_key": "741df5fa35600", + "_type": "span", + "marks": [], + "text": "✔ " + }, + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Mirror Public and Private Repositories", + "_key": "741df5fa35601" + }, + { + "text": ": Mirror the containers needed by your pipelines in a registry co-located with where pipeline execution is carried out, allowing optimized data transfer costs and accelerated execution of pipeline tasks.", + "_key": "741df5fa35602", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "8d580f86471d" + }, + { + "_key": "e1cf4e3eabf4", + "markDefs": [ + { + "_type": "link", + "href": "https://hubs.la/Q02P4r9W0", + "_key": "45a6b1f720b3" + } + ], + "children": [ + { + "text": "Download the Whitepaper", + "_key": "5ebb5fcdeb6b", + "_type": "span", + "marks": [ + "45a6b1f720b3" + ] + }, + { + "_key": "835caf3a91f0", + "_type": "span", + "marks": [], + "text": " to explore features in more detail" + } + ], + "_type": "block", + "style": "blockquote" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong" + ], + "text": "Seqera Containers for publicly accessible container images", + "_key": "f4fa5324fa620" + } + ], + "_type": "block", + "style": "h2", + "_key": "418255c8286d" + }, + { + "_key": "6b076c6e5be9", + "markDefs": [ + { + "_type": "link", + "href": "https://hubs.la/Q02P4rwk0", + "_key": "49dd8bebf517" + } + ], + "children": [ + { + "text": "With the newly launched ", + "_key": "f2dc6721adb90", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "49dd8bebf517" + 
], + "text": "Seqera Containers", + "_key": "f2dc6721adb91", + "_type": "span" + }, + { + "marks": [], + "text": ", the Wave experience is elevated even further. Now, instead of browsing existing container images as with a traditional container registry, users can just specify which tools they require through an ", + "_key": "f2dc6721adb92", + "_type": "span" + }, + { + "_key": "f2dc6721adb93", + "_type": "span", + "marks": [ + "strong" + ], + "text": "intuitive and user-friendly web interface. " + }, + { + "text": "This will find an existing container image for the required tool(s) or build a container on-the-fly using the Wave service. Currently it supports any software package provided by the Bioconda, conda-forge and PyPI Conda channels. Containers can be built for both Docker and Singularity image formats and for both linux/amd64 and linux/arm64 CPU architectures.", + "_key": "f2dc6721adb94", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "d950641601c00", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "4c985acdcf31" + }, + { + "_key": "e81e3d89583c", + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/containers/", + "_key": "171c64cfcfbe" + }, + { + "_key": "339b65aacd58", + "_type": "link", + "href": "https://community.wave.seqera.io/" + } + ], + "children": [ + { + "_key": "3a00b180c5180", + "_type": "span", + "marks": [], + "text": "Additionally, " + }, + { + "text": "Seqera Containers", + "_key": "3a00b180c5181", + "_type": "span", + "marks": [ + "171c64cfcfbe" + ] + }, + { + "marks": [], + "text": " are stored permanently and publicly accessible via the registry host ", + "_key": "3a00b180c5182", + "_type": "span" + }, + { + "_key": "8c65146fbd29", + "_type": "span", + "marks": [ + "339b65aacd58" + ], + "text": "community.wave.seqera.io" + }, + { + "_key": "11bc24deeace", + "_type": "span", + "marks": 
[], + "text": ". This ensures that any future requests for the same package will return the exact container image, guaranteeing reproducibility across runs. The Seqera Containers project was developed in collaboration with Amazon Web Services, which is sponsoring the container hosting infrastructure.\n" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "image", + "_key": "7035fe8ea204", + "asset": { + "_ref": "image-d505d0b687501b2f43a47a688dc2e096886fbfff-883x451-jpg", + "_type": "reference" + } + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "strong", + "em" + ], + "text": "Figure 2", + "_key": "5497721bd8e40" + }, + { + "_key": "5497721bd8e41", + "_type": "span", + "marks": [ + "em" + ], + "text": ". Snapshot of Seqera Containers, demonstrating how you can create containers with the tools you want, on the fly." + }, + { + "text": "\n", + "_key": "5fccd7d33cef", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "c5f9e1119426" + }, + { + "markDefs": [], + "children": [ + { + "marks": [ + "strong" + ], + "text": "Discover the benefits of Wave", + "_key": "f811f3a434440", + "_type": "span" + } + ], + "_type": "block", + "style": "h2", + "_key": "aa383c5b254c" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "9da5596afc7b0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "a9639af3c287" + }, + { + "markDefs": [], + "children": [ + { + "_key": "a44c589047e10", + "_type": "span", + "marks": [], + "text": "Wave offers a transformative solution to the complexities of managing containerized bioinformatics workflows. By integrating containers directly into pipelines and prioritizing flexibility and efficiency, Wave streamlines development, enhances security, and optimizes performance across diverse computing environments. 
Deep dive into how Wave can revolutionize your workflow management by downloading our whitepaper today." + } + ], + "_type": "block", + "style": "normal", + "_key": "5f5fe5035844" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://hubs.la/Q02P4r9W0", + "_key": "29092c152215" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "29092c152215" + ], + "text": "Download the Wave Whitepaper", + "_key": "08116ce2b1b70" + } + ], + "_type": "block", + "style": "blockquote", + "_key": "c4ebd70557cd" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "2d5affde12080" + } + ], + "_type": "block", + "style": "normal", + "_key": "2deff6b4aab7" + } + ], + "_updatedAt": "2024-09-10T08:00:16Z", + "tags": [ + { + "_ref": "6f35c54a-0d93-4aef-9d80-bd4ccb6527b4", + "_type": "reference", + "_key": "e6e4331ef27a" + } + ], + "meta": { + "description": "In the bioinformatics landscape, containerized workflows have become crucial for ensuring reproducibility in data analysis. 
By encapsulating applications and their dependencies into portable, self-contained packages, containers enable seamless distribution across diverse computing environments.", + "noIndex": false, + "slug": { + "current": "wave-rethinking-software-containers-for-data-pipelines", + "_type": "slug" + }, + "_type": "meta" + }, + "_type": "blogPost", + "_id": "b032b7fb-8dc8-464e-b4c8-18cc9b8c2dd1", + "publishedAt": "2024-09-10T07:44:00.000Z", + "_createdAt": "2024-09-09T07:56:25Z", + "title": "Wave: rethinking software containers for data pipelines" + }, + { + "_id": "b4ad09fa-b8ee-484f-9843-57c3073027a8", + "_rev": "hLqYCNYcORjetYCGdcbaMx", + "_updatedAt": "2024-08-27T23:50:43Z", + "_createdAt": "2024-04-29T15:30:18Z", + "_type": "blogPost", + "title": "Data Studios – Interactive analysis in Seqera Platform", + "publishedAt": "2024-05-23T12:29:00.000Z", + "author": { + "_type": "reference", + "_ref": "f25df58c-156e-4294-98ba-f9dcd6860c39" + }, + "body": [ + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Nextflow is the de facto standard for reproducible workflows in the cloud, but the scientific data lifecycle is much broader than just pipelines — including iterative development, tertiary analysis, and data modeling. 
With the Seqera Platform, we aim to enable rapid iteration and collaboration across the entire scientific lifecycle, saving you time whether you’re experimenting, conducting research, preparing for your next clinical trial, or producing a new therapeutic.", + "_key": "9b67215adc020", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "3747df96bf39" + }, + { + "children": [ + { + "_key": "68aad3fffb30", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "d0aeec9f5ae8", + "markDefs": [] + }, + { + "style": "normal", + "_key": "eede8d97a919", + "markDefs": [ + { + "_type": "link", + "href": "https://youtu.be/yfMFFHTR-dk?feature=shared", + "_key": "241de9355755" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "In October 2023, Seqera CEO and co-founder Evan Floden ", + "_key": "82d742a05e220" + }, + { + "text": "unveiled the private-preview of Data Studios", + "_key": "82d742a05e221", + "_type": "span", + "marks": [ + "241de9355755" + ] + }, + { + "_key": "82d742a05e222", + "_type": "span", + "marks": [], + "text": ", enabling streamlined creation of collaborative notebook environments using cloud-native components coupled with your data and hosted in your own secure environment. Today we’re excited to announce that Data Studios is publicly available to all Seqera Cloud users in Public Preview!" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Combining Workflows and Data Analysis", + "_key": "b01378acab3e0" + } + ], + "_type": "block", + "style": "h2", + "_key": "5ee9848c5668", + "markDefs": [] + }, + { + "children": [ + { + "marks": [], + "text": "Nextflow and Seqera Platform are enormously effective at launching, managing, and collaborating on scientific data analysis pipelines. 
However, a pipeline run is often not where the analysis ends, and for every user who needs to run and manage pipelines, many others, including analysts and data scientists, need interactive environments such as ", + "_key": "83cca5c492c90", + "_type": "span" + }, + { + "_key": "83cca5c492c91", + "_type": "span", + "marks": [ + "deaaa72caddd" + ], + "text": "Jupyter Notebooks" + }, + { + "_type": "span", + "marks": [], + "text": " or ", + "_key": "83cca5c492c92" + }, + { + "_type": "span", + "marks": [ + "ba240fce2f0a" + ], + "text": "RStudio", + "_key": "83cca5c492c93" + }, + { + "marks": [], + "text": ". These are used for exploratory data analysis, modeling, and building visualizations and dashboards for analyzing and sharing scientific results.", + "_key": "83cca5c492c94", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "6f8a7dbbff78", + "markDefs": [ + { + "_key": "deaaa72caddd", + "_type": "link", + "href": "https://jupyter.org/" + }, + { + "href": "https://posit.co/products/open-source/rstudio-server/", + "_key": "ba240fce2f0a", + "_type": "link" + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "For scientific users, deploying and configuring secure, performant interactive notebook environments to work with data in context has traditionally been surprisingly hard. As a concrete example, consider a scenario where a pipeline is running on AWS, and a data scientist wants to analyze results stored in Amazon S3 using a familiar Jupyter Notebook. 
Configuration doesn’t happen by itself: the notebook must be hosted, made network accessible, authorization limited to specific groups of users, and pre-configured with packages commonly used in bioinformatics, such as ", + "_key": "cad5c149afc70" + }, + { + "text": "Biopython", + "_key": "cad5c149afc71", + "_type": "span", + "marks": [ + "176bf45acd7f" + ] + }, + { + "text": ", ", + "_key": "cad5c149afc72", + "_type": "span", + "marks": [] + }, + { + "text": "NumPy", + "_key": "cad5c149afc73", + "_type": "span", + "marks": [ + "23c777ded073" + ] + }, + { + "_type": "span", + "marks": [], + "text": ", ", + "_key": "cad5c149afc74" + }, + { + "_type": "span", + "marks": [ + "76defb7bdf72" + ], + "text": "Scikit-learn", + "_key": "cad5c149afc75" + }, + { + "marks": [], + "text": ", and ", + "_key": "cad5c149afc76", + "_type": "span" + }, + { + "_key": "cad5c149afc77", + "_type": "span", + "marks": [ + "05f2be477354" + ], + "text": "Matplotlib" + }, + { + "text": ".", + "_key": "cad5c149afc78", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "6c284cb1c78b", + "markDefs": [ + { + "_type": "link", + "href": "https://biopython.org/", + "_key": "176bf45acd7f" + }, + { + "_type": "link", + "href": "https://numpy.org/", + "_key": "23c777ded073" + }, + { + "_key": "76defb7bdf72", + "_type": "link", + "href": "https://scikit-learn.org/stable/" + }, + { + "_type": "link", + "href": "https://matplotlib.org/", + "_key": "05f2be477354" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "33ed6097af29", + "markDefs": [ + { + "_type": "link", + "href": "https://pandas.pydata.org/", + "_key": "b03950b8cd86" + }, + { + "_type": "link", + "href": "https://pypi.org/project/s3fs/", + "_key": "d9bd8791dd9f" + } + ], + "children": [ + { + "marks": [], + "text": "Before data can even be read using ", + "_key": "79bac17c20c90", + "_type": "span" + }, + { + "_key": "79bac17c20c91", + "_type": "span", + "marks": [ + "b03950b8cd86" + 
], + "text": "pandas" + }, + { + "_type": "span", + "marks": [], + "text": ", ", + "_key": "79bac17c20c92" + }, + { + "text": "s3fs", + "_key": "79bac17c20c93", + "_type": "span", + "marks": [ + "d9bd8791dd9f" + ] + }, + { + "text": " must be installed, which in turn depends on other prerequisite packages. Additionally, Notebook users must know the paths to the S3 buckets where the data files reside, including the Nextflow pipeline work directory, and have appropriate access.", + "_key": "79bac17c20c94", + "_type": "span", + "marks": [] + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Multiply this complexity across multiple tools, cloud providers, file stores, languages, and libraries, and you get the picture: configuring these environments is tedious, time-consuming, error-prone, and often beyond the privilege-level or expertise of analysts.", + "_key": "656ebd1feba10" + } + ], + "_type": "block", + "style": "normal", + "_key": "4d7987426cdb", + "markDefs": [] + }, + { + "children": [ + { + "marks": [], + "text": "Data Studios – Simplifying Analysis Environment Management", + "_key": "9f2d80bce8e40", + "_type": "span" + } + ], + "_type": "block", + "style": "h2", + "_key": "5f76dcbc9e10", + "markDefs": [] + }, + { + "style": "normal", + "_key": "9ee6aebb5ed6", + "markDefs": [], + "children": [ + { + "_key": "7a29798a76410", + "_type": "span", + "marks": [], + "text": "Data Studios enable you to easily create, manage, and share notebook environments in Seqera Platform using point-and-click actions — connecting your data to on-demand batch computing resources — similar to how you currently manage Nextflow pipelines." + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "text": "Like pipelines, Data Studios enable simple deployment and scaling using customizable, ephemeral compute environments and containers. 
You add new interactive environments based on predefined templates, as shown below, defining your own metadata, vCPUs and memory, and deploying them with any (public or private) data mounted on a variety of compute environments already configured in Seqera Platform.", + "_key": "e89cae2f83e70", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "b1c7a00ccc8a" + }, + { + "_type": "youtube", + "id": "hXqaxkfx5Fo", + "_key": "828447aabb80" + }, + { + "children": [ + { + "text": "The initial release of Data Studios ships with pre-built container templates for Jupyter and RStudio, and environments can be shared with individuals and teams in Seqera Platform using Role Based Access Control (RBAC).", + "_key": "9858767267210", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "19737e623cef", + "markDefs": [] + }, + { + "style": "normal", + "_key": "8c7b3e38ba0a", + "markDefs": [], + "children": [ + { + "text": "The productivity impacts for data scientists and analysts are profound: you can launch your preferred interactive environment with a single click, pre-configured with the necessary libraries and notebook markdown files, and have immediate access to pipeline data output for real-time analysis in-context. Furthermore, you can collaborate with colleagues by securely sharing Data Studios, along with the code and visualizations within. 
Some use cases already developed include:", + "_key": "f5a74dcbc4f90", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "listItem": "bullet", + "markDefs": [ + { + "_key": "6962bf0684f4", + "_type": "link", + "href": "https://nf-co.re/scrnaseq/2.6.0" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Processing single-cell RNAseq data using ", + "_key": "8597e771fe3c0" + }, + { + "marks": [ + "6962bf0684f4" + ], + "text": "nf-core/scrnaseq", + "_key": "8597e771fe3c1", + "_type": "span" + }, + { + "_key": "8597e771fe3c2", + "_type": "span", + "marks": [], + "text": " and performing downstream, interactive analysis using the popular Scanpy (Python) or Seurat (R) packages." + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "1dbbb766369d" + }, + { + "listItem": "bullet", + "markDefs": [ + { + "_type": "link", + "href": "https://nf-co.re/differentialabundance/1.5.0", + "_key": "ca2f018962e4" + } + ], + "children": [ + { + "marks": [], + "text": "Running a differential gene expression analysis using ", + "_key": "f3f51ce005e00", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "ca2f018962e4" + ], + "text": "nf-core/differentialabundance", + "_key": "f3f51ce005e01" + }, + { + "_type": "span", + "marks": [], + "text": " and launching an R Shiny app to explore the results in an RStudio notebook.", + "_key": "f3f51ce005e02" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "5fb4b2606c71" + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "text": "Extending pipeline functionality by experimenting with Nextflow and Bash in VSCode directly using output data from your pipeline run in Seqera Platform.", + "_key": "38ecc47eb508", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "d69c14515886" + }, + { + "asset": { + "_ref": "image-cca2af1246925935f1649dab3584aeb6d5d63d58-1314x858-png", + "_type": 
"reference" + }, + "_type": "image", + "_key": "50f8e11bbc35" + }, + { + "children": [ + { + "text": "Snapshots and Session Persistence", + "_key": "c7c22f10dc4a0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h2", + "_key": "b53f59b5c016", + "markDefs": [] + }, + { + "style": "normal", + "_key": "919ee4be64cb", + "markDefs": [], + "children": [ + { + "_key": "91775fdb33e00", + "_type": "span", + "marks": [], + "text": "Data Studios can be started and stopped at-will, preserving state at every step. This includes all code, output and metadata, ensuring minimum costs are incurred compared to managing independent, dedicated analysis VMs. And all while providing fault tolerance, improved reproducibility, and portability of analyses." + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_key": "aae0e9bff38b0", + "_type": "span", + "marks": [], + "text": "State is preserved via timestamped snapshots of the Data Studio environment. Individual snapshots can optionally be renamed for improved discoverability, and used as the base template for a new Data Studio, preserving the complete analysis history and allowing experimentation without impacting the original analysis environment." 
+ } + ], + "_type": "block", + "style": "normal", + "_key": "788d1a1a9db2" + }, + { + "asset": { + "_ref": "image-b752c9e4a00bcd028391a8265b31992b7bdf04d0-1313x859-png", + "_type": "reference" + }, + "_type": "image", + "_key": "babd96dc2978" + }, + { + "_type": "block", + "style": "h2", + "_key": "436881fdd30c", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Not just for analysis", + "_key": "1adec5edc4090" + } + ] + }, + { + "style": "normal", + "_key": "9d9a7cf2c0e5", + "markDefs": [ + { + "href": "https://code.visualstudio.com/docs/remote/vscode-server", + "_key": "a227a4250f32", + "_type": "link" + } + ], + "children": [ + { + "_key": "9ab1335a4b120", + "_type": "span", + "marks": [], + "text": "Beyond analysts and data scientists, Data Studios are a powerful tool for bioinformaticians developing workflows. In this initial release, we offer a Data Studios template for Microsoft’s " + }, + { + "_key": "9ab1335a4b121", + "_type": "span", + "marks": [ + "a227a4250f32" + ], + "text": "VS Code Server" + }, + { + "marks": [], + "text": " — a web-based version of the popular VS Code IDE commonly used by Nextflow pipeline developers.", + "_key": "9ab1335a4b122", + "_type": "span" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "724074849cc4", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Unlike the current process where developers typically build and test Nextflow modules and pipelines locally, Data Studios facilitates building, testing, and troubleshooting pipelines in production environments using cloud executors and real data.", + "_key": "0b117cdb56d50" + } + ] + }, + { + "_key": "9323e2b0b843", + "markDefs": [], + "children": [ + { + "_key": "e9a49bc43c070", + "_type": "span", + "marks": [], + "text": "Software issues commonly appear when running in specific environments or with particular datasets. 
Faced with a problem, developers can simply enter their familiar IDE in Data Studios and begin troubleshooting the issue live and in context using real pipeline data." + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "h2", + "_key": "786c5d5206f3", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Looking forward", + "_key": "0526e2f1baaa0", + "_type": "span" + } + ], + "_type": "block" + }, + { + "_key": "cebbe88ca952", + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/blog/introducing-data-explorer/", + "_key": "90e54ff7b10a" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Just as ", + "_key": "c34f69a868af0" + }, + { + "marks": [ + "90e54ff7b10a" + ], + "text": "Data Explorer", + "_key": "c34f69a868af1", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " boosts productivity for researchers and analysts, Data Studios does the same for data scientists. Data Explorer enables researchers to easily access and manage data residing in cloud storage buckets from within Seqera Platform, without switching to external environments like the Amazon S3 console. Similarly, Data Studios enables users to easily launch interactive open science tools to analyze data in-context — no matter where the pipelines run or the output data resides — and use those analyses to inform colleagues in real-time with critical updates to pivot experimental approaches or methodologies. 
By combining Data Explorer, Pipelines, and Data Studios, Seqera Platform helps guide teams through the scientific data lifecycle enabling:", + "_key": "c34f69a868af2" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "048875dc524f", + "listItem": "number", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Simple linking and exploration of data as it’s generated via Data Explorer.", + "_key": "56c8273279270", + "_type": "span" + } + ], + "level": 1 + }, + { + "style": "normal", + "_key": "160fcb340239", + "listItem": "number", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Ability to easily develop, deploy, and scale Pipelines.", + "_key": "f7ce514137250", + "_type": "span" + } + ], + "level": 1, + "_type": "block" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "b52a78e1182c", + "listItem": "number", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Seamless transition from Pipeline output to interactive analysis with Data Studios.", + "_key": "77a3907646b50", + "_type": "span" + } + ] + }, + { + "style": "normal", + "_key": "46b053085fb1", + "markDefs": [], + "children": [ + { + "_key": "cfc21d68932c0", + "_type": "span", + "marks": [], + "text": "While work continues, Data Studios represents a significant step forward. In the coming months, we'll continue developing additional features including support for custom templates, a cost estimator, resource labels, and improved integration across the Seqera Platform." 
+ } + ], + "_type": "block" + }, + { + "markDefs": [ + { + "_key": "a375f3e9e299", + "_type": "link", + "href": "https://www.illumina.com/products/by-type/informatics-products/basespace-sequence-hub/apps/integrative-genomics-viewer.html" + }, + { + "_type": "link", + "href": "https://github.com/Xpra-org/xpra/tree/master", + "_key": "0f7c000bf363" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Much as the nf-core community builds and curates production-quality pipelines and modules, we envision a similar catalog of Data Studio templates in the future comprising additional interactive analysis tools, such as ", + "_key": "50e028c069d90" + }, + { + "_type": "span", + "marks": [ + "a375f3e9e299" + ], + "text": "Integrative Genomics Viewer", + "_key": "50e028c069d91" + }, + { + "marks": [], + "text": " (IGV) and web-based IDEs such as ", + "_key": "50e028c069d92", + "_type": "span" + }, + { + "text": "xpra", + "_key": "50e028c069d93", + "_type": "span", + "marks": [ + "0f7c000bf363" + ] + }, + { + "marks": [], + "text": ".", + "_key": "50e028c069d94", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "fc3c736fd7db" + }, + { + "style": "h2", + "_key": "73553d428748", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Learning more", + "_key": "6339031fea9f0", + "_type": "span" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "You can view running Data Studios today in the Seqera Platform ", + "_key": "a765d985bb520" + }, + { + "text": "Community Showcase workspace", + "_key": "a765d985bb521", + "_type": "span", + "marks": [ + "d64d0b1e03f4" + ] + }, + { + "_type": "span", + "marks": [], + "text": ". 
To enable Data Studios for your own organization, reach out to your Seqera Account Manager or start a ", + "_key": "a765d985bb522" + }, + { + "text": "free-trial", + "_key": "a765d985bb523", + "_type": "span", + "marks": [ + "e8e9efad9a15" + ] + }, + { + "marks": [], + "text": " today.", + "_key": "a765d985bb524", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "f85467450616", + "markDefs": [ + { + "_key": "d64d0b1e03f4", + "_type": "link", + "href": "https://cloud.seqera.io/orgs/community/workspaces/showcase/" + }, + { + "href": "https://cloud.seqera.io/login", + "_key": "e8e9efad9a15", + "_type": "link" + } + ] + } + ], + "tags": [ + { + "_type": "reference", + "_key": "8e3ca4dcbba0", + "_ref": "82fd60f1-c6d0-4b8a-9c5d-f971c622f341" + }, + { + "_type": "reference", + "_key": "25faaa6c567d", + "_ref": "f1d61674-9374-4d2c-97c2-55778db7c922" + }, + { + "_type": "reference", + "_key": "c757b6d26455", + "_ref": "2b5c9a56-b491-42aa-b291-86611d77ccec" + } + ], + "meta": { + "noIndex": false, + "slug": { + "_type": "slug", + "current": "data-studios-announcement" + }, + "_type": "meta", + "shareImage": { + "asset": { + "_ref": "image-a9c10263524a5906891633a2c94da730848d3ba8-1200x628-png", + "_type": "reference" + }, + "_type": "image" + }, + "description": "An overview of Data Studios - a new feature in Seqera Platform that enables analysts and data scientists to add interactive environments." 
+ } + }, + { + "tags": [ + { + "_type": "reference", + "_key": "f30d3e591314", + "_ref": "b6511053-299b-4aa5-8957-94fb9ebc9493" + }, + { + "_ref": "1b55a117-18fe-40cf-8873-6efd157a6058", + "_type": "reference", + "_key": "4525d8907a1f" + }, + { + "_key": "7c9827906277", + "_ref": "ab59634e-a349-468d-8f99-cb9fe4c38228", + "_type": "reference" + } + ], + "_rev": "347cad33-9d92-4365-ba09-18e6c2a688a3", + "body": [ + { + "_key": "fc11d5317163", + "markDefs": [], + "children": [ + { + "marks": [ + "em" + ], + "text": "This is a joint article contributed to the Seqera blog by Jon Manning of Seqera and Felix Krueger of Altos Labs describing the new nf-core/riboseq pipeline.", + "_key": "8c2ee84cdf5e0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "a96b84f9b665", + "markDefs": [ + { + "_key": "39d86b09469d", + "_type": "link", + "href": "https://nf-co.re/" + }, + { + "href": "https://nf-co.re/riboseq", + "_key": "f22304d582ae", + "_type": "link" + }, + { + "href": "https://en.wikipedia.org/wiki/Ribosome_profiling", + "_key": "23797f8146f8", + "_type": "link" + } + ], + "children": [ + { + "text": "In April 2024, the bioinformatics community welcomed a significant addition to the ", + "_key": "5355407782e60", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "39d86b09469d" + ], + "text": "nf-core", + "_key": "5355407782e61", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " suite: the ", + "_key": "5355407782e62" + }, + { + "_key": "5355407782e63", + "_type": "span", + "marks": [ + "f22304d582ae" + ], + "text": "nf-core/riboseq" + }, + { + "marks": [], + "text": " pipeline. This new tool, born from a collaboration between Altos Labs and Seqera, underscores the potential of strategic partnerships to advance scientific research. 
In this article, we provide some background on the project, offer details on the pipeline, and explain how readers can get started with ", + "_key": "5355407782e64", + "_type": "span" + }, + { + "_key": "5355407782e65", + "_type": "span", + "marks": [ + "23797f8146f8" + ], + "text": "Ribo-seq" + }, + { + "_key": "5355407782e66", + "_type": "span", + "marks": [], + "text": " analysis." + } + ] + }, + { + "_key": "ff2e29964409", + "markDefs": [], + "children": [ + { + "_key": "06511e51fc0b", + "_type": "span", + "marks": [], + "text": "A Fruitful Collaboration" + } + ], + "_type": "block", + "style": "h2" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Altos Labs is known for its ambitious efforts in harnessing cellular rejuvenation to reverse disease, injury, and disabilities that can occur throughout life. Their scientific strategy heavily relies on understanding cellular mechanisms via advanced technologies. Ribo-seq provides insights into the real-time translation of proteins, a core process often dysregulated during aging and disease. Altos Labs needed a way to ensure reliable, reproducible Ribo-seq analysis that its research teams could use. While a Ribo-seq pipeline had been started in nf-core, limited progress had been made. 
Seqera seemed the ideal partner to help build one!", + "_key": "ef4460f305a4" + } + ], + "_type": "block", + "style": "normal", + "_key": "212704cdad6c" + }, + { + "_type": "block", + "style": "normal", + "_key": "3a4e325a6885", + "markDefs": [ + { + "href": "https://seqera.io/nextflow/", + "_key": "afd8d4976f75", + "_type": "link" + }, + { + "_type": "link", + "href": "https://www.zs.com/", + "_key": "8fc76bfd5785" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Seqera, known for creating and developing the ", + "_key": "402551d96a99" + }, + { + "marks": [ + "afd8d4976f75" + ], + "text": "Nextflow DSL", + "_key": "a11895ee51be", + "_type": "span" + }, + { + "text": " and being an active partner in establishing community standards on nf-core, brought the expertise needed to translate Altos Labs' vision into a viable community pipeline. As part of this collaboration, we formed a working group and also reached out to colleagues at ", + "_key": "206247a437cc", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "8fc76bfd5785" + ], + "text": "ZS", + "_key": "da520fc0d7f3" + }, + { + "_key": "c8e26b5b7392", + "_type": "span", + "marks": [], + "text": " and other community members who had done prior work with Ribosome profiling in Nextflow. Our goal was not only to enhance Ribo-seq analysis capabilities but also to ensure the pipeline’s sustainability through a community-driven process." 
+ } + ] + }, + { + "_key": "110443549dbc", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Development Insights", + "_key": "023772c169b7" + } + ], + "_type": "block", + "style": "h2" + }, + { + "_type": "block", + "style": "normal", + "_key": "1bb6d0dcf94a", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "The nf-core/riboseq project was structured into several phases:", + "_key": "fcef7ffc7722", + "_type": "span" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "em" + ], + "text": "Initial planning", + "_key": "04af5c122b050" + }, + { + "_type": "span", + "marks": [], + "text": ": This phase involved detailed discussions between the Scientific Development team at Seqera, Altos Labs, and expert partners to ensure alignment with best practices and effective tool selection.", + "_key": "04af5c122b051" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "9bd450582af3", + "listItem": "bullet" + }, + { + "listItem": "bullet", + "markDefs": [ + { + "href": "https://nf-co.re/rnaseq", + "_key": "3fa0f88295d5", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "em" + ], + "text": "Adapting existing components", + "_key": "4eb6302b38970" + }, + { + "_type": "span", + "marks": [], + "text": ": Key pre-processing and alignment functions were adapted from the ", + "_key": "4eb6302b38971" + }, + { + "text": "nf-core/rnaseq", + "_key": "4eb6302b38972", + "_type": "span", + "marks": [ + "3fa0f88295d5" + ] + }, + { + "marks": [], + "text": " pipeline, allowing for shareability, efficiency, and scalability.", + "_key": "4eb6302b38973", + "_type": "span" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "ef189e78f7f5" + }, + { + "_key": "dc6acae62561", + "listItem": "bullet", + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/zhpn1024/ribotish", + "_key": "6be1a3f37f71" + }, + { + "_type": "link", + 
"href": "https://github.com/smithlabcode/ribotricer", + "_key": "67a956a543b0" + }, + { + "_type": "link", + "href": "https://www.bioconductor.org/packages/release/bioc/html/anota2seq.html", + "_key": "5f9cca0d1922" + }, + { + "href": "https://biocontainers.pro/", + "_key": "a24a587b6c75", + "_type": "link" + }, + { + "_type": "link", + "href": "https://github.com/nf-core/modules", + "_key": "d813571ed2e7" + } + ], + "children": [ + { + "marks": [ + "em" + ], + "text": "New tool integration", + "_key": "f59020155b400", + "_type": "span" + }, + { + "marks": [], + "text": ": Specific tools for Ribo-seq analysis, such as ", + "_key": "f59020155b401", + "_type": "span" + }, + { + "marks": [ + "6be1a3f37f71" + ], + "text": "Ribo-TISH", + "_key": "f59020155b402", + "_type": "span" + }, + { + "text": ", ", + "_key": "f59020155b403", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "67a956a543b0" + ], + "text": "Ribotricer", + "_key": "f59020155b404" + }, + { + "_type": "span", + "marks": [], + "text": ", and ", + "_key": "f59020155b405" + }, + { + "marks": [ + "5f9cca0d1922" + ], + "text": "anota2seq", + "_key": "f59020155b406", + "_type": "span" + }, + { + "_key": "f59020155b407", + "_type": "span", + "marks": [], + "text": ", were wrapped into modules using " + }, + { + "_type": "span", + "marks": [ + "a24a587b6c75" + ], + "text": "Biocontainers", + "_key": "f59020155b408" + }, + { + "_type": "span", + "marks": [], + "text": ", within comprehensive testing frameworks to prevent regression and ensure reliability. 
These components were contributed to the ", + "_key": "f59020155b409" + }, + { + "_type": "span", + "marks": [ + "d813571ed2e7" + ], + "text": "nf-core/modules", + "_key": "f59020155b4010" + }, + { + "text": " repository, which will now be available for the wider community to reuse, independent of this effort.", + "_key": "f59020155b4011", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "27cae4355e4d0", + "_type": "span", + "marks": [ + "em" + ], + "text": "Pipeline development" + }, + { + "marks": [], + "text": ": Individual components were stitched together coherently to create the nf-core/riboseq pipeline, with its own testing framework and user documentation.", + "_key": "27cae4355e4d1", + "_type": "span" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "06ef45923942" + }, + { + "_key": "a11532d70cbc", + "markDefs": [], + "children": [ + { + "_key": "9af59990c0c00", + "_type": "span", + "marks": [], + "text": "Technical and Community Challenges" + } + ], + "_type": "block", + "style": "h2" + }, + { + "_key": "449d8032618a", + "markDefs": [], + "children": [ + { + "text": "Generalizing existing functionality", + "_key": "262d18dad67e0", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h3" + }, + { + "markDefs": [ + { + "_key": "601d56009a00", + "_type": "link", + "href": "https://nf-co.re/modules" + }, + { + "_key": "9d9691a3a5b4", + "_type": "link", + "href": "https://nf-co.re/subworkflows" + } + ], + "children": [ + { + "marks": [], + "text": "nf-core has become an encyclopedia of components, including ", + "_key": "48bd49dd01300", + "_type": "span" + }, + { + "_key": "48bd49dd01301", + "_type": "span", + "marks": [ + "601d56009a00" + ], + "text": "modules" + }, + { + "_type": "span", + "marks": [], + "text": " and ", + "_key": "48bd49dd01302" + }, + { + "_type": "span", + "marks": [ 
+ "9d9691a3a5b4" + ], + "text": "subworkflows", + "_key": "48bd49dd01303" + }, + { + "marks": [], + "text": " that developers can leverage to build Nextflow pipelines. RNA-seq data analysis, in particular, is well served by the nf-core/rnaseq pipeline, one of the longest-standing and most popular members of the nf-core community. Some of the components used in nf-core/rnaseq were not written with re-use in mind, so the first task in this project was to abstract the commodity components for processes such as preprocessing and quantification so that they could be effectively shared by the nf-core/riboseq pipeline.", + "_key": "48bd49dd01304", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "af3382a99d21" + }, + { + "markDefs": [], + "children": [ + { + "_key": "68d04249013b0", + "_type": "span", + "marks": [], + "text": "Test dataset generation" + } + ], + "_type": "block", + "style": "h3", + "_key": "5669adb1dcd3" + }, + { + "markDefs": [], + "children": [ + { + "text": "Another significant hurdle was generating robust test data capable of supporting the ongoing quality assurance of our software. In Ribo-seq analysis, the basic operation of some tools depends on the quality of input data, so random down-sampling of variable quality input reads, especially at shallow depths may not be useful to generate test data. To overcome this, we implemented a targeted down-sampling strategy, selectively using input reads that meet high-quality standards and are known to align well with a specific chromosome. 
This method enabled us to produce a concise yet effective test data set, ensuring that our Ribo-seq tools operate reliably under realistic conditions.", + "_key": "a4da2ac411130", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "2767c14b9d80" + }, + { + "style": "h3", + "_key": "2aaebc117fde", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Tool selection", + "_key": "27bceef25c9e0" + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "1cb88d14f05e", + "markDefs": [], + "children": [ + { + "_key": "42c9c78112020", + "_type": "span", + "marks": [], + "text": "A primary challenge in developing the pipeline was the selection of high-quality, sustainable software. In bioinformatics, funding often limits software development, and many tools are poorly maintained. Furthermore, the understanding of what software 'works' can be ambiguous, embedded in the community's shared knowledge rather than documented formally. Our cooperative approach enabled us to make informed decisions and contribute improvements to the underlying software, enhancing utility for users beyond the nf-core community." + } + ], + "_type": "block" + }, + { + "style": "h3", + "_key": "d1e0de03d5a1", + "markDefs": [], + "children": [ + { + "text": "Parameter selection", + "_key": "b2c37914fe590", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "2523548b9954", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Selecting the correct parameter settings for optimal operation of bioinformatics tools is a perennial problem in the community. In particular, the settings for the STAR alignment algorithm have very different constraints in Ribo-seq analysis relative to generic RNA-seq analysis. We conducted a series of benchmarks to assess the impact on alignment statistics of various combinations of parameters. 
We settled on a starting set, but this is a subject of continuing discussion with community members to drive further optimizations.", + "_key": "decd6cfc25240" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_key": "9a31de208e060", + "_type": "span", + "marks": [], + "text": "Pipeline Features" + } + ], + "_type": "block", + "style": "h2", + "_key": "a8c53464a53f" + }, + { + "_type": "block", + "style": "normal", + "_key": "45f1476190e5", + "markDefs": [], + "children": [ + { + "_key": "f51ea64a9e180", + "_type": "span", + "marks": [], + "text": "The nf-core/riboseq pipeline is now a robust framework written using the nf-core pipeline template, and specifically tailored to handle the complexities of Ribo-seq data analysis." + } + ] + }, + { + "asset": { + "_ref": "image-83f90945d29b41fcdc562789b06f3abbdbfa4d9a-1010x412-png", + "_type": "reference" + }, + "_type": "image", + "_key": "9024177c2c73" + }, + { + "_key": "c4c2c021e47b", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Here is what it offers:", + "_key": "3460577cae3f", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Baseline read preprocessing using processes adapted from existing nf-core components.", + "_key": "5e7ebc27391f0", + "_type": "span" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "cfb811774489", + "listItem": "bullet" + }, + { + "_key": "f78073ef3267", + "listItem": "bullet", + "markDefs": [ + { + "_key": "159e3bc6217d", + "_type": "link", + "href": "https://github.com/alexdobin/STAR" + } + ], + "children": [ + { + "text": "Alignment to references with ", + "_key": "4ce6dc424aed0", + "_type": "span", + "marks": [] + }, + { + "text": "STAR", + "_key": "4ce6dc424aed1", + "_type": "span", + "marks": [ + "159e3bc6217d" + ] + }, + { + "_key": "4ce6dc424aed2", + "_type": "span", + "marks": [], + "text": ", producing both transcriptome 
and genome alignments." + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "3cdb46402566", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "1b345d3fa4f80", + "_type": "span", + "marks": [], + "text": "Analysis of read distribution around protein-coding regions to assess frame bias and P-site offsets. This produces a rich selection of diagnostic plots to assess Ribo-seq data quality." + } + ], + "level": 1 + }, + { + "_key": "9e3414d59445", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Prediction and identification of translated open reading frames using tools like Ribo-TISH and Ribotricer.", + "_key": "3299c56efe000", + "_type": "span" + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "9e8c117a96a2", + "markDefs": [], + "children": [ + { + "text": "Assessment of translational efficiency, which requires matched RNA-seq and Ribo-seq data, facilitated by the anota2seq Bioconductor package (see dot plot below).", + "_key": "c39d9d7b14f8", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "_type": "image", + "_key": "7122c68ade88", + "asset": { + "_type": "reference", + "_ref": "image-ca5f9967df813470051fcf548e962bdbf4c50ee5-624x624-png" + } + }, + { + "_key": "067ad9c9d6d7", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "em" + ], + "text": "An example result from anota2seq, a tool used to study gene expression, shows how transcription and translation are connected. The x-axis shows changes in overall mRNA levels (transcription) between a treated and a control group, while the y-axis displays changes in the rate of protein synthesis (translation) between those groups, as measured by Ribo-seq. 
Grey points represent genes with no significant change in either metric and most points align near the center of the x-axis, indicating little change in mRNA levels. However, some genes exhibit increased (orange) or decreased (red) protein synthesis, suggesting direct regulation of translation rather than changes driven solely by mRNA abundance.", + "_key": "57c0e67a28250" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://nf-co.re/riboseq/#usage", + "_key": "34ab33c4a8e1" + }, + { + "_type": "link", + "href": "https://nfcore.slack.com/channels/riboseq", + "_key": "218183b5348d" + } + ], + "children": [ + { + "_key": "e5078088e49b0", + "_type": "span", + "marks": [], + "text": "If you are a researcher interested in Ribo-seq data analysis, you can test the pipeline by following the instructions in the " + }, + { + "_type": "span", + "marks": [ + "34ab33c4a8e1" + ], + "text": "getting started", + "_key": "e5078088e49b1" + }, + { + "marks": [], + "text": " section of the pipeline. Please feel free to submit bugs and feature requests to drive ongoing improvements. You can also become part of the conversation by joining the ", + "_key": "e5078088e49b2", + "_type": "span" + }, + { + "text": "#riboseq", + "_key": "e5078088e49b3", + "_type": "span", + "marks": [ + "218183b5348d" + ] + }, + { + "text": " channel in the nf-core community Slack workspace. 
We would love to see you there!", + "_key": "e5078088e49b4", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "46beba019134" + }, + { + "markDefs": [], + "children": [ + { + "_key": "bd13a8c55f6e", + "_type": "span", + "marks": [], + "text": "Next Steps" + } + ], + "_type": "block", + "style": "h2", + "_key": "515022911e71" + }, + { + "_type": "block", + "style": "normal", + "_key": "2d75d51ff270", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Following this initial phase of work, Seqera and Altos Labs have handed over the nf-core/riboseq pipeline to the nf-core community for ongoing maintenance and development. As members of that community, we will continue to play a part in enhancing the pipeline going forward. We hope others will benefit from this effort and continue to improve and refine pipeline functionality.", + "_key": "14a152a9174f0", + "_type": "span" + } + ] + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/iraiosub/riboseq-flow", + "_key": "46fa6099abc2" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Coincidentally the authors of ", + "_key": "98347010c2330" + }, + { + "_key": "98347010c2331", + "_type": "span", + "marks": [ + "46fa6099abc2" + ], + "text": "riboseq-flow" + }, + { + "_key": "98347010c2332", + "_type": "span", + "marks": [], + "text": " published their related work on the same day that nf-core/riboseq was first released. This pipeline has a highly complementary set of steps, and there is already ongoing collaboration to work together to build an even better community resource." 
+ } + ], + "_type": "block", + "style": "normal", + "_key": "09c10fe38376" + }, + { + "children": [ + { + "marks": [], + "text": "Empowering Research and Innovation", + "_key": "e5fdf870848b0", + "_type": "span" + } + ], + "_type": "block", + "style": "h2", + "_key": "c566b4d435e3", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "The joint contribution of Seqera and Altos Labs to the nf-core/riboseq pipeline highlights how collaboration between industry and open-source communities can result in tools that push scientific boundaries and foster community engagement and development. By adhering to rigorous code quality and testing standards, nf-core/riboseq ensures researchers access to a dependable, cutting-edge tool.", + "_key": "35352a1b306b0" + } + ], + "_type": "block", + "style": "normal", + "_key": "99da8271ab0f" + }, + { + "markDefs": [], + "children": [ + { + "_key": "53386085eb760", + "_type": "span", + "marks": [], + "text": "We believe this new pipeline is poised to be vital in studying protein synthesis and its implications for aging and health. This is not just a technical achievement - it's a step forward in collaborative, open scientific progress." + } + ], + "_type": "block", + "style": "normal", + "_key": "56719298b452" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "If you have a project in mind where Seqera may be able to help with our Professional Services offerings, please contact us at ", + "_key": "cafe02f0755d" + }, + { + "_key": "53386085eb761", + "_type": "span", + "marks": [ + "ccafa728bca7" + ], + "text": "services@seqera.io" + }, + { + "text": ". 
We are the content experts for Nextflow, nf-core, and the Seqera Platform, and can offer tailored solutions and expert guidance to help you fulfill your objectives.", + "_key": "53386085eb762", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "6e42514da79e", + "markDefs": [ + { + "_key": "ccafa728bca7", + "_type": "link", + "href": "mailto:services@seqera.io" + } + ] + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://www.altoslabs.com/", + "_key": "026178e92bb6" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "To learn more about Altos Labs, visit ", + "_key": "3babdea8c79d0" + }, + { + "_key": "3babdea8c79d1", + "_type": "span", + "marks": [ + "026178e92bb6" + ], + "text": "https://www.altoslabs.com/" + }, + { + "_key": "3babdea8c79d2", + "_type": "span", + "marks": [], + "text": "." + } + ], + "_type": "block", + "style": "normal", + "_key": "a5dc365dc556" + }, + { + "_key": "5b95f381569b", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Acknowledgments", + "_key": "48b61c9282e00" + } + ], + "_type": "block", + "style": "h2" + }, + { + "_key": "258428890647", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "nf-core/riboseq was initially written by Jonathan Manning (Bioinformatics Engineer at Seqera) in collaboration with Felix Krueger and Christel Krueger (Altos Labs). The development work carried out on the pipeline was funded by Altos Labs. 
We thank the following people for their input (", + "_key": "d836d0eff50e0" + }, + { + "_key": "d836d0eff50e1", + "_type": "span", + "marks": [ + "em" + ], + "text": "in alphabetical order" + }, + { + "text": "):", + "_key": "d836d0eff50e2", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Felipe Almeida (ZS)", + "_key": "376c006c20de0" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "be9ad649bb8d", + "listItem": "bullet", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "abb0a8d9fba2", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Anne Bresciani (ZS)", + "_key": "6046a5e41c110" + } + ], + "level": 1 + }, + { + "_key": "31c2f31a40bc", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Caroline Eastwood (University of Edinburgh)", + "_key": "040c3d125ae60" + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "ce8f076685cf", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "text": "Maxime U Garcia (Seqera)", + "_key": "f3c530a930470", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "7b34ffefab7d", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Mikhail Osipovitch (ZS)", + "_key": "e21649c58e7b0" + } + ] + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "02884c22d195", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Jack Tierney (University College Cork)", + "_key": "1f18a294d9a20", + "_type": "span" + } + ] + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "86b2bce07178", + 
"_type": "span", + "marks": [], + "text": "Edward Wallace (University of Edinburgh)\n\n" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "8f03c90bd810" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "1da880ad30a0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "e59fb1d47363" + }, + { + "_type": "block", + "style": "normal", + "_key": "736ce4dde440", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "\n\n", + "_key": "1c8d35ffcae9" + } + ] + } + ], + "_updatedAt": "2024-07-15T14:51:14Z", + "title": "nf-core/riboseq: A collaboration between Altos Labs and Seqera", + "author": { + "_ref": "109f0c7b-3d40-42a9-af77-3844f0e031c0", + "_type": "reference" + }, + "meta": { + "shareImage": { + "_type": "image", + "asset": { + "_type": "reference", + "_ref": "image-10399aee1fa48e4250f2e7ab3c7fb76ca3aa1ac4-1200x628-png" + } + }, + "description": "nf-core/riboseq: A collaboration between Altos Labs and Seqera", + "noIndex": false, + "slug": { + "current": "nf-core-riboseq", + "_type": "slug" + }, + "_type": "meta" + }, + "_id": "drafts.0d583937-1d7f-4c31-9e79-d8f1e5f2a2da", + "publishedAt": "2024-05-15T13:59:00.000Z", + "_createdAt": "2024-05-13T11:54:28Z", + "_type": "blogPost" + }, + { + "body": [ + { + "style": "normal", + "_key": "b2c90a845577", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "em" + ], + "text": "This is a joint blog post by Chris Wright of Oxford Nanopore Technologies and Paolo Di Tommaso of Seqera. 
", + "_key": "dce4c210802c0" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "b6d54206379d", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "aa2ec0b99c58" + }, + { + "_key": "e3f059b3c10d", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Introduction", + "_key": "790a05fdbdd2" + } + ], + "_type": "block", + "style": "h2" + }, + { + "_key": "76489b73dd57", + "markDefs": [ + { + "href": "https://nf-co.re/", + "_key": "2ed12be893e0", + "_type": "link" + } + ], + "children": [ + { + "text": "Besides the well-known ", + "_key": "d26917496f3f0", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "2ed12be893e0" + ], + "text": "nf-core", + "_key": "34b66355584b" + }, + { + "_key": "5ad48037f99f", + "_type": "span", + "marks": [], + "text": ", there are several collections of high-quality Nextflow pipelines and modules, including:" + } + ], + "_type": "block", + "style": "normal" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "8f0e7727b92d", + "listItem": "bullet", + "markDefs": [ + { + "href": "https://www.iarc.who.int/", + "_key": "86ec76a9bc7e", + "_type": "link" + } + ], + "children": [ + { + "text": "Nextflow pipelines from the International Agency for Research on Cancer (", + "_key": "ec376d926cdb0", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "86ec76a9bc7e" + ], + "text": "IARC", + "_key": "ec376d926cdb1" + }, + { + "marks": [], + "text": ")", + "_key": "ec376d926cdb2", + "_type": "span" + } + ] + }, + { + "listItem": "bullet", + "markDefs": [ + { + "_key": "56f0498ec45b", + "_type": "link", + "href": "https://github.com/UMCUGenetics/NextflowModules" + }, + { + "_type": "link", + "href": "https://github.com/UMCUGenetics/", + "_key": "631125797903" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Various pipelines and ", + "_key": 
"51e2182400bb0" + }, + { + "_type": "span", + "marks": [ + "56f0498ec45b" + ], + "text": "Nextflow modules", + "_key": "51e2182400bb1" + }, + { + "_key": "51e2182400bb2", + "_type": "span", + "marks": [], + "text": " maintained by " + }, + { + "marks": [ + "631125797903" + ], + "text": "UMCU Genetics", + "_key": "51e2182400bb3", + "_type": "span" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "630bd0a71a60" + }, + { + "style": "normal", + "_key": "d178804486d4", + "listItem": "bullet", + "markDefs": [ + { + "_key": "cd9ee1a357db", + "_type": "link", + "href": "https://github.com/qbic-pipelines" + } + ], + "children": [ + { + "_type": "span", + "marks": [ + "cd9ee1a357db" + ], + "text": "QBiC pipelines", + "_key": "92c0024f2cc10" + }, + { + "_type": "span", + "marks": [], + "text": " maintained at the University of Tübingen", + "_key": "92c0024f2cc11" + } + ], + "level": 1, + "_type": "block" + }, + { + "markDefs": [ + { + "_key": "d9624fd0014f", + "_type": "link", + "href": "https://labs.epi2me.io/wfindex/" + }, + { + "_type": "link", + "href": "https://labs.epi2me.io/wfindex/", + "_key": "6808d696dec7" + } + ], + "children": [ + { + "_key": "22a465de00660", + "_type": "span", + "marks": [], + "text": "We thought it was high time that we gave some attention to " + }, + { + "_type": "span", + "marks": [ + "6808d696dec7" + ], + "text": "EPI2ME", + "_key": "22a465de00661" + }, + { + "text": "™", + "_key": "9a7e6b39ba16", + "_type": "span", + "marks": [ + "d9624fd0014f" + ] + }, + { + "marks": [ + "6808d696dec7" + ], + "text": " Workflows", + "_key": "dfdcc3d45166", + "_type": "span" + }, + { + "marks": [], + "text": " - another set of professionally maintained pipelines developed by the EPI2ME team at Oxford Nanopore Technologies (ONT).", + "_key": "22a465de00662", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "86cecd91f447" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + 
"text": "In this article, we discuss the workflows, compare them to similar pipelines from nf-core, and explain how users can easily get started using software from EPI2ME™ or the Seqera Platform.", + "_key": "b490da57e51b0" + } + ], + "_type": "block", + "style": "normal", + "_key": "27751fed9302" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "About Oxford Nanopore", + "_key": "3aeea92ca74e0" + } + ], + "_type": "block", + "style": "h2", + "_key": "017862dd41da", + "markDefs": [] + }, + { + "style": "normal", + "_key": "b0c3d54fcfbb", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Oxford Nanopore Technologies was founded in 2005 as a spin-off from the University of Oxford in the UK. The company has developed, commercialized, and continues to innovate on a new generation of sensing technology that uses nanopores - nano-scale holes - embedded in high-tech electronics to perform comprehensive analyses of single molecules.", + "_key": "57b1e1ef5de70" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Oxford Nanopore’s first products sequence DNA and RNA. The technology offers scalability from portable to ultra-high throughput formats that are appropriate for broad use. 
This combines with real-time data delivery for rapid insights and dynamic workflows, and PCR-free sequencing of any length of fragment for the ability to accurately characterize biological variation.", + "_key": "1a04c455f8d80", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "eb44b80b01f0" + }, + { + "_type": "block", + "style": "normal", + "_key": "22be926d3f99", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Today, Oxford Nanopore offers a range of products, from laboratory preparation and automation solutions to sequencers to software tools for analysis, in addition to industry-leading sequencing products, which range from the portable MinION™ sequencer to benchtop GridION™ devices with integrated compute to their high-throughput PromethION™ series with up to 48 independently addressable flow cells.", + "_key": "d17e2d990fb90" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "EPI2ME", + "_key": "636a4e1482120" + } + ], + "_type": "block", + "style": "h2", + "_key": "a930887c907e" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "The EPI2ME team within Oxford Nanopore is composed of a dozen people with expertise in diverse fields, from genetics to computational biology and focus on a variety of workflows from microbiology to clinical research applications. 
EPI2ME provides Oxford Nanopore’s open-source bioinformatics platform and develops and maintains pipelines tailored to nanopore sequencing data.", + "_key": "95ad2fd668020" + } + ], + "_type": "block", + "style": "normal", + "_key": "e93d0e1a78e4", + "markDefs": [] + }, + { + "style": "normal", + "_key": "fad46be12fdd", + "markDefs": [], + "children": [ + { + "text": "In addition to maintaining Nextflow pipelines, the EPI2ME team also supports:", + "_key": "eab61eeb5cd00", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "8afaafe65af7", + "listItem": "bullet", + "markDefs": [ + { + "href": "https://registry.opendata.aws/ont-open-data/", + "_key": "c6146d464d70", + "_type": "link" + } + ], + "children": [ + { + "text": "Oxford Nanopore’s Open Data provided through the ", + "_key": "40fe29c2695f0", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "c6146d464d70" + ], + "text": "registry of open data on AWS", + "_key": "40fe29c2695f1" + }, + { + "marks": [], + "text": ".", + "_key": "40fe29c2695f2", + "_type": "span" + } + ], + "level": 1 + }, + { + "_type": "block", + "style": "normal", + "_key": "1ae98d791b54", + "listItem": "bullet", + "markDefs": [ + { + "_key": "6983f59d3163", + "_type": "link", + "href": "https://labs.epi2me.io/downloads/" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "The ", + "_key": "9fa05d8343900" + }, + { + "_key": "c573fa95e973", + "_type": "span", + "marks": [ + "6983f59d3163" + ], + "text": "EPI2ME desktop application" + }, + { + "_type": "span", + "marks": [], + "text": " for Windows, Mac and Linux for running workflows locally or in the cloud.", + "_key": "40fd725ad2bd" + } + ], + "level": 1 + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "text": "Various tutorials and training materials for bioinformaticians.", + "_key": "e4c4325f39120", + "_type": "span", + "marks": [] + } + ], + 
"level": 1, + "_type": "block", + "style": "normal", + "_key": "5c603b8592eb" + }, + { + "_type": "block", + "style": "normal", + "_key": "5b4604221dde", + "markDefs": [], + "children": [ + { + "_key": "8876e46e8e30", + "_type": "span", + "marks": [], + "text": "" + } + ] + }, + { + "_key": "5e478d3d26a8", + "markDefs": [], + "children": [ + { + "_key": "a3a61ee99a7a0", + "_type": "span", + "marks": [], + "text": "Meet the pipelines" + } + ], + "_type": "block", + "style": "h2" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "The EPI2ME team selected ", + "_key": "476377a62d880" + }, + { + "_key": "476377a62d881", + "_type": "span", + "marks": [ + "181888f825c8" + ], + "text": "Nextflow " + }, + { + "_key": "476377a62d882", + "_type": "span", + "marks": [], + "text": "as their preferred framework for workflows in March of 2021. The pipelines are written using both community provided and bespoke (open-source) tools and maintained by a dedicated team of professional bioinformaticians and software developers. They are extensively documented, and generate high-quality interactive reports." 
+ } + ], + "_type": "block", + "style": "normal", + "_key": "687f06efdeb6", + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/nextflow/", + "_key": "181888f825c8" + } + ] + }, + { + "children": [ + { + "text": "The pipelines are freely available from ", + "_key": "5be3d3739c2b0", + "_type": "span", + "marks": [] + }, + { + "text": "GitHub", + "_key": "5be3d3739c2b1", + "_type": "span", + "marks": [ + "6fe1b59ffff9" + ] + }, + { + "text": " and are grouped based on their functionality as follows:", + "_key": "5be3d3739c2b2", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "dff4e1ef2418", + "markDefs": [ + { + "href": "https://github.com/epi2me-labs", + "_key": "6fe1b59ffff9", + "_type": "link" + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Pipelines for handling basic tasks such as base calling and alignment: ", + "_key": "ef0f7768b3f60" + }, + { + "_type": "span", + "marks": [ + "fb98094d40d4" + ], + "text": "wf-basecalling", + "_key": "ef0f7768b3f61" + }, + { + "marks": [], + "text": " and ", + "_key": "ef0f7768b3f62", + "_type": "span" + }, + { + "_key": "ef0f7768b3f63", + "_type": "span", + "marks": [ + "d8361b59de86" + ], + "text": "wf-alignment" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "725964a021aa", + "listItem": "bullet", + "markDefs": [ + { + "href": "https://github.com/epi2me-labs/wf-basecalling", + "_key": "fb98094d40d4", + "_type": "link" + }, + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-alignment", + "_key": "d8361b59de86" + } + ] + }, + { + "style": "normal", + "_key": "bd0914ddc3e9", + "listItem": "bullet", + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-human-variation", + "_key": "b4df4a9149c2" + }, + { + "_key": "afcc3659be32", + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-somatic-variation" + } + ], + "children": [ + { + "marks": [], + 
"text": "Pipelines for studying human genetics and cancer: ", + "_key": "eadc5a77d5bf0", + "_type": "span" + }, + { + "_key": "eadc5a77d5bf1", + "_type": "span", + "marks": [ + "b4df4a9149c2" + ], + "text": "wf-human-variation" + }, + { + "marks": [], + "text": " and ", + "_key": "eadc5a77d5bf2", + "_type": "span" + }, + { + "marks": [ + "afcc3659be32" + ], + "text": "wf-somatic-variation", + "_key": "eadc5a77d5bf3", + "_type": "span" + } + ], + "level": 1, + "_type": "block" + }, + { + "children": [ + { + "marks": [], + "text": "Pipelines for genome assembly: ", + "_key": "bebf1e3dae070", + "_type": "span" + }, + { + "text": "wf-clone-validation", + "_key": "bebf1e3dae071", + "_type": "span", + "marks": [ + "a5974b48d87e" + ] + }, + { + "_type": "span", + "marks": [], + "text": " and ", + "_key": "bebf1e3dae072" + }, + { + "_type": "span", + "marks": [ + "f5cddbb76c9b" + ], + "text": "wf-bacterial-genomes", + "_key": "bebf1e3dae073" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "429c0538478c", + "listItem": "bullet", + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-clone-validation", + "_key": "a5974b48d87e" + }, + { + "href": "https://github.com/epi2me-labs/wf-bacterial-genomes", + "_key": "f5cddbb76c9b", + "_type": "link" + } + ] + }, + { + "listItem": "bullet", + "markDefs": [ + { + "href": "https://github.com/epi2me-labs/wf-metagenomics", + "_key": "52b154d3d1e6", + "_type": "link" + }, + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-16s", + "_key": "d26e8ebf9b62" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Metagenomic analysis pipelines: ", + "_key": "c2ba1980c0c40" + }, + { + "_type": "span", + "marks": [ + "52b154d3d1e6" + ], + "text": "wf-metagenomics", + "_key": "c2ba1980c0c41" + }, + { + "_type": "span", + "marks": [], + "text": " and ", + "_key": "c2ba1980c0c42" + }, + { + "text": "wf-16s", + "_key": "c2ba1980c0c43", + "_type": "span", + 
"marks": [ + "d26e8ebf9b62" + ] + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "a84c4604286a" + }, + { + "style": "normal", + "_key": "753781e4e05a", + "listItem": "bullet", + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-transcriptomes", + "_key": "eaa9ccd118e1" + }, + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-single-cell", + "_key": "c994c09a8a63" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Direct RNA sequencing and cDNA: ", + "_key": "93376e8595190" + }, + { + "_type": "span", + "marks": [ + "eaa9ccd118e1" + ], + "text": "wf-transcriptomes", + "_key": "93376e8595191" + }, + { + "_type": "span", + "marks": [], + "text": " and ", + "_key": "93376e8595192" + }, + { + "_type": "span", + "marks": [ + "c994c09a8a63" + ], + "text": "wf-single-cell", + "_key": "93376e8595193" + } + ], + "level": 1, + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "e9489bd531f9", + "listItem": "bullet", + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-artic", + "_key": "4742c63b6758" + }, + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-mpx", + "_key": "977ee6511324" + }, + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-flu", + "_key": "df2667b37c87" + }, + { + "href": "https://github.com/epi2me-labs/wf-tb-amr", + "_key": "34c72b732a23", + "_type": "link" + } + ], + "children": [ + { + "text": "Pipelines for infectious disease including SARS-CoV-2, Monkeypox, Influenza, and tuberculosis: ", + "_key": "47c89491d4dd0", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "4742c63b6758" + ], + "text": "wf-artic", + "_key": "47c89491d4dd1" + }, + { + "_type": "span", + "marks": [], + "text": ", ", + "_key": "47c89491d4dd2" + }, + { + "_type": "span", + "marks": [ + "977ee6511324" + ], + "text": "wf-mpx", + "_key": "47c89491d4dd3" + }, + { + "marks": [], 
+ "text": ", ", + "_key": "47c89491d4dd4", + "_type": "span" + }, + { + "_key": "47c89491d4dd5", + "_type": "span", + "marks": [ + "df2667b37c87" + ], + "text": "wf-flu" + }, + { + "marks": [], + "text": ", and ", + "_key": "47c89491d4dd6", + "_type": "span" + }, + { + "marks": [ + "34c72b732a23" + ], + "text": "wf-tb-amr", + "_key": "47c89491d4dd7", + "_type": "span" + } + ], + "level": 1 + }, + { + "style": "normal", + "_key": "f17d24b126a7", + "listItem": "bullet", + "markDefs": [ + { + "href": "https://github.com/epi2me-labs/wf-amplicon", + "_key": "06030687d4a8", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Targeted sequencing pipelines: ", + "_key": "807c964325570" + }, + { + "_type": "span", + "marks": [ + "06030687d4a8" + ], + "text": "wf-amplicon", + "_key": "807c964325571" + } + ], + "level": 1, + "_type": "block" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Other pipelines: ", + "_key": "094f1f7f00750" + }, + { + "marks": [ + "19ef1b8b2717" + ], + "text": "wf-pore-c", + "_key": "094f1f7f00751", + "_type": "span" + }, + { + "marks": [], + "text": ", ", + "_key": "094f1f7f00752", + "_type": "span" + }, + { + "_key": "094f1f7f00753", + "_type": "span", + "marks": [ + "77a1cf496c86" + ], + "text": "wf-aav-qc" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "3d15976a2de7", + "listItem": "bullet", + "markDefs": [ + { + "href": "https://github.com/epi2me-labs/wf-pore-c/", + "_key": "19ef1b8b2717", + "_type": "link" + }, + { + "_type": "link", + "href": "https://github.com/epi2me-labs/wf-aav-qc/", + "_key": "77a1cf496c86" + } + ] + }, + { + "markDefs": [ + { + "href": "https://github.com/epi2me-labs/wf-template", + "_key": "ceb7a9c6f6d3", + "_type": "link" + } + ], + "children": [ + { + "_key": "4169ac3f19a30", + "_type": "span", + "marks": [], + "text": "While the pipelines pre-date some recent nf-core practices, EPI2ME pipelines are DSL2 compliant, modular, 
and employ their own consistent coding standards. Like nf-core, EPI2ME provides a " + }, + { + "_type": "span", + "marks": [ + "ceb7a9c6f6d3" + ], + "text": "standard template", + "_key": "4169ac3f19a31" + }, + { + "_type": "span", + "marks": [], + "text": " that can be used as the basis for developing new workflows. Community-developed workflows following those standards can be easily integrated in the EPI2ME desktop application, increasing accessibility for other Oxford Nanopore users, through an intuitive graphical interface. The EPI2ME team also publishes the containers for each workflow on Docker Hub and makes them freely available.", + "_key": "4169ac3f19a32" + } + ], + "_type": "block", + "style": "normal", + "_key": "bfaa6f746f6a" + }, + { + "style": "h2", + "_key": "2365c15e1db4", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Running the EPI2ME pipelines", + "_key": "81489acf3c9a0" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "df57dd6eb117", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "For each pipeline, instructions are provided for running:", + "_key": "4a647fd3a5e50" + } + ] + }, + { + "style": "normal", + "_key": "f44b9b736f01", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "From the command line", + "_key": "95054c9cdad40" + } + ], + "level": 1, + "_type": "block" + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "text": "On the Seqera Platform", + "_key": "b3f735b03631", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "03d74a0f0336" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "6b4477b09f0b", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Using the EPI2ME desktop application\n\n", + "_key": "71974dc131350", + "_type": "span" + 
} + ] + }, + { + "_type": "block", + "style": "h3", + "_key": "8f3cd2df96b4", + "markDefs": [], + "children": [ + { + "_key": "bf20a4228ebe", + "_type": "span", + "marks": [], + "text": "EPI2ME pipelines on Seqera Platform" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "7ed4ff8abf9c", + "markDefs": [ + { + "_type": "link", + "href": "https://seqera.io/platform/", + "_key": "0e4d8583f03d" + } + ], + "children": [ + { + "_key": "0bb16c677f290", + "_type": "span", + "marks": [], + "text": "Since EPI2ME pipelines include a nextflow_schema.json file, pipelines can be adapted for use with the " + }, + { + "_key": "0bb16c677f291", + "_type": "span", + "marks": [ + "0e4d8583f03d" + ], + "text": "Seqera Platform" + }, + { + "_type": "span", + "marks": [], + "text": ", leveraging Seqera’s interactive interface for launching and monitoring pipelines in their preferred HPC or cloud computing environment.", + "_key": "0bb16c677f292" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "5e1b549edadb", + "markDefs": [ + { + "_key": "f454698e5c14", + "_type": "link", + "href": "https://seqera.io/pipelines/" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Seqera users can simply add EPI2ME pipelines to the Seqera Launchpad via ", + "_key": "1bd4c1b0b5de0" + }, + { + "text": "Seqera Pipelines ", + "_key": "1bd4c1b0b5de1", + "_type": "span", + "marks": [ + "f454698e5c14" + ] + }, + { + "marks": [], + "text": "or by pointing to the EPI2ME GitHub repo and selecting a pipeline version.", + "_key": "1bd4c1b0b5de2", + "_type": "span" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "After adding a tile for the EPI2ME pipeline in Seqera, users can launch pipelines to their preferred compute environments, monitor execution, and share resulting datasets and pipeline results.", + "_key": "fe351b8e99b70" + } + ], + "_type": "block", + "style": "normal", + "_key": "db03c5635b3c" + }, 
+ { + "_key": "d640c4c27d05", + "_type": "youtube", + "id": "KWw0NP-CT_s" + }, + { + "_type": "block", + "style": "h3", + "_key": "6567e1952e72", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "\nEPI2ME pipelines using EPI2ME Desktop Application", + "_key": "00ee018999920", + "_type": "span" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "13b683fb7ce0", + "markDefs": [ + { + "href": "https://labs.epi2me.io/installation/", + "_key": "2d734f49d113", + "_type": "link" + } + ], + "children": [ + { + "marks": [], + "text": "Users can learn more about installing the EPI2ME desktop application ", + "_key": "d558435f20ad0", + "_type": "span" + }, + { + "_key": "d558435f20ad1", + "_type": "span", + "marks": [ + "2d734f49d113" + ], + "text": "here" + }, + { + "marks": [], + "text": ". This desktop tool uses Nextflow and Docker to run bioinformatics workflows and provides an intuitive, easy-to-use interface. With the EPI2ME desktop application, users can launch EPI2ME workflows and other Nextflow pipelines from their choice of desktop environment including Windows (via Windows Subsystem for Linux, WSL), MacOS, or Linux.", + "_key": "d558435f20ad2", + "_type": "span" + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "e0ac6e1638e20" + } + ], + "_type": "block", + "style": "normal", + "_key": "eb0368719104", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Conclusion", + "_key": "ce58c255aba50" + } + ], + "_type": "block", + "style": "h2", + "_key": "9f2035517ace" + }, + { + "_key": "9f6b0fa9669b", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "EPI2ME provides a comprehensive and valuable collection of Nextflow pipelines developed and made available by Oxford Nanopore, catering to a wide range of bioinformatics use cases. 
These pipelines can be deployed on both Seqera Platform and the EPI2ME Desktop application.", + "_key": "8f1e4c5afc0a0" + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "To learn more about EPI2ME, visit ", + "_key": "523fa3aa51180" + }, + { + "text": "https://nanoporetech.com/products/analyse/epi2me/", + "_key": "ed445f69116f", + "_type": "span", + "marks": [ + "a8eb33f3f46a" + ] + }, + { + "text": " or ", + "_key": "5c5bdcbe8cad", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "54cfdf09c8ff" + ], + "text": "sign-up", + "_key": "2505b16fc3c8", + "_type": "span" + }, + { + "marks": [], + "text": " for a free Seqera Cloud account now.", + "_key": "bf417933d86e", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "2fd1ef4c904d", + "markDefs": [ + { + "_key": "a8eb33f3f46a", + "_type": "link", + "href": "https://nanoporetech.com/products/analyse/epi2me/" + }, + { + "_key": "54cfdf09c8ff", + "_type": "link", + "href": "https://cloud.seqera.io/login?utm_source=hs_email&utm_campaign=Webinar%20What%27s%20New%20November%202024&utm_medium=email&utm_content=2&utm_term=fusionprod&utk=4fc237408a000b621d88cfef06fe09e0" + } + ] + } + ], + "meta": { + "description": "We thought it was high time that we gave some attention to EPI2ME Workflows - a set of professionally maintained Nextflow pipelines developed by the EPI2ME team at Oxford Nanopore Technologies (ONT).\n", + "noIndex": false, + "slug": { + "current": "epi2me-nextflow-pipelines", + "_type": "slug" + }, + "_type": "meta" + }, + "_updatedAt": "2024-10-15T13:01:13Z", + "_rev": "572bf426-c44a-4f55-99f7-35d9576b00b4", + "title": "Nextflow pipelines from EPI2ME ", + "publishedAt": "2024-10-18T14:33:00.000Z", + "_createdAt": "2024-10-14T09:15:05Z", + "_id": "drafts.208c3a9d-0253-486c-bc4e-6f233ef7080f", + "tags": [ + { + "_ref": "b6511053-299b-4aa5-8957-94fb9ebc9493", + "_type": "reference", + "_key": "aebece5c41a0" + 
} + ], + "author": { + "_ref": "paolo-di-tommaso", + "_type": "reference" + }, + "_type": "blogPost" + }, + { + "publishedAt": "2016-06-10T06:00:00.000Z", + "author": { + "_type": "reference", + "_ref": "evan-floden" + }, + "_updatedAt": "2024-10-16T14:28:48Z", + "meta": { + "slug": { + "current": "docker-for-dunces-nextflow-for-nunces" + } + }, + "_id": "drafts.561ca06ac707", + "body": [ + { + "children": [ + { + "_key": "aa74c907fb89", + "_type": "span", + "marks": [ + "em" + ], + "text": "Below is a step-by-step guide for creating [Docker](http://www.docker.io) images for use with [Nextflow](http://www.nextflow.io) pipelines. This post was inspired by recent experiences and written with the hope that it may encourage others to join in the virtualization revolution." + } + ], + "_type": "block", + "style": "normal", + "_key": "5de644223001", + "markDefs": [] + }, + { + "style": "normal", + "_key": "fba2c75d251d", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "1e58c8a15fb2", + "_type": "span" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Modern science is built on collaboration. Recently I became involved with one such venture between several groups across Europe. The aim was to annotate long non-coding RNA (lncRNA) in farm animals and I agreed to help with the annotation based on RNA-Seq data. 
The basic procedure relies on mapping short read data from many different tissues to a genome, generating transcripts and then determining if they are likely to be lncRNA or protein coding genes.", + "_key": "5ad57d04cb9d", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "50833a8d465d" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "f171be4200cf" + } + ], + "_type": "block", + "style": "normal", + "_key": "df4dbb73e883" + }, + { + "markDefs": [], + "children": [ + { + "_key": "85b35fa626c4", + "_type": "span", + "marks": [], + "text": "During several successful 'hackathon' meetings the best approach was decided and implemented in a joint effort. I undertook the task of wrapping the procedure up into a Nextflow pipeline with a view to replicating the results across our different institutions and to allow the easy execution of the pipeline by researchers anywhere." + } + ], + "_type": "block", + "style": "normal", + "_key": "84ce0feaea47" + }, + { + "_type": "block", + "style": "normal", + "_key": "ca94bc941408", + "markDefs": [], + "children": [ + { + "_key": "d043f09e00b4", + "_type": "span", + "marks": [], + "text": "" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "974f1a1cdfa3", + "markDefs": [ + { + "_type": "link", + "href": "http://www.github.com/cbcrg/lncrna-annotation-nf", + "_key": "99165958e6b5" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Creating the Nextflow pipeline (", + "_key": "155a8a08d8cd" + }, + { + "_type": "span", + "marks": [ + "99165958e6b5" + ], + "text": "here", + "_key": "357c4685588b" + }, + { + "text": ") in itself was not a difficult task. My collaborators had documented their work well and were on hand if anything was not clear. 
However, installing and keeping all the pipeline dependencies aligned across the different data centers was still a challenging task.", + "_key": "f3317867e3c0", + "_type": "span", + "marks": [] + } + ] + }, + { + "children": [ + { + "_key": "2a5c98bf3a96", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "ab6f59d351cb", + "markDefs": [] + }, + { + "markDefs": [ + { + "_key": "905a8bc500ad", + "_type": "link", + "href": "https://www.docker.com/" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "The pipeline is typical of many in bioinformatics, consisting of binary executions, BASH scripting, R, Perl, BioPerl and some custom Perl modules. We found the BioPerl modules in particular were very sensitive to the various versions in the ", + "_key": "8390ee0ee4e6" + }, + { + "_key": "c58b7dc20cce", + "_type": "span", + "marks": [ + "em" + ], + "text": "long" + }, + { + "marks": [], + "text": " dependency tree. The solution was to turn to ", + "_key": "e384258a5c3f", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "905a8bc500ad" + ], + "text": "Docker", + "_key": "48755f8b6d14" + }, + { + "text": " containers.", + "_key": "236e84a2092d", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "004440881a96" + }, + { + "children": [ + { + "text": "", + "_key": "f4792876a9aa", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "55e482405e7c", + "markDefs": [] + }, + { + "_key": "df983b305d4f", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "I have taken this opportunity to document the process of developing the Docker side of a Nextflow + Docker pipeline in a step-by-step manner.", + "_key": "8fe4f707201e" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "text": "", + "_key": "649e13290a13", + "_type": "span", + "marks": 
[] + } + ], + "_type": "block", + "style": "normal", + "_key": "64ccdad0c58d" + }, + { + "style": "normal", + "_key": "9daaf61343a0", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "###Docker Installation", + "_key": "f8e4f2418ada", + "_type": "span" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "text": "", + "_key": "22f03df3d9b5", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "7dbae6fbfa16" + }, + { + "children": [ + { + "_key": "3af57ef1c497", + "_type": "span", + "marks": [], + "text": "By far the most challenging issue is the installation of Docker. For local installations, the " + }, + { + "text": "process is relatively straight forward", + "_key": "29497e07ff62", + "_type": "span", + "marks": [ + "b39b383b61e5" + ] + }, + { + "_key": "bac8833f273e", + "_type": "span", + "marks": [], + "text": ". However difficulties arise as computing moves to a cluster. Owing to security concerns, many HPC administrators have been reluctant to install Docker system-wide. 
This is changing and Docker developers have been responding to many of these concerns with " + }, + { + "text": "updates addressing these issues", + "_key": "f4e68c0049e2", + "_type": "span", + "marks": [ + "1664943865ae" + ] + }, + { + "text": ".", + "_key": "ebdcec8ebe01", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "a438411f6220", + "markDefs": [ + { + "_type": "link", + "href": "https://docs.docker.com/engine/installation", + "_key": "b39b383b61e5" + }, + { + "_type": "link", + "href": "https://blog.docker.com/2016/02/docker-engine-1-10-security/", + "_key": "1664943865ae" + } + ] + }, + { + "style": "normal", + "_key": "369a356018e0", + "markDefs": [], + "children": [ + { + "_key": "6ea9c938cc17", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "9c82fe0136e7", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "That being the case, local installations are usually perfectly fine for development. 
One of the golden rules in Nextflow development is to have a small test dataset that can run the full pipeline in minutes with few computational resources, i.e. it can run on a laptop.", + "_key": "f06c6b5ed104", + "_type": "span" + } + ] + }, + { + "style": "normal", + "_key": "11b1347afcab", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "9f5f313834ae", + "_type": "span" + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "3640fc87e1c5", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "If you have Docker and Nextflow installed and you wish to view the working pipeline, you can perform the following commands to obtain everything you need and run the full lncrna annotation pipeline on a test dataset.", + "_key": "0b77a23d5bf7" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "b0ad6ffae120" + } + ], + "_type": "block", + "style": "normal", + "_key": "9edd5abef435" + }, + { + "code": "docker pull cbcrg/lncrna_annotation\nnextflow run cbcrg/lncrna-annotation-nf -profile test", + "_type": "code", + "_key": "e04747c2e377" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "[If the above does not work, there could be a problem with your Docker installation.]", + "_key": "fb8752c7e000" + } + ], + "_type": "block", + "style": "normal", + "_key": "0fc16192bebe" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "0af154258b87" + } + ], + "_type": "block", + "style": "normal", + "_key": "e0523eff522a", + "markDefs": [] + }, + { + "style": "normal", + "_key": "773b9de99fad", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "The first command will download the required Docker image to your computer, while the second will launch Nextflow, which automatically downloads the pipeline repository and runs it using the test data included 
with it.", + "_key": "36689d3a632c", + "_type": "span" + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "6ba84ebe36e1", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "0973e8d341fc" + } + ] + }, + { + "style": "normal", + "_key": "1f30f62bc089", + "markDefs": [], + "children": [ + { + "text": "###The Dockerfile", + "_key": "3a33f2cb54af", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "50364dafcb96" + } + ], + "_type": "block", + "style": "normal", + "_key": "4cb45a2ade99" + }, + { + "markDefs": [], + "children": [ + { + "text": "The ", + "_key": "5f45a4596c7c", + "_type": "span", + "marks": [] + }, + { + "text": "Dockerfile", + "_key": "6e92add363fc", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "_type": "span", + "marks": [], + "text": " contains all the instructions required by Docker to build the Docker image. It provides a transparent and consistent way to specify the base operating system and installation of all software, libraries and modules.", + "_key": "908e792d54df" + } + ], + "_type": "block", + "style": "normal", + "_key": "3f1d99c7b705" + }, + { + "_type": "block", + "style": "normal", + "_key": "eb6597312e37", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "eedc860980f3", + "_type": "span", + "marks": [] + } + ] + }, + { + "style": "normal", + "_key": "d4223ee66e84", + "markDefs": [], + "children": [ + { + "text": "We begin by creating a file ", + "_key": "b0b033a77a83", + "_type": "span", + "marks": [] + }, + { + "marks": [ + "code" + ], + "text": "Dockerfile", + "_key": "69aa3263d8b0", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " in the Nextflow project directory. 
The Dockerfile begins with:", + "_key": "ea7e45e2295a" + } + ], + "_type": "block" + }, + { + "_key": "b6aef4e4bff6", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "dd2d05610c5b" + } + ], + "_type": "block", + "style": "normal" + }, + { + "code": "# Set the base image to debian jessie\nFROM debian:jessie\n\n# File Author / Maintainer\nMAINTAINER Evan Floden ", + "_type": "code", + "_key": "c95932bd73bd" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "This sets the base distribution for our Docker image to be Debian v8.4, a lightweight Linux distribution that is ideally suited for the task. We must also specify the maintainer of the Docker image.", + "_key": "dbd6ec0da776" + } + ], + "_type": "block", + "style": "normal", + "_key": "dd72b4cb8f73" + }, + { + "_key": "e3e22b6493fa", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "8ce23ba404a7", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "_key": "24d492f7dd06", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Next we update the repository sources and install some essential tools such as ", + "_key": "883b5be27cf1" + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "wget", + "_key": "de93d23dcc24" + }, + { + "marks": [], + "text": " and ", + "_key": "a75f0d48042f", + "_type": "span" + }, + { + "_key": "b8f1f6977f76", + "_type": "span", + "marks": [ + "code" + ], + "text": "perl" + }, + { + "_type": "span", + "marks": [], + "text": ".", + "_key": "3d2e30dbd5be" + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "marks": [], + "text": "", + "_key": "74087f39767c", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "0b388a47cc16", + "markDefs": [] + }, + { + "_key": "0781f0913220", + "code": "RUN apt-get update && apt-get install --yes --no-install-recommends \\\n 
wget \\\n locales \\\n vim-tiny \\\n git \\\n cmake \\\n build-essential \\\n gcc-multilib \\\n perl \\\n python ...", + "_type": "code" + }, + { + "children": [ + { + "_key": "82c7900bb435", + "_type": "span", + "marks": [], + "text": "Notice that we use the command " + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "RUN", + "_key": "7029c2127e5e" + }, + { + "_type": "span", + "marks": [], + "text": " before each line. The ", + "_key": "cc075c2808b5" + }, + { + "text": "RUN", + "_key": "5372b2fbc07e", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "text": " instruction executes commands as if they are performed from the Linux shell.", + "_key": "54c24028d590", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "3ca70fafd6b8", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "2f4253d9b870" + } + ], + "_type": "block", + "style": "normal", + "_key": "24e6cf4eeaad" + }, + { + "_type": "block", + "style": "normal", + "_key": "ac0cc4e414e7", + "markDefs": [ + { + "_type": "link", + "href": "https://blog.replicated.com/2016/02/05/refactoring-a-dockerfile-for-image-size/", + "_key": "3b99f1c6e0d0" + }, + { + "href": "https://docs.docker.com/engine/userguide/eng-image/dockerfile_best-practices/", + "_key": "ee681c47a630", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Also is good practice to group as many as possible commands in the same ", + "_key": "a715e201a410" + }, + { + "text": "RUN", + "_key": "4c0542b30503", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "_type": "span", + "marks": [], + "text": " statement. This reduces the size of the final Docker image. 
See ", + "_key": "cd0129fc2cb4" + }, + { + "_key": "95753b3703a7", + "_type": "span", + "marks": [ + "3b99f1c6e0d0" + ], + "text": "here" + }, + { + "_type": "span", + "marks": [], + "text": " for these details and ", + "_key": "b3d6166d7b40" + }, + { + "marks": [ + "ee681c47a630" + ], + "text": "here", + "_key": "ea9f63a37e2f", + "_type": "span" + }, + { + "marks": [], + "text": " for more best practices.", + "_key": "fec090986d03", + "_type": "span" + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "9f35046732fb" + } + ], + "_type": "block", + "style": "normal", + "_key": "24659e48c3e7", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "57e5a413a943", + "markDefs": [ + { + "_key": "d68e3d739fed", + "_type": "link", + "href": "http://search.cpan.org/~miyagawa/Menlo-1.9003/script/cpanm-menlo" + } + ], + "children": [ + { + "_key": "ab9ae2c48fd3", + "_type": "span", + "marks": [], + "text": "Next we can specify the install of the required perl modules using " + }, + { + "marks": [ + "d68e3d739fed" + ], + "text": "cpan minus", + "_key": "376a38ae89cc", + "_type": "span" + }, + { + "_key": "b82c42d7d1f5", + "_type": "span", + "marks": [], + "text": ":" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "a23d9bbf5ef9", + "markDefs": [], + "children": [ + { + "_key": "0b5c9131deb9", + "_type": "span", + "marks": [], + "text": "" + } + ] + }, + { + "code": "# Install perl modules\nRUN cpanm --force CPAN::Meta \\\n YAML \\\n Digest::SHA \\\n Module::Build \\\n Data::Stag \\\n Config::Simple \\\n Statistics::Lite ...", + "_type": "code", + "_key": "e7530c3f6dba" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "We can give the instructions to download and install software from GitHub using:", + "_key": "c3ff2167e3c1", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "83711b5bfb64" + }, + { + "markDefs": [], + "children": [ + { + 
"_type": "span", + "marks": [], + "text": "", + "_key": "6891af5db4de" + } + ], + "_type": "block", + "style": "normal", + "_key": "00fd8f533a9a" + }, + { + "_type": "code", + "_key": "ac765553f6ad", + "code": "# Install Star Mapper\nRUN wget -qO- https://github.com/alexdobin/STAR/archive/2.5.2a.tar.gz | tar -xz \\\n && cd STAR-2.5.2a \\\n && make STAR" + }, + { + "_key": "5387c5d1aae0", + "markDefs": [], + "children": [ + { + "_key": "21f01a7dee08", + "_type": "span", + "marks": [], + "text": "We can add custom Perl modules and specify environmental variables such as " + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "PERL5LIB", + "_key": "3c35ccd9597e" + }, + { + "_key": "7edd690d58bf", + "_type": "span", + "marks": [], + "text": " as below:" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "_key": "88da8fa38161", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "95b43e15b080" + }, + { + "_type": "code", + "_key": "02cae409f036", + "code": "# Install FEELnc\nRUN wget -q https://github.com/tderrien/FEELnc/archive/a6146996e06f8a206a0ae6fd59f8ca635c7d9467.zip \\\n && unzip a6146996e06f8a206a0ae6fd59f8ca635c7d9467.zip \\\n && mv FEELnc-a6146996e06f8a206a0ae6fd59f8ca635c7d9467 /FEELnc \\\n && rm a6146996e06f8a206a0ae6fd59f8ca635c7d9467.zip\n\nENV FEELNCPATH /FEELnc\nENV PERL5LIB $PERL5LIB:${FEELNCPATH}/lib/" + }, + { + "_key": "3db7c8965a0b", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "R and R libraries can be installed as follows:", + "_key": "fab1d01a8d76" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "7e8b16febe0b", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "369cb978dbc9" + }, + { + "_type": "code", + "_key": "b635cd93fe02", + "code": "# Install R\nRUN echo \"deb 
http://cran.rstudio.com/bin/linux/debian jessie-cran3/\" >> /etc/apt/sources.list &&\\\napt-key adv --keyserver keys.gnupg.net --recv-key 381BA480 &&\\\napt-get update --fix-missing && \\\napt-get -y install r-base\n\n# Install R libraries\nRUN R -e 'install.packages(\"ROCR\", repos=\"http://cloud.r-project.org/\"); install.packages(\"randomForest\",repos=\"http://cloud.r-project.org/\")'" + }, + { + "children": [ + { + "text": "For the complete working Dockerfile of this project see ", + "_key": "31f01f88d7d4", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "95d80901751f" + ], + "text": "here", + "_key": "61cd37841c10" + } + ], + "_type": "block", + "style": "normal", + "_key": "f897e630ac44", + "markDefs": [ + { + "_type": "link", + "href": "https://github.com/cbcrg/lncRNA-Annotation-nf/blob/master/Dockerfile", + "_key": "95d80901751f" + } + ] + }, + { + "_type": "block", + "style": "normal", + "_key": "34a2ed31ef9a", + "markDefs": [], + "children": [ + { + "_key": "ec0c46f9c3c6", + "_type": "span", + "marks": [], + "text": "" + } + ] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "###Building the Docker Image", + "_key": "99404c3f6b68" + } + ], + "_type": "block", + "style": "normal", + "_key": "1abf7c16ad8c", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "1d5e4d812566" + } + ], + "_type": "block", + "style": "normal", + "_key": "b636fd70f4f5", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "5650048a4760", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Once we start working on the Dockerfile, we can build it anytime using:", + "_key": "5264f09e8e11", + "_type": "span" + } + ] + }, + { + "style": "normal", + "_key": "d641e7f6bf5b", + "markDefs": [], + "children": [ + { + "_key": "70fcc4126623", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block" + }, + { + "_type": "code", + "_key": 
"e90f06c1b843", + "code": "docker build -t skptic/lncRNA_annotation ." + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "This builds the image from the Dockerfile and assigns a tag (i.e. a name) for the image. If there are no errors, the Docker image is now in your local Docker repository ready for use.", + "_key": "fe3388bbb799" + } + ], + "_type": "block", + "style": "normal", + "_key": "8ccc8a028371" + }, + { + "_type": "block", + "style": "normal", + "_key": "7738ce1608b0", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "23129a90f294", + "_type": "span" + } + ] + }, + { + "_key": "53e684ed0883", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "###Testing the Docker Image", + "_key": "ac9aefee0790", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "f459dd9c7e8f", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "29310a754336", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "_key": "995c7f634de1", + "markDefs": [], + "children": [ + { + "text": "We find it very helpful to test our images as we develop the Docker file. Once built, it is possible to launch the Docker image and test if the desired software was correctly installed. 
For example, we can test if FEELnc and its dependencies were successfully installed by running the following:", + "_key": "0f7532136e6a", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "0f3a99e8f0f9", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "76902143bcad", + "_type": "span" + } + ], + "_type": "block" + }, + { + "code": "docker run -ti lncrna_annotation\n\ncd FEELnc/test\n\nFEELnc_filter.pl -i transcript_chr38.gtf -a annotation_chr38.gtf \\\n> -b transcript_biotype=protein_coding > candidate_lncRNA.gtf\n\nexit # remember to exit the Docker image", + "_type": "code", + "_key": "8bc163f9f47c" + }, + { + "style": "normal", + "_key": "8a04e5fe54c3", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "###Tagging the Docker Image", + "_key": "3c27e7d47f5a", + "_type": "span" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_key": "b58a8fff4134", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "376d9185809e", + "markDefs": [] + }, + { + "style": "normal", + "_key": "1a1035fe3e9e", + "markDefs": [ + { + "_type": "link", + "href": "https://hub.docker.com/", + "_key": "e8267b213edb" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Once you are confident your image is built correctly, you can tag it, allowing you to push it to ", + "_key": "0e81f997274e" + }, + { + "_type": "span", + "marks": [ + "e8267b213edb" + ], + "text": "Dockerhub.io", + "_key": "9f99511c671e" + }, + { + "_type": "span", + "marks": [], + "text": ". 
Dockerhub is an online repository for docker images which allows anyone to pull public images and run them.", + "_key": "62279d8d8677" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "629916622f88" + } + ], + "_type": "block", + "style": "normal", + "_key": "7ad2329cd8e6" + }, + { + "style": "normal", + "_key": "ab2403070ee6", + "markDefs": [], + "children": [ + { + "_key": "83a7985ea39e", + "_type": "span", + "marks": [], + "text": "You can view the images in your local repository with the " + }, + { + "text": "docker images", + "_key": "9b9c237f8f87", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "_type": "span", + "marks": [], + "text": " command and tag using ", + "_key": "6aaa7f1f9459" + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "docker tag", + "_key": "56edcf9c0231" + }, + { + "_type": "span", + "marks": [], + "text": " with the image ID and the name.", + "_key": "fccfd00ea0ef" + } + ], + "_type": "block" + }, + { + "_key": "2883293716da", + "markDefs": [], + "children": [ + { + "_key": "4796d2e24cad", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "code", + "_key": "cb58c9b6a966", + "code": "docker images\n\nREPOSITORY TAG IMAGE ID CREATED SIZE\nlncrna_annotation latest d8ec49cbe3ed 2 minutes ago 821.5 MB\n\ndocker tag d8ec49cbe3ed cbcrg/lncrna_annotation:latest" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Now when we check our local images we can see the updated tag.", + "_key": "efecf9499efc" + } + ], + "_type": "block", + "style": "normal", + "_key": "977cb77dafd8" + }, + { + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "de27f8c8d34d", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "9e069ba58981" + }, + { + "code": "docker images\n\nREPOSITORY TAG IMAGE ID CREATED 
SIZE\ncbcrg/lncrna_annotation latest d8ec49cbe3ed 2 minutes ago 821.5 MB", + "_type": "code", + "_key": "859c42e5cad8" + }, + { + "markDefs": [], + "children": [ + { + "_key": "adbb0489873f", + "_type": "span", + "marks": [], + "text": "###Pushing the Docker Image to Dockerhub" + } + ], + "_type": "block", + "style": "normal", + "_key": "36110c0bc0bc" + }, + { + "style": "normal", + "_key": "72eb6aa2d1ff", + "markDefs": [], + "children": [ + { + "_key": "1818ebcbf996", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block" + }, + { + "children": [ + { + "text": "If you have not previously, sign up for a Dockerhub account ", + "_key": "d3c68be9bab9", + "_type": "span", + "marks": [] + }, + { + "text": "here", + "_key": "73adbe5a767b", + "_type": "span", + "marks": [ + "1cf86a9aeb72" + ] + }, + { + "_key": "fdb56fb68fc0", + "_type": "span", + "marks": [], + "text": ". From the command line, login to Dockerhub and push your image." + } + ], + "_type": "block", + "style": "normal", + "_key": "a7bd5e43df27", + "markDefs": [ + { + "_key": "1cf86a9aeb72", + "_type": "link", + "href": "https://hub.docker.com/" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "74b517dfa3a2" + } + ], + "_type": "block", + "style": "normal", + "_key": "76d11410797f" + }, + { + "code": "docker login --username=cbcrg\ndocker push cbcrg/lncrna_annotation", + "_type": "code", + "_key": "72e018a1b3a7" + }, + { + "_key": "4e814562758e", + "markDefs": [], + "children": [ + { + "text": "You can test if your image has been correctly pushed and is publicly available by removing your local version using the IMAGE ID of the image and pulling the remote:", + "_key": "603c47308e12", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "25b68c28836e" + } + ], + "_type": "block", + "style": "normal", + "_key": 
"12f3da40fcc2", + "markDefs": [] + }, + { + "_type": "code", + "_key": "9c8cc03d66d4", + "code": "docker rmi -f d8ec49cbe3ed\n\n# Ensure the local version is not listed.\ndocker images\n\ndocker pull cbcrg/lncrna_annotation" + }, + { + "children": [ + { + "marks": [], + "text": "We are now almost ready to run our pipeline. The last step is to set up the Nextflow config.", + "_key": "fb18e8ebb6fb", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "67b0d083f1e1", + "markDefs": [] + }, + { + "_key": "851718e1c203", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "7e1f6285672c", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "text": "###Nextflow Configuration", + "_key": "ee703ba1a7b8", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "89a8e9b57253", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "text": "", + "_key": "3c12e4f84be6", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "301b53373abc" + }, + { + "children": [ + { + "text": "Within the ", + "_key": "e450d4c03687", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "code" + ], + "text": "nextflow.config", + "_key": "eb562dcd976e" + }, + { + "_type": "span", + "marks": [], + "text": " file in the main project directory we can add the following line which links the Docker image to the Nextflow execution. 
The images can be:", + "_key": "0aada97916c3" + } + ], + "_type": "block", + "style": "normal", + "_key": "853618d141bc", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "bcefea639daa", + "markDefs": [], + "children": [ + { + "_key": "56eecb336e47", + "_type": "span", + "marks": [], + "text": "" + } + ] + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "General (same docker image for all processes):\n\n process {\n container = 'cbcrg/lncrna_annotation'\n }\nSpecific to a profile (specified by `-profile crg` for example):\n\n profile {\n crg {\n container = 'cbcrg/lncrna_annotation'\n }\n }\nSpecific to a given process within a pipeline:\n\n $processName.container = 'cbcrg/lncrna_annotation'", + "_key": "dc79564414fe" + } + ], + "_type": "block", + "style": "normal", + "_key": "b30a67dabb0c" + }, + { + "_type": "block", + "style": "normal", + "_key": "a560b7a67c40", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "14aebd1b7468", + "_type": "span", + "marks": [] + } + ] + }, + { + "_key": "4033d3bebdf9", + "markDefs": [ + { + "href": "https://www.nextflow.io/blog/2016/best-practice-for-reproducibility.html", + "_key": "f61aacdb2ef0", + "_type": "link" + } + ], + "children": [ + { + "text": "In most cases it is easiest to use the same Docker image for all processes. One further thing to consider is the inclusion of the sha256 hash of the image in the container reference. 
I have ", + "_key": "1a64527f3033", + "_type": "span", + "marks": [] + }, + { + "text": "previously written about this", + "_key": "38cf9657683c", + "_type": "span", + "marks": [ + "f61aacdb2ef0" + ] + }, + { + "_type": "span", + "marks": [], + "text": ", but briefly, including a hash ensures that not a single byte of the operating system or software is different.", + "_key": "bc4e97553513" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "441548f75de3" + } + ], + "_type": "block", + "style": "normal", + "_key": "0eaf14f96c05" + }, + { + "code": " process {\n container = 'cbcrg/lncrna_annotation@sha256:9dfe233b...'\n }", + "_type": "code", + "_key": "e986f84b6af5" + }, + { + "children": [ + { + "marks": [], + "text": "All that is left now is to run the pipeline.", + "_key": "132c729c8d25", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "39e6843958d4", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "a4a85a9e7228", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "3eba503d4fca" + } + ] + }, + { + "_key": "a90f3eeed817", + "code": "nextflow run lncRNA-Annotation-nf -profile test", + "_type": "code" + }, + { + "_key": "e51c1eda68c5", + "markDefs": [], + "children": [ + { + "_key": "6bc1b9275274", + "_type": "span", + "marks": [], + "text": "Whilst I have explained this step-by-step process in a linear, consequential manner, in reality the development process is often more circular with changes in the Docker images reflecting changes in the pipeline." 
+ } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "f4ab602e7e18", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "7ece5b1d69ec" + } + ] + }, + { + "_key": "3e9640736a34", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "###CircleCI and Nextflow", + "_key": "127548bd2ca6" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "text": "", + "_key": "48776ea3a77d", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "8bd12b9a35d5" + }, + { + "style": "normal", + "_key": "006188af7329", + "markDefs": [ + { + "href": "http://www.circleci.com", + "_key": "52d7d21fec88", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Now that you have a pipeline that successfully runs on a test dataset with Docker, a very useful step is to add a continuous development component to the pipeline. 
With this, whenever you push a modification of the pipeline to the GitHub repo, the test data set is run on the ", + "_key": "e1a0115c8a63" + }, + { + "_type": "span", + "marks": [ + "52d7d21fec88" + ], + "text": "CircleCI", + "_key": "bf9e5650e51a" + }, + { + "text": " servers (using Docker).", + "_key": "a7690c9f35e1", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "_key": "bbb43942df4f", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "565cf1047e08" + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "41364a5f63d3", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "To include CircleCI in the Nextflow pipeline, create a file named ", + "_key": "f8cea4ca2097", + "_type": "span" + }, + { + "marks": [ + "code" + ], + "text": "circle.yml", + "_key": "fa98c01db045", + "_type": "span" + }, + { + "_key": "2f21b332f3b0", + "_type": "span", + "marks": [], + "text": " in the project directory. We add the following instructions to the file:" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "", + "_key": "25942a6c677a" + } + ], + "_type": "block", + "style": "normal", + "_key": "e2b0b14d0fd2" + }, + { + "code": "machine:\n java:\n version: oraclejdk8\n services:\n - docker\n\ndependencies:\n override:\n\ntest:\n override:\n - docker pull cbcrg/lncrna_annotation\n - curl -fsSL get.nextflow.io | bash\n - ./nextflow run . 
-profile test", + "_type": "code", + "_key": "7433acb412d2" + }, + { + "children": [ + { + "marks": [], + "text": "Next you can sign up to CircleCI, linking your GitHub account.", + "_key": "433129d9fd5e", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "70d6d1859e1d", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_key": "3a0243c5639e", + "_type": "span", + "marks": [], + "text": "" + } + ], + "_type": "block", + "style": "normal", + "_key": "2f2296b7bb34" + }, + { + "_key": "261b716a06a7", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "Within the GitHub README.md you can add a badge with the following:", + "_key": "0be2d4a60379", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "c227bf5b9089", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "64db719ff8e1", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "code": "![CircleCI status](https://circleci.com/gh/cbcrg/lncRNA-Annotation-nf.png?style=shield)", + "_type": "code", + "_key": "a375b3bed0e9" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "###Tips and Tricks", + "_key": "46f101c27e69" + } + ], + "_type": "block", + "style": "normal", + "_key": "1642b961bc5a", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "993ba2832874", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "dd9168b63937", + "_type": "span" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "text": "File permissions", + "_key": "a2f6b726c62d", + "_type": "span", + "marks": [ + "strong" + ] + }, + { + "_type": "span", + "marks": [], + "text": ": When a process is executed by a Docker container, the UNIX user running the process is not you. Therefore any files that are used as an input should have the appropriate file permissions. 
For example, I had to change the permissions of all the input data in the test data set with:", + "_key": "287310bd8df1" + } + ], + "_type": "block", + "style": "normal", + "_key": "d33d746e9473" + }, + { + "_type": "block", + "style": "normal", + "_key": "0fd72cb1c652", + "markDefs": [], + "children": [ + { + "text": "", + "_key": "f294eccb09f6", + "_type": "span", + "marks": [] + } + ] + }, + { + "style": "normal", + "_key": "d03452f5b41c", + "markDefs": [], + "children": [ + { + "text": "find -type f -exec chmod 644 {} \\; find -type d -exec chmod 755 {} \\;", + "_key": "d7e384eae7c7", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "_type": "block", + "style": "normal", + "_key": "5c097f5ad5b2", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "8dc4ce35290d", + "_type": "span" + } + ] + }, + { + "markDefs": [ + { + "_key": "a645ea709cb2", + "_type": "link", + "href": "mailto:/evanfloden@gmail.com" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "###Summary This was my first time building a Docker image and after a bit of trial-and-error the process was surprisingly straightforward. There is a wealth of information available for Docker and the almost seamless integration with Nextflow is fantastic. Our collaboration team is now looking forward to applying the pipeline to different datasets and publishing the work, knowing our results will be completely reproducible across any platform. 
", + "_key": "f21d4187558c" + }, + { + "_key": "38e9df5b660d", + "_type": "span", + "marks": [ + "a645ea709cb2" + ], + "text": "/evanfloden@gmail.com" + } + ], + "_type": "block", + "style": "normal", + "_key": "16153769e1e6" + } + ], + "title": "Docker for dunces & Nextflow for nunces", + "tags": [ + { + "_ref": "ace8dd2c-eed3-4785-8911-d146a4e84bbb", + "_type": "reference", + "_key": "5edc3ed408ba" + }, + { + "_ref": "b6511053-299b-4aa5-8957-94fb9ebc9493", + "_type": "reference", + "_key": "c2a74b2b2cad" + } + ], + "_type": "blogPost", + "_createdAt": "2024-09-25T14:15:05Z", + "_rev": "1caaa5f4-55b0-4c8f-a7fa-f6e12936c553" + }, + { + "_rev": "46bcb683-925f-4a71-8f3e-b9210031d1b8", + "meta": { + "description": "asdasd", + "noIndex": true, + "slug": { + "_type": "slug", + "current": "singularity-reloaded-2" + }, + "_type": "meta" + }, + "_id": "drafts.9fb5989a-4718-4430-b79d-414c0046c359", + "title": "Test article", + "publishedAt": "2024-04-18T15:35:00.000Z", + "tags": [ + { + "_key": "1bbbc4317abb", + "_ref": "b6511053-299b-4aa5-8957-94fb9ebc9493", + "_type": "reference" + }, + { + "_ref": "1b55a117-18fe-40cf-8873-6efd157a6058", + "_type": "reference", + "_key": "342b6f7ba8ea" + }, + { + "_key": "535a050b7e1c", + "_ref": "d356a4d5-06c1-40c2-b655-4cb21cf74df1", + "_type": "reference" + } + ], + "author": { + "_type": "reference", + "_ref": "bfa556d4-8ea3-419d-99f9-3716804c5f2a" + }, + "body": [ + { + "_key": "f7338c80f8da", + "markDefs": [], + "children": [ + { + "_key": "b13a6f8930ec0", + "_type": "span", + "marks": [], + "text": "Containers are essential components in reproducible scientific workflows. They enable applications to be easily packaged and distributed along with dependencies, making them portable across operating systems, runtimes, and clouds." 
+ } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://sylabs.io/singularity/", + "_key": "15042503705b" + }, + { + "_key": "1bbf0802e6b5", + "_type": "link", + "href": "https://apptainer.org/news/community-announcement-20211130/" + } + ], + "children": [ + { + "_key": "96d378e2b2470", + "_type": "span", + "marks": [], + "text": "While Docker is the most popular container runtime and file format, " + }, + { + "_type": "span", + "marks": [ + "15042503705b" + ], + "text": "Singularity", + "_key": "96d378e2b2471" + }, + { + "_type": "span", + "marks": [], + "text": " (and now ", + "_key": "96d378e2b2472" + }, + { + "text": "Apptainer", + "_key": "96d378e2b2473", + "_type": "span", + "marks": [ + "1bbf0802e6b5" + ] + }, + { + "_type": "span", + "marks": [], + "text": ") have emerged as preferred solutions in HPC settings. For HPC users, Singularity provides several advantages:", + "_key": "96d378e2b2474" + } + ], + "_type": "block", + "style": "normal", + "_key": "bedaa0d3d2b6" + }, + { + "_type": "block", + "style": "normal", + "_key": "d62d8d3e6e43", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "3ed7116a3a920", + "_type": "span", + "marks": [], + "text": "Singularity runs under a Linux user's UID, avoiding security concerns and simplifying file system access in multi-user environments." 
+ } + ], + "level": 1 + }, + { + "_type": "block", + "style": "normal", + "_key": "736be0294d42", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Singularity Image Format (SIF) containers are stored as individual files, making them portable across cluster nodes, easy to manage, and fast to load.", + "_key": "2ffc515f96d3" + } + ], + "level": 1 + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "Containers work seamlessly with workload managers such as Slurm or Spectrum LSF, running under the workload manager’s control rather than as a child of the Docker daemon.", + "_key": "d08b4222f9ea" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "4f986efa8cb6", + "listItem": "bullet", + "markDefs": [] + }, + { + "children": [ + { + "marks": [], + "text": "This article explains how ", + "_key": "30b2d63f14200", + "_type": "span" + }, + { + "_key": "30b2d63f14201", + "_type": "span", + "marks": [ + "42bd86b81ff3" + ], + "text": "Nextflow" + }, + { + "marks": [], + "text": " and ", + "_key": "30b2d63f14202", + "_type": "span" + }, + { + "_type": "span", + "marks": [ + "cf8ddb481912" + ], + "text": "Wave", + "_key": "30b2d63f14203" + }, + { + "_key": "30b2d63f14204", + "_type": "span", + "marks": [], + "text": " are evolving to meet the needs of HPC users, supporting new capabilities in both Singularity and Apptainer. Read on to learn more!" + } + ], + "_type": "block", + "style": "normal", + "_key": "738092543938", + "markDefs": [ + { + "_key": "42bd86b81ff3", + "_type": "link", + "href": "https://seqera.io/nextflow/" + }, + { + "_type": "link", + "href": "https://seqera.io/wave/", + "_key": "cf8ddb481912" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_key": "97dd14bd260c0", + "_type": "span", + "marks": [], + "text": "Singularity vs. 
Apptainer" + } + ], + "_type": "block", + "style": "h2", + "_key": "6cad29f83c4b" + }, + { + "_type": "block", + "style": "normal", + "_key": "029194e5aa78", + "markDefs": [ + { + "href": "https://sylabs.io/", + "_key": "a7ac85241dde", + "_type": "link" + }, + { + "href": "https://hpcng.org/", + "_key": "3c204b3bdb74", + "_type": "link" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "There is often confusion between Singularity and Apptainer, so it is worth providing a brief explanation. When ", + "_key": "d622afbe8b960" + }, + { + "marks": [ + "a7ac85241dde" + ], + "text": "Sylabs", + "_key": "d622afbe8b961", + "_type": "span" + }, + { + "_type": "span", + "marks": [], + "text": " forked the Singularity project from the ", + "_key": "d622afbe8b962" + }, + { + "_type": "span", + "marks": [ + "3c204b3bdb74" + ], + "text": "HPCng", + "_key": "d622afbe8b963" + }, + { + "marks": [], + "text": " repository in May of 2021, they chose not to rename their fork. As a result, the name “Singularity” described both the original open-source project and Sylabs’ new version underpinning their commercial offerings.", + "_key": "d622afbe8b964", + "_type": "span" + } + ] + }, + { + "style": "normal", + "_key": "f4ae7180a502", + "markDefs": [ + { + "href": "https://apptainer.org/", + "_key": "7be27073a836", + "_type": "link" + }, + { + "_key": "416a88e85588", + "_type": "link", + "href": "https://sylabs.io/singularity/" + }, + { + "_key": "943aa1a4c6f9", + "_type": "link", + "href": "https://sylabs.io/singularity-pro/" + }, + { + "_type": "link", + "href": "https://apptainer.org/", + "_key": "b624ce929ab4" + } + ], + "children": [ + { + "_type": "span", + "marks": [], + "text": "To avoid confusion, members of the original Singularity project moved their project to the Linux Foundation in November 2021, and renamed it “", + "_key": "b0e0b949c8790" + }, + { + "marks": [ + "7be27073a836" + ], + "text": "Apptainer", + "_key": "b0e0b949c8791", + "_type": "span" + 
}, + { + "text": ".” As a result of these moves, Singularity has diverged. ", + "_key": "b0e0b949c8792", + "_type": "span", + "marks": [] + }, + { + "_key": "b0e0b949c8793", + "_type": "span", + "marks": [ + "416a88e85588" + ], + "text": "SingularityCE" + }, + { + "_key": "b0e0b949c8794", + "_type": "span", + "marks": [], + "text": " and " + }, + { + "_type": "span", + "marks": [ + "943aa1a4c6f9" + ], + "text": "SingularityPro", + "_key": "b0e0b949c8795" + }, + { + "marks": [], + "text": " are maintained by Sylabs, and open-source Apptainer is available from ", + "_key": "b0e0b949c8796", + "_type": "span" + }, + { + "marks": [ + "b624ce929ab4" + ], + "text": "apptainer.org", + "_key": "b0e0b949c8797", + "_type": "span" + }, + { + "text": " with available commercial support.", + "_key": "b0e0b949c8798", + "_type": "span", + "marks": [] + } + ], + "_type": "block" + }, + { + "markDefs": [ + { + "_type": "link", + "href": "https://nextflow.io/docs/latest/container.html#singularity", + "_key": "d6e909d02df2" + }, + { + "_key": "c5c4cb0b927e", + "_type": "link", + "href": "https://nextflow.io/docs/latest/container.html#apptainer" + } + ], + "children": [ + { + "_key": "ee43b516076e0", + "_type": "span", + "marks": [], + "text": "Nextflow and Seqera fully support both Singularity dialects, treating " + }, + { + "marks": [ + "d6e909d02df2" + ], + "text": "Singularity", + "_key": "ee43b516076e1", + "_type": "span" + }, + { + "_key": "ee43b516076e2", + "_type": "span", + "marks": [], + "text": " and " + }, + { + "_type": "span", + "marks": [ + "c5c4cb0b927e" + ], + "text": "Apptainer", + "_key": "ee43b516076e3" + }, + { + "text": " as distinct offerings reflecting their unique and evolving features.", + "_key": "ee43b516076e4", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "39bbf08809ac" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Nextflow support for Singularity and Apptainer", + 
"_key": "b2ed20d4e95a0" + } + ], + "_type": "block", + "style": "h2", + "_key": "42f286c076f2" + }, + { + "_type": "block", + "style": "normal", + "_key": "87e3b726fcd5", + "markDefs": [ + { + "_key": "5d72a0a1037f", + "_type": "link", + "href": "https://hub.docker.com/" + }, + { + "_type": "link", + "href": "https://quay.io/", + "_key": "63433f065473" + }, + { + "_type": "link", + "href": "https://aws.amazon.com/ecr/", + "_key": "28c6cc646431" + } + ], + "children": [ + { + "marks": [], + "text": "Nextflow can pull containers in different formats from multiple sources, including Singularity Hub, Singularity Library, or Docker/OCI-compatible registries such as ", + "_key": "5f094f8e09d00", + "_type": "span" + }, + { + "text": "Docker Hub", + "_key": "5f094f8e09d01", + "_type": "span", + "marks": [ + "5d72a0a1037f" + ] + }, + { + "_type": "span", + "marks": [], + "text": ", ", + "_key": "5f094f8e09d02" + }, + { + "text": "Quay.io", + "_key": "5f094f8e09d03", + "_type": "span", + "marks": [ + "63433f065473" + ] + }, + { + "text": ", or ", + "_key": "5f094f8e09d04", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "28c6cc646431" + ], + "text": "Amazon ECR", + "_key": "5f094f8e09d05" + }, + { + "_key": "5f094f8e09d06", + "_type": "span", + "marks": [], + "text": ". In HPC environments, Nextflow users can also point to existing SIF format images that reside on a shared file system.\n\n" + } + ] + }, + { + "asset": { + "_ref": "image-eff63aca1ce03113f328233754388476d21122c3-736x414-jpg", + "_type": "reference" + }, + "_type": "image", + "_key": "38a09d9b4da5" + }, + { + "_key": "4f043506d28f", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "\nFor Nextflow users in HPC environments, a common usage pattern has been to have Nextflow download and convert OCI/Docker images to SIF format on the fly. 
For this to work, scratch storage needs to be available on the cluster node running the Nextflow head job to facilitate downloading the container’s OCI blob layers and assembling the SIF file. The resulting SIF file is then stored on a shared file system accessible to other cluster nodes. While this works, there are problems with this approach:", + "_key": "318361117b6a0" + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "2b01846439c1", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_key": "3d2579c339120", + "_type": "span", + "marks": [], + "text": "Having the Nextflow head node responsible for downloading and converting multiple images presents a bottleneck that affects performance." + } + ], + "level": 1, + "_type": "block" + }, + { + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "text": "In production environments, pointing ", + "_key": "5147a083367f", + "_type": "span", + "marks": [] + }, + { + "_type": "span", + "marks": [ + "em" + ], + "text": "SINGULARITY_TMPDIR", + "_key": "07ba1ab2625c" + }, + { + "_key": "0c8d0debd1ac", + "_type": "span", + "marks": [], + "text": " to fast local storage is a standard practice for speeding the generation of SIF format images, but this adds configuration complexity in clustered environments.\n" + } + ], + "level": 1, + "_type": "block", + "style": "normal", + "_key": "4c518f71df22" + }, + { + "style": "h2", + "_key": "650e0c02a21f", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "A better approach using Nextflow ociAutoPull", + "_key": "753e95324b29", + "_type": "span" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "_key": "0e650e126dc5", + "_type": "span", + "marks": [], + "text": "As of version 23.12.0-edge, Nextflow provides a new `ociAutoPull` option for both Singularity and Apptainer that delegates the conversion of OCI-compliant images to Singularity format to the container runtime itself.\n\nThis 
approach has several advantages over the previous approach:\n\n" + } + ], + "_type": "block", + "style": "normal", + "_key": "decd5c6bbfd9" + }, + { + "_key": "e13521fd6454", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "", + "_key": "a4d90da06d1a", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "style": "normal", + "_key": "e379ee76dbfc", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "text": "The pull and conversion phase of generating SIF files from OCI images is managed by the container runtime instead of by Nextflow.", + "_key": "c7c4fe5e6cfa", + "_type": "span", + "marks": [] + } + ], + "level": 1, + "_type": "block" + }, + { + "level": 1, + "_type": "block", + "style": "normal", + "_key": "0dade09a96bd", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "marks": [], + "text": "The pull and conversion happen on compute nodes instead of the node running the head job, thus freeing up the head node and enabling conversions to execute in parallel.", + "_key": "7209e7e6c588", + "_type": "span" + } + ] + }, + { + "_key": "a48400c30d94", + "listItem": "bullet", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Images are cached on the compute nodes with the OCI layers intact. Assuming images are cached on a shared file system, when two containers share the same base images, only one copy needs to be retained. 
This avoids the need for unnecessary downloads and processing.", + "_key": "b63279f2b87a" + } + ], + "level": 1, + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "The example below illustrates how this works in practice:\n", + "_key": "7a338e68755c" + } + ], + "_type": "block", + "style": "normal", + "_key": "736f3fda8e11", + "markDefs": [] + }, + { + "children": [ + { + "marks": [ + "code" + ], + "text": "\nsingularity.enabled = true\nsingularity.ociAutoPull = true\nprocess.container = 'ubuntu:latest'", + "_key": "31d648e84ab9", + "_type": "span" + }, + { + "text": "\n", + "_key": "ac0e10d2f780", + "_type": "span", + "marks": [] + }, + { + "text": "\n$ nextflow run hello -c \n", + "_key": "54962958b47e", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "_type": "span", + "marks": [], + "text": "\n\nIf you are using Apptainer, replace the scope singularity with `apptainer` in the Nextflow config example above.", + "_key": "9aa862af40f4" + } + ], + "_type": "block", + "style": "normal", + "_key": "58d2e52b7129", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [], + "text": "Running OCI format containers", + "_key": "4b1c5a4da958" + } + ], + "_type": "block", + "style": "h2", + "_key": "acc549d83bfe" + }, + { + "_key": "e72f6254629d", + "markDefs": [], + "children": [ + { + "text": "Apptainer now supports multiple image formats including Singularity SIF files, SquashFS files, and Docker/OCI containers hosted on an OCI registry. As of SingularityCE 4.0, Sylabs introduced a new SIF image format that directly encapsulates OCI containers. They also introduced a new OCI mode enabled by the `--oci` command line switch or by adding the `oci mode` directive to the `singularity.conf` file.\n\nWhen OCI mode is enabled, Singularity uses a new low-level runtime to achieve OCI compatibility. 
This is a major step forward, allowing Singularity to execute OCI-compliant container images directly, solving previous compatibility issues. For Singularity users, this new runtime and direct support for OCI container images make it much more efficient to run OCI containers.\n\nIn Nextflow, this functionality can be enabled as follows:", + "_key": "940cd721e4ca", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "block", + "style": "normal", + "_key": "550188acdced", + "markDefs": [], + "children": [ + { + "marks": [ + "code" + ], + "text": "```\nsingularity.enabled = true\nsingularity.ociMode = true\nprocess.container = 'ubuntu:latest'\n```", + "_key": "fe85379057ef", + "_type": "span" + }, + { + "text": "\n", + "_key": "7aaf83c6cfeb", + "_type": "span", + "marks": [] + } + ] + }, + { + "style": "normal", + "_key": "56647f3fa67b", + "markDefs": [], + "children": [ + { + "text": "```\nnextflow run hello -c \n```", + "_key": "ebf86ed2be24", + "_type": "span", + "marks": [ + "code" + ] + } + ], + "_type": "block" + }, + { + "children": [ + { + "marks": [], + "text": "Wave support for Singularity", + "_key": "ae510f1077cf", + "_type": "span" + } + ], + "_type": "block", + "style": "h2", + "_key": "2438b250b0d3", + "markDefs": [] + }, + { + "children": [ + { + "text": "In addition to the feature above, Nextflow provides better support for Singularity and Wave containers.\n\nWave is a container provisioning service that, among other things, allows for the on-demand assembly of containers based on the dependencies of the jobs in your data analysis workflows.\n\nNextflow, along with Wave, allows you to build Singularity native images by using the Conda packages declared in your Nextflow configuration file. Singularity container images are stored in an OCI-compliant registry and pulled on demand by your pipeline.\n\nTo enable this capability, you will need to add the following settings to your nextflow.config. 
In our example, these settings were stored in `wave-singularity.config`.", + "_key": "59bc036c04fe", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "e49356276920", + "markDefs": [] + }, + { + "style": "normal", + "_key": "7e0b3498eba6", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "```", + "_key": "3e01b8196434" + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "4adeda237ffd", + "markDefs": [], + "children": [ + { + "text": "singularity.enabled = true\nsingularity.autoMounts = true\nsingularity.ociAutoPull = true\n\nwave.enabled = true\nwave.freeze = true\nwave.build.repository = 'docker.io//wavebuild'\nwave.build.cacheRepository = 'docker.io//wave-cache'\n\ntower.accessToken = ''\ntower.workspaceId = ''\n\nwave.strategy = ['conda']\nconda.channels = 'seqera,conda-forge,bioconda,defaults'\n```", + "_key": "a45394330d4f", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "_type": "span", + "marks": [], + "text": "\n\nYou can test this configuration using the command below. In this example, Nextflow invokes Wave to build Singularity containers on the fly and freezes them to a repository using credentials stored in the Seqera Platform.", + "_key": "f0f6f97e052e" + } + ], + "_type": "block" + }, + { + "children": [ + { + "_key": "263d37f8fa4d0", + "_type": "span", + "marks": [], + "text": "Nextflow requires that the `accessToken` and `workspaceId` for the Seqera workspace containing the registry credentials be supplied in the `nextflow.config` file (above) so that the containers can be persisted in the user’s preferred registry."
+ } + ], + "_type": "block", + "style": "normal", + "_key": "bf3fed1845eb", + "markDefs": [] + }, + { + "children": [ + { + "_type": "span", + "marks": [], + "text": "The personal authorization token (`tower.accessToken`) required to access the Seqera API can be generated in the user menu under `Your Tokens` from within the Seqera web interface. See the ", + "_key": "c06562019e950" + }, + { + "_type": "span", + "marks": [ + "cf58d80fe1f7" + ], + "text": "Seqera documentation", + "_key": "c06562019e951" + }, + { + "_type": "span", + "marks": [], + "text": " for instructions on how to create a Docker Hub personal access token (PAT) and store it as a credential in your organization workspace.", + "_key": "c06562019e952" + } + ], + "_type": "block", + "style": "normal", + "_key": "5de92052a601", + "markDefs": [ + { + "href": "https://docs.seqera.io/platform/23.4.0/credentials/docker_hub_registry_credentials", + "_key": "cf58d80fe1f7", + "_type": "link" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "text": "", + "_key": "9309640448c3", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal", + "_key": "8f9be69efae9" + }, + { + "children": [ + { + "marks": [ + "code" + ], + "text": "```", + "_key": "d615f1088d78", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "f168616d8b0a", + "markDefs": [] + }, + { + "children": [ + { + "text": "$ nextflow run rnaseq-nf -c ./wave-singularity.config\n\nN E X T F L O W ~ version 24.02.0-edge\n\n┃ Launching `https://github.com/nextflow-io/rnaseq-nf` [serene_montalcini] DSL2 - revision: 8253a586cc [master]\n\nR N A S E Q - N F P I P E L I N E\n===================================\ntranscriptome: /home/ubuntu/.nextflow/assets/nextflow-io/rnaseq-nf/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa\nreads : /home/ubuntu/.nextflow/assets/nextflow-io/rnaseq-nf/data/ggal/ggal_gut_{1,2}.fq\noutdir : results\n\nexecutor > local (4)\n[1f/af2ca7] RNA…ggal_1_48850000_49020000) 
| 1 of 1 ✔\n[d0/afbc55] RNA…STQC (FASTQC on ggal_gut) | 1 of 1 ✔\n[b0/f9587a] RNASEQ:QUANT (ggal_gut) | 1 of 1 ✔\n[f0/093b45] MULTIQC | 1 of 1 ✔\n\nDone! Open the following report in your browser --> results/multiqc_report.html\n```", + "_key": "b33be1859e41", + "_type": "span", + "marks": [ + "code" + ] + }, + { + "_type": "span", + "marks": [], + "text": "\n\nYou can use the `nextflow inspect` command to view the path to the containers built and pushed to the repo by wave as follows:", + "_key": "158aca23997a" + } + ], + "_type": "block", + "style": "normal", + "_key": "882a46d001e4", + "markDefs": [] + }, + { + "children": [ + { + "marks": [ + "code" + ], + "text": "```", + "_key": "ecc97197456b", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "1733fbc04666", + "markDefs": [] + }, + { + "style": "normal", + "_key": "93b2248f6ec6", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "$ ", + "_key": "537f7c3062a30" + }, + { + "_type": "span", + "marks": [ + "strong", + "code" + ], + "text": "nextflow inspect rnaseq-nf -c ./wave-singularity.config", + "_key": "537f7c3062a31" + } + ], + "_type": "block" + }, + { + "style": "normal", + "_key": "3a58128b29f6", + "markDefs": [], + "children": [ + { + "_key": "bb2c993fd85b0", + "_type": "span", + "marks": [ + "code" + ], + "text": "{" + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "text": "\"processes\": [", + "_key": "ecda074c68b50", + "_type": "span", + "marks": [ + "code" + ] + } + ], + "_type": "block", + "style": "normal", + "_key": "eefb7103ac4b" + }, + { + "children": [ + { + "text": "{", + "_key": "3992d97323560", + "_type": "span", + "marks": [ + "code" + ] + } + ], + "_type": "block", + "style": "normal", + "_key": "63aa00b73ce3", + "markDefs": [] + }, + { + "style": "normal", + "_key": "716d0852c154", + "markDefs": [], + "children": [ + { + "_key": "43ccc11369ee0", + "_type": "span", + "marks": [ + "code" +
], + "text": "\"name\": \"RNASEQ:INDEX\"," + } + ], + "_type": "block" + }, + { + "markDefs": [], + "children": [ + { + "text": "\"container\": \"docker://docker.io//wavebuild:salmon-1.10.2--fdce05f6d77af751\"", + "_key": "72ba851acb1a0", + "_type": "span", + "marks": [ + "code" + ] + } + ], + "_type": "block", + "style": "normal", + "_key": "f0c73c40eefd" + }, + { + "_type": "block", + "style": "normal", + "_key": "f5f9077d398b", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "},", + "_key": "153c06ad50660" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "marks": [ + "code" + ], + "text": "{", + "_key": "a09787bd89690", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "5feee5c7500d" + }, + { + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "\"name\": \"RNASEQ:QUANT\",", + "_key": "846867e51f060" + } + ], + "_type": "block", + "style": "normal", + "_key": "ebd95ef2f9c7", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "492796aeea20", + "markDefs": [], + "children": [ + { + "_key": "f7101f985ff60", + "_type": "span", + "marks": [ + "code" + ], + "text": "\"container\": \"docker://docker.io//wavebuild:salmon-1.10.2--fdce05f6d77af751\"" + } + ] + }, + { + "_key": "28bcff4b8875", + "markDefs": [], + "children": [ + { + "text": "},", + "_key": "8262013529ff0", + "_type": "span", + "marks": [ + "code" + ] + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "{", + "_key": "f16a1eeab71f0" + } + ], + "_type": "block", + "style": "normal", + "_key": "18c3cb0bdba5", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "\"name\": \"MULTIQC\",", + "_key": "098094fb8c7f0" + } + ], + "_type": "block", + "style": "normal", + "_key": "71076f51cbb2" + }, + { + "_key": "3df10d4ae98f", + "markDefs": 
[], + "children": [ + { + "_key": "dafd7cf086500", + "_type": "span", + "marks": [ + "code" + ], + "text": "\"container\": \"docker://docker.io//wavebuild:multiqc-1.17--d85209f21556c472\"" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_key": "aa7e2d2a0ca4", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "},", + "_key": "9d65718ed7d60" + } + ], + "_type": "block", + "style": "normal" + }, + { + "children": [ + { + "marks": [ + "code" + ], + "text": "{", + "_key": "a3fdd44e78bb0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "57f8a7dc1545", + "markDefs": [] + }, + { + "_key": "320f5b79d359", + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "\"name\": \"RNASEQ:FASTQC\",", + "_key": "ea5db7353b070" + } + ], + "_type": "block", + "style": "normal" + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "\"container\": \"docker://docker.io//wavebuild:fastqc-0.12.1--f44601bdd08701ed\"", + "_key": "bbd68658fe1a0" + } + ], + "_type": "block", + "style": "normal", + "_key": "92f7f61d9dcb" + }, + { + "children": [ + { + "marks": [ + "code" + ], + "text": "}", + "_key": "e7ac774e844c0", + "_type": "span" + } + ], + "_type": "block", + "style": "normal", + "_key": "ec810778d2d7", + "markDefs": [] + }, + { + "_type": "block", + "style": "normal", + "_key": "7f5e28a4b94d", + "markDefs": [], + "children": [ + { + "_key": "40b4ab61fd7a0", + "_type": "span", + "marks": [ + "code" + ], + "text": "]" + } + ] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "}\n```", + "_key": "e367b0fb78e30" + } + ], + "_type": "block", + "style": "normal", + "_key": "70aa1a5fd670" + }, + { + "_key": "e66b050d19a4", + "markDefs": [], + "children": [ + { + "text": "Singularity containers built by Wave can be stored locally on your HPC cluster or be served from your 
preferred registry at runtime providing tremendous flexibility.\n", + "_key": "2a0e54e352340", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "normal" + }, + { + "_key": "38af6aad3526", + "markDefs": [], + "children": [ + { + "text": "Conclusion", + "_key": "093014d7b5800", + "_type": "span", + "marks": [] + } + ], + "_type": "block", + "style": "h2" + }, + { + "markDefs": [], + "children": [ + { + "_key": "7047964024b60", + "_type": "span", + "marks": [], + "text": "Nextflow continues to improve pipeline portability and reproducibility across clusters and cloud computing environments by providing the widest support for container runtimes and cutting-edge functionality for Singularity users.\n\nToday, Nextflow supports Apptainer, Singularity, Charliecloud, Docker, Podman, Sarus, and Shifter with rich support for native Singularity and OCI container formats. Nextflow can run both container formats served from multiple sources, including Singularity Hub, Singularity Library, or any Docker/OCI-compliant registry.\n" + } + ], + "_type": "block", + "style": "normal", + "_key": "51ef6c751d30" + }, + { + "children": [ + { + "text": "adssd", + "_key": "d3b16e468697", + "_type": "span", + "marks": [ + "code" + ] + } + ], + "_type": "block", + "style": "normal", + "_key": "20152028f035", + "markDefs": [] + }, + { + "markDefs": [], + "children": [ + { + "_type": "span", + "marks": [ + "code" + ], + "text": "", + "_key": "f50b33bf11e8" + } + ], + "_type": "block", + "style": "normal", + "_key": "141255fba432" + }, + { + "_key": "7787d7ae1b7b", + "markDefs": [], + "children": [ + { + "marks": [ + "code" + ], + "text": "", + "_key": "92529617dfbb", + "_type": "span" + } + ], + "_type": "block", + "style": "normal" + }, + { + "_type": "script", + "_key": "b39fdec04a39" + }, + { + "_key": "e59aa97a0474", + "code": "
Test
", + "_type": "code", + "language": "html" + }, + { + "_key": "664ae0fdfa7f", + "_type": "script" + } + ], + "_createdAt": "2024-04-15T12:03:42Z", + "_type": "blogPost", + "_updatedAt": "2024-10-07T16:14:54Z" + } +] \ No newline at end of file diff --git a/internal/step3/backup.mjs b/internal/step3/backup.mjs new file mode 100644 index 00000000..c40471bd --- /dev/null +++ b/internal/step3/backup.mjs @@ -0,0 +1,24 @@ +import fs from 'fs'; +import path from 'path'; +import sanityClient from '@sanity/client'; + +/* Backup destination: backup.json resolved against the current working directory. */ const outputFile = path.join(process.cwd(), 'backup.json'); + +/* Sanity client for project o2y1bt2g, dataset seqera; token is read from SANITY_TOKEN and the CDN is disabled. */ export const client = sanityClient({ + projectId: 'o2y1bt2g', + dataset: 'seqera', + token: process.env.SANITY_TOKEN, + useCdn: false, +}); + + +/** Runs a query for every document whose _type is "blogPost" and returns the client's result. */ async function fetchBlogPosts() { + return await client.fetch(`*[_type == "blogPost"]`); +} + +/** Fetches the blog posts and synchronously writes them to outputFile as JSON with 2-space indentation. */ async function doBackup() { + const posts = await fetchBlogPosts(); + fs.writeFileSync(outputFile, JSON.stringify(posts, null, 2)); +} + +/* NOTE(review): the returned promise is neither awaited nor given a .catch(), so a failed fetch or write surfaces only as an unhandled rejection. */ doBackup(); \ No newline at end of file diff --git a/internal/step3/migrateBlogType.mjs b/internal/step3/migrateBlogType.mjs new file mode 100644 index 00000000..310e08b6 --- /dev/null +++ b/internal/step3/migrateBlogType.mjs @@ -0,0 +1,46 @@ +import sanityClient from '@sanity/client'; +import { customAlphabet } from 'nanoid'; + +/* Id generator: 12 characters drawn from the lowercase hexadecimal alphabet. */ const nanoid = customAlphabet('0123456789abcdef', 12); + +/* Sanity client for project o2y1bt2g, dataset seqera; token is read from SANITY_TOKEN and the CDN is disabled. */ export const client = sanityClient({ + projectId: 'o2y1bt2g', + dataset: 'seqera', + token: process.env.SANITY_TOKEN, + useCdn: false, +}); + +/** Returns every document whose _type is "blogPostDev" (the migration source). */ async function fetchBlogPostsDev() { + return await client.fetch(`*[_type == "blogPostDev"]`); +} + +/** Returns every document whose _type is "blogPost" (used to detect already-migrated slugs). */ async function fetchBlogPosts() { + return await client.fetch(`*[_type == "blogPost"]`); +} + +/** Creates a "blogPost" copy of each "blogPostDev" document, one at a time, skipping any whose meta.slug.current already exists among the blogPost documents fetched at start. */ async function migrateBlogType() { + console.log('🟢🟢🟢 Migrating'); + const devPosts = await fetchBlogPostsDev(); + const posts = await fetchBlogPosts(); + + for (const post of devPosts) { + console.log('🔵 >> ', post.meta.slug.current); + /* Linear search by slug; NOTE(review): throws if any document lacks meta.slug - confirm every document has one. */ const existing = posts.find(p => p.meta.slug.current ===
post.meta.slug.current); + if (!!existing) { + console.log('🟡 exists >> ', existing.meta.slug.current); + console.log('🟡 skipping >> ', existing.title); + continue; + } + /* Copy every field of the dev post, then override _type, assign a freshly generated _id and set _rev to undefined. */ const newPost = { + ...post, + _type: 'blogPost', + _id: nanoid(), + _rev: undefined, + } + const p = await client.create(newPost); + console.log('🟢 created >> ', p.title); + } + console.log('🟢🟢🟢 Done'); +} + +/* NOTE(review): the returned promise is neither awaited nor given a .catch(); a failure part-way leaves earlier posts created and is reported only as an unhandled rejection. */ migrateBlogType(); \ No newline at end of file