Merge branch 'axelwalter:main' into main

Wang-Bioinformatics-Lab · Mar 4, 2024 · e8aaacc · e8aaacc
2 parents 12d3828 + 63426ea
commit e8aaacc
Show file tree

Hide file tree

Showing 20 changed files with 407 additions and 118 deletions.
diff --git a/.github/workflows/build-windows-executable-app.yaml b/.github/workflows/build-windows-executable-app.yaml
@@ -0,0 +1,84 @@
+name: Build executable for Windows
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  build-executable:
+    runs-on: windows-latest
+
+    env:
+      PYTHON_VERSION: 3.11.0
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v3
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ env.PYTHON_VERSION }}
+
+    - name: Setup virtual environment
+      shell: cmd
+      run: |
+        python -m venv myenv
+
+        call myenv\Scripts\activate.bat   
+
+        pip install -r requirements.txt
+
+        pip install pyinstaller
+
+    # Appends a _main_run_clExplicit helper to the venv copy of streamlit/web/cli.py
+    # (presumably the entry point run.py uses in the bundled app -- confirm).
+    - name: Write function to cli.py
+      shell: bash
+      run: |
+        # Paths are relative to the step's working directory (the checkout),
+        # which is where the "Setup virtual environment" step created myenv.
+        dir myenv/Lib/site-packages
+        # Drop the last two lines of cli.py (expected to be the
+        # `if __name__ == "__main__":` guard, which the heredoc below re-adds).
+        head -n -2 myenv/Lib/site-packages/streamlit/web/cli.py > temp_cli.py
+
+        # Quoted delimiter: the Python payload is written verbatim, with no shell expansion.
+        cat << 'EOF' >> temp_cli.py
+        def _main_run_clExplicit(file, command_line, args=[], flag_options=[]):
+            main._is_running_with_streamlit = True
+            bootstrap.run(file, command_line, args, flag_options)
+
+
+        if __name__ == "__main__":
+            main()
+        EOF
+
+        mv temp_cli.py myenv/Lib/site-packages/streamlit/web/cli.py
+
+    - name: Compile app with pyinstaller
+      shell: cmd
+      run: |
+        call myenv\Scripts\activate.bat 
+        pyinstaller --onefile --additional-hooks-dir ./hooks run.py --clean
+
+
+    - name: Copy everything to dist directory
+      run: |
+        cp -r .streamlit dist/.streamlit
+        cp -r pages dist/pages
+        cp -r src dist/src
+        cp -r assets dist/assets
+        cp -r example-data dist/example-data
+        cp Statistics_for_Metabolomics.py dist/
+
+    - name: Modify .spec file
+      shell: bash
+      run: |
+        cp run_app_temp.spec run_app.spec
+
+    - name: Make executable
+      shell: cmd
+      run: |
+        call myenv\Scripts\activate.bat
+        pyinstaller run_app.spec --clean
+
+    - name: Upload artifact
+      # v3 of this action is deprecated and no longer accepted on github.com.
+      uses: actions/upload-artifact@v4
+      with:
+        name: FBMN-Stats-App
+        path: dist
+        # v4 skips hidden files by default; dist/.streamlit must be shipped.
+        include-hidden-files: true
diff --git a/.streamlit/config.toml b/.streamlit/config.toml
@@ -1,2 +1,9 @@
 [theme]
-base="light"
+base="light"
+
+[global]
+developmentMode = false
+
+[server]
+port = 8502
+maxUploadSize=512
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ A web app implementation of the [statistics notebooks](https://github.com/Functi
 `streamlit run Statistics_for_Metabolomics.py`
 
 ## Available Statistics
-- Principle Component Analysis (PCA)
+- Principal Component Analysis (PCA)
 - Multivariate
     - PERMANOVA & PCoA
 - Hierachical Clustering & Heatmaps

diff --git a/Statistics_for_Metabolomics.py b/Statistics_for_Metabolomics.py
@@ -1,60 +1,141 @@
 import streamlit as st
+import base64
 from src.common import *
 
 page_setup()
 
-# st.title("The hitchhiker's guide to statistical analysis of metabolomics data")
-
 st.image("assets/FBMN-STATS-GUIed_logo2.png", use_column_width=True)
 
-st.markdown(
-    """
-## Quickstart
+st.markdown("""
+## Quickstart Guide
 
-A web app implementation of the [statistics notebooks](https://github.com/Functional-Metabolomics-Lab/Statistical-analysis-of-non-targeted-LC-MSMS-data) for metabolomics by the [Functional Metabolomics Lab](https://github.com/Functional-Metabolomics-Lab).
-These notebooks are developed by the Virtual Multi Omics Lab ([VMOL](https://vmol.org/)).
+Welcome to FBMN-STATS, a web app implementation of the [statistics notebooks](https://github.com/Functional-Metabolomics-Lab/Statistical-analysis-of-non-targeted-LC-MSMS-data) for metabolomics by the [Functional Metabolomics Lab](https://github.com/Functional-Metabolomics-Lab)
+             as part of the article '[The Hitchhiker’s Guide to Statistical Analysis of Feature-based Molecular Networks from Non-Targeted Metabolomics Data](https://doi.org/10.26434/chemrxiv-2023-wwbt0)'. 
+            These notebooks are developed by the Virtual Multi Omics Lab ([VMOL](https://vmol.org/)).
+            This app facilitates downstream statistical analysis of Feature-Based Molecular Networking data, simplifying the process for researchers.       
 
-Once you have completed the **Data Preparation** step, chose any of the available statistics sections.
+### Getting Started
+This web app requires two primary inputs: a feature quantification table and metadata table. Users with FBMN Job IDs from GNPS or GNPS2 can easily fetch necessary files by entering the job IDs via the 'Data Preparation' page. 
+            This page also offers subsequent data cleanup steps such as Blank Removal, Imputation, and Normalization.
 
-💡 **All plots are interactive!**
-- select area with your mouse to zoom in
-- double click in the plot to zoom back out
-- save plots using the camera icon in the top right corner (specify image format in settings panel)
-
-### Data Preparation
-- two tables are required: **Quantification** and **Meta Data**
-- supported formats: `tsv` and `txt` (tab separated), `csv` (comma separated) and `xlsx` (Excel file)
-- if feature table has an optional **metabolite** column that will be taken as index (can be unique ID, contain `m/z` and `RT` information or actual metabolite name)
-- feature index can be automatically generated if columns for `m/z` and `RT` (and optionally `row ID`) are present
-- sample file names need to contain `mzML` file name extensions
-- quantification table needs sample file names as column names
-- meta data table **requires** a `filename` column
-- meta data table can contain columns with attributes
-- checkout the **example data** availabe in file selection
-- remove blank features and impute missing values in the **Data Cleanup** section
-
-Example feature table:
+**⚠️ Warning:** Our data cleanup options, including normalization methods like Total Ion Count (TIC) normalization and center-scaling, are selected due to their widespread use. However, we recognize that these methods may not be suitable for all types of data. We encourage users to consider various normalization techniques to best suit their dataset's needs.
+            This tool aims to offer a quick results overview and serves primarily for educational purposes. For comprehensive insights into data cleanup methodologies, please refer to our article and the referenced literature.
+""")
+
+
+st.subheader('Data Preparation Essentials')
+st.markdown("""
+- Required tables: **Quantification** and **Metadata**
+- Supported formats include: `tsv`, `txt`, `csv`, and `xlsx`
+- Feature tables with a **metabolite** column are indexed accordingly
+- Automatic feature indexing available with `m/z`, `RT`, and optional `row ID` columns
+- Sample filenames must include `mzML` extensions
+- The metadata table must have a `filename` column and can include attribute columns
+- Example data available for reference
+- Proceed to **Data Cleanup** for blank removal and missing value imputation
+""")
+
+st.markdown("""          
+Example feature table:  
+ 
 |metabolite|sample1.mzML|sample2.mzML|blank.mzML|
 |---|---|---|---|
 |1|1000|1100|100|
 |2|2000|2200|200|
-
+""")
+st.write(' ')
+st.markdown("""        
 Example meta data table:
+            
 |filename|Sample_Type|Time_Point|
 |---|---|---|
 |sample1.mzML|Sample|1h|
 |sample2.mzML|Sample|2h|
-|blank.mzML|Blank|N/A|
-
-
-### Available Statistics
-- Principle Component Analysis (PCA)
-- Multivariate
-    - PERMANOVA & PCoA
-- Hierachical Clustering & Heatmaps
-- Univariate 
-    - One-way ANOVA & Tukey's post hoc test
-    - Kruskal-Wallis & Dunn's post hoc test
-- Student's t-test
-"""
+|blank.mzML|Blank|N/A| 
+""")
+
+st.write(' ')
+st.subheader('Statistical Analyses Available')
+st.markdown("""
+- **PCA**
+- **Multivariate Analysis:** PERMANOVA & PCoA
+- **Hierarchical Clustering & Heatmaps**
+- **Univariate Analysis:** ANOVA, Tukey's, Student's t-test, Kruskal-Wallis & Dunn's
+""")
+
+
+st.subheader('Outputs')
+st.markdown("""
+Generated results include csv tables and images, aiding in data presentation and publication.
+""")
+
+
+st.subheader('Interactive Plots')
+st.markdown("""
+💡 **All plots are interactive!**
+- Select area with your mouse to zoom in
+- Double click in the plot to zoom back out
+- Save plots using the camera icon in the top right corner (specify image format in settings panel)
+""")
+
+
+st.subheader('Settings Panel')
+st.markdown("""
+1. **P-value Correction:** These are FDR (False Discovery Rate) corrections applied for multiple univariate tests. Available options include Bonferroni, Sidak, Benjamini-Hochberg (BH), Benjamini-Yekutieli (BY), and an option for no correction, with Bonferroni set as the default.  
+            While Bonferroni is known for controlling false positives, it may inadvertently increase false negatives. Advanced methods like BH and BY aim to balance true discoveries against false positives more effectively. We recommend BH for FDR correction to optimize analysis outcomes.
+            
+    Note that changing the p-value correction settings does not automatically update the corrected p-values. To update results re-run the analysis.
+
+2. **Image Export Format:** Choose from svg, png, jpeg, webp. Recommendations: png for presentations, svg for publications.
+""")
+
+
+st.subheader('Limitations of the App')
+st.markdown("""
+- Be mindful of the 200 MB data limit. There is a possibility of server slowdowns or crashes with larger datasets. The GUI's simplicity and limited graphical options are designed for introductory purposes and do not allow customization for deeper analysis.
+""")
+
+
+st.subheader('Citation and Resources')
+st.markdown("""
+For citations and further resources, please refer to our [article](https://doi.org/10.26434/chemrxiv-2023-wwbt0).
+""")
+
+
+st.subheader("We Value Your Feedback")
+st.markdown("""
+Your feedback and suggestions are invaluable to us as we strive to enhance this tool. 
+            As a small team, we greatly appreciate any contributions you can make. 
+            Please feel free to create an issue on our GitHub repository to share your thoughts or report any issues you encounter.
+
+[Create an Issue on GitHub](https://github.com/Functional-Metabolomics-Lab/FBMN-STATS/issues/new)
+""")
+
+st.subheader("Functional-Metabolomics-Lab")
+
+c1, c2, c3 = st.columns(3)
+c1.markdown(
+    """<a href="https://github.com/Functional-Metabolomics-Lab">
+    <img src="data:image/png;base64,{}" width="100">
+    </a>""".format(
+        base64.b64encode(open("./assets/github-logo.png", "rb").read()).decode()
+    ),
+    unsafe_allow_html=True,
 )
+c2.markdown(
+    """<a href="https://www.youtube.com/@functionalmetabolomics">
+    <img src="data:image/png;base64,{}" width="100">
+    </a>""".format(
+        base64.b64encode(open("./assets/youtube-logo.png", "rb").read()).decode()
+    ),
+    unsafe_allow_html=True,
+)
+c3.markdown(
+    """<a href="https://twitter.com/func_metabo_lab">
+    <img src="data:image/png;base64,{}" width="100">
+    </a>""".format(
+        base64.b64encode(open("./assets/x-logo.png", "rb").read()).decode()
+    ),
+    unsafe_allow_html=True,
+)
+
diff --git a/assets/github-logo.png b/assets/github-logo.png
diff --git a/assets/x-logo.png b/assets/x-logo.png
diff --git a/assets/youtube-logo.png b/assets/youtube-logo.png
diff --git a/hooks/hook-streamlit.py b/hooks/hook-streamlit.py
@@ -0,0 +1,15 @@
+from PyInstaller.utils.hooks import copy_metadata
+
+datas = []
+datas += copy_metadata("streamlit")
+datas += copy_metadata("plotly")
+datas += copy_metadata("pingouin")
+datas += copy_metadata("openpyxl")
+datas += copy_metadata("kaleido")
+datas += copy_metadata("scikit_posthocs")
+datas += copy_metadata("gnpsdata")
+datas += copy_metadata("scikit_learn")
+datas += copy_metadata("tabulate")
+datas += copy_metadata("networkx")
+datas += copy_metadata("pandas_flavor")
+datas += copy_metadata("numpy")
diff --git a/pages/1_📁_Data_Preparation.py b/pages/1_📁_Data_Preparation.py
@@ -27,6 +27,7 @@
         ft, md = load_example()
 
     if file_origin == "GNPS(2) task ID" or file_origin == "Example dataset from publication":
+        st.warning("💡 This tool only supports task IDs from GNPS1 and GNPS2, not from Quickstart GNPS1.")
         if file_origin == "Example dataset from publication":
             task_id_default = "b661d12ba88745639664988329c1363e" # 63e8b3da08df41fe95031e4710e0476b
 

diff --git a/...s/2_Principle_Component_Analysis_(PCA).py → ...s/2_Principal_Component_Analysis_(PCA).py b/...s/2_Principle_Component_Analysis_(PCA).py → ...s/2_Principal_Component_Analysis_(PCA).py
@@ -6,7 +6,7 @@
 
 # pd.concat([st.session_state.md, st.session_state.data], axis=1)
 
-st.markdown("# Principle Component Analysis (PCA)")
+st.markdown("# Principal Component Analysis (PCA)")
 
 with st.expander("📖 About"):
     st.markdown(
@@ -33,7 +33,7 @@
         fig = get_pca_scatter_plot(
             pca_df, pca_variance, st.session_state.pca_attribute, st.session_state.md
         )
-        show_fig(fig, "principle-component-analysis")
+        show_fig(fig, "principal-component-analysis")
     with t2:
         fig = get_pca_scree_plot(pca_df, pca_variance)
         show_fig(fig, "pca-variance")