1.4.8

TillMacher · Jan 26, 2023 · c3295b5 · c3295b5
1 parent 82caa91
commit c3295b5
Show file tree

Hide file tree

Showing 18 changed files with 341 additions and 269 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/README.md b/README.md
@@ -93,12 +93,26 @@ all modules (OTU traits will simply be ignored if not required).
 
 </details>
 
-## Change log
+## Change log (only for major changes)
+
+### v 1.4.8
+
+  * New feature:
+  * Added support for easier read and taxonomy table conversion from APSCALE.
+  * Important changes:
+      -> Taxonomy table sheet name for the APSCALE import changed to 'Taxonomy table'
+      -> Read table column name for OTU sequences changed to 'Seq'
+      -> These are the default names generated by APSCALE, which makes importing data easier.
+
+  * Bug fixes:
+  * Y-axes for alpha diversity and rarefaction plots start at 0.
+  * Fixed crash of the venn diagram module.
+  * Fixed potentially remaining zero read OTUs after read-based rarefaction.
 
 ### v 1.4.5
   TTT change log v 1.4.5
 
-  * Added trait import to the data conversion modules. 
+  * Added trait import to the data conversion modules.
 
 
 ### v 1.4.4

diff --git a/_tutorial_files/tutorial_read_table_TTT.xlsx b/_tutorial_files/tutorial_read_table_TTT.xlsx
diff --git a/_tutorial_files/tutorial_taxonomy_table.xlsx b/_tutorial_files/tutorial_taxonomy_table.xlsx
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="taxontabletools", # Replace with your own username
-    version="1.4.3",
+    version="1.4.8",
     author="Till-Hendrik Macher",
     author_email="[email protected]",
     description="TaxonTableTools - A comprehensive, platform-independent graphical user interface software to explore and visualise DNA metabarcoding data",
@@ -23,7 +23,7 @@
                         'openpyxl>=3.0.3',
                         'xlsxwriter>=1.2.7',
                         'biopython>=1.77',
-                        'scikit-bio>=0.5.6',
+                        'scikit-bio==0.5.6',
                         'requests_html>=0.10.0',
                         'scipy>=1.5.1',
                         'shapely>=1.7.1',

diff --git a/taxontabletools/.DS_Store b/taxontabletools/.DS_Store
diff --git a/taxontabletools/__main__.py b/taxontabletools/__main__.py
diff --git a/taxontabletools/alpha_diversity.py b/taxontabletools/alpha_diversity.py
@@ -113,6 +113,7 @@ def alpha_diversity_scatter_plot(TaXon_table_xlsx, meta_data_to_test, width, hei
         for category, color in zip(sorted(set(categories)), color_discrete_sequence):
             fig.add_trace(go.Scatter(x=samples_dict[category], y=observed_otus_dict[category], mode='markers', name=category, marker=dict(color=color, size=int(scatter_size))))
         fig.update_layout(height=int(heigth), width=int(width), template=template, yaxis_title=title, showlegend=True, font_size=font_size, title_font_size=font_size)
+        fig.update_yaxes(rangemode="tozero")
 
         ## finish script
         output_pdf = Path(str(path_to_outdirs) + "/" + "Alpha_diversity" + "/" + TaXon_table_xlsx.stem + "_" + meta_data_to_test + "_" + taxon_title + "_scatter_plot.pdf")
@@ -255,6 +256,7 @@ def alpha_diversity_boxplot(TaXon_table_xlsx, meta_data_to_test, width, heigth,
         for category, color in zip(sorted(set(categories)), color_discrete_sequence):
             fig.add_trace(go.Box(y=observed_otus_dict[category], name=category, marker_color=color, marker_line_color="Black", marker_line_width=0.2, opacity=opacity_value))
         fig.update_layout(height=int(heigth), width=int(width), template=template, yaxis_title=title, showlegend=False, font_size=font_size, title_font_size=font_size)
+        fig.update_yaxes(rangemode="tozero")
 
         ## finish script
         output_pdf = Path(str(path_to_outdirs) + "/" + "Alpha_diversity" + "/" + TaXon_table_xlsx.stem + "_" + meta_data_to_test + "_" + taxon_title + "_boxplot.pdf")

diff --git a/taxontabletools/check_read_table_format.py b/taxontabletools/check_read_table_format.py
@@ -23,8 +23,8 @@ def check_read_table_format_TTT(read_table_xlsx):
         sg.PopupError(ErrorMessage, title="Error", keep_on_top=True)
         raise RuntimeError(ErrorMessage)
 
-    if header_prompt_sequences != "Sequences":
-        ErrorMessage = "Oops! Something is wrong with the header: " + header_prompt_sequences + "\n" + "\n" + "Prompt: Sequences"
+    if header_prompt_sequences != "Seq":
+        ErrorMessage = "Oops! Something is wrong with the header: " + header_prompt_sequences + "\n" + "\n" + "Prompt: Sequence or Seq"
         sg.PopupError(ErrorMessage, title="Error", keep_on_top=True)
         raise RuntimeError(ErrorMessage)
 
@@ -117,8 +117,8 @@ def check_read_table_format_qiime2(read_table_xlsx):
         sg.PopupError(ErrorMessage, title="Error", keep_on_top=True)
         raise RuntimeError(ErrorMessage)
 
-    if header_prompt_sequences != "Sequence":
-        ErrorMessage = "Oops! Something is wrong with the header: " + header_prompt_sequences + "\n" + "\n" + "Prompt: Sequence"
+    if header_prompt_sequences != "Seq":
+        ErrorMessage = "Oops! Something is wrong with the header: " + header_prompt_sequences + "\n" + "\n" + "Prompt: Sequence or Seq"
         sg.PopupError(ErrorMessage, title="Error", keep_on_top=True)
         raise RuntimeError(ErrorMessage)
 

diff --git a/taxontabletools/check_taxononomy_table_format.py b/taxontabletools/check_taxononomy_table_format.py
@@ -6,6 +6,9 @@
 # check the input format
 def check_taxononomy_table_format(taxonomy_results_xlsx, sheet_name):
 
+    if sheet_name == 'APSCALE':
+        sheet_name = 'Taxonomy table'
+
     try:
         taxonomy_table_df = pd.read_excel(Path(taxonomy_results_xlsx), sheet_name)
         taxonomy_table_df = taxonomy_table_df.replace(np.nan, 'nan', regex=True)

diff --git a/taxontabletools/normalize_reads.py b/taxontabletools/normalize_reads.py
@@ -104,6 +104,19 @@ def normalize_reads(TaXon_table_xlsx, path_to_outdirs, sub_sample_size):
 
     window_progress_bar.Close()
 
+    ## remove empty OTUs
+    header = df_out.columns.tolist()
+    row_filter_list = []
+    for row in df_out.values.tolist():
+        reads = sum(row[10:])
+        if reads != 0:
+            row_filter_list.append(row)
+        else:
+            print('Removed: {}'.format(row[0]))
+
+    df_out = pd.DataFrame(row_filter_list)
+    df_out.columns = header
+
     ## add already existing metadata back to the df
     if len(TaXon_table_df_metadata.columns) != 1:
         df_out = add_metadata(df_out, TaXon_table_df_metadata)

diff --git a/taxontabletools/rarefaction_curve.py b/taxontabletools/rarefaction_curve.py
@@ -118,13 +118,14 @@ def average(lst):
 
     # draw the plot
     draws = [i+1 for i in rarefaction_dict_average.keys()]
-    n_species = list(rarefaction_dict_average.values())
+    n_species = [float(i) for i in list(rarefaction_dict_average.values())]
     error_bar = list(rarefaction_dict_stdef.values())
     y_axis_title = "# " + taxon_title
     fig = go.Figure(data=[go.Scatter(x=draws, y=n_species, error_y=dict(type='data', array=error_bar, thickness=0.5, width=3, visible=True))])
     fig.update_layout(title_text="repetitions = " + str(n_reps+1), yaxis_title=y_axis_title, xaxis_title="# samples")
     fig.update_traces(marker_color=color1, marker_line_color=color2, opacity=opacity_value)
     fig.update_layout(height=800, width=1200, template=template, showlegend=False, font_size=font_size, title_font_size=font_size)
+    fig.update_yaxes(rangemode="tozero")
 
     ## write files
     output_pdf = Path(str(path_to_outdirs) + "/" + "Rarefaction_curves" + "/" + TaXon_table_file.name + "_rarefaction_" + taxon_title + ".pdf")
@@ -246,7 +247,7 @@ def average(lst):
 
         ## add to plot
         draws = [i+1 for i in rarefaction_dict_average.keys()]
-        n_species = list(rarefaction_dict_average.values())
+        n_species = [float(i) for i in list(rarefaction_dict_average.values())]
         increase_dict[taxon] = n_species
         error_bar = list(rarefaction_dict_stdef.values())
         fig.add_trace(go.Scatter(x=draws, y=n_species, name=taxon, marker_color=color_dict[taxon], error_y=dict(type='data', array=error_bar, thickness=0.5, width=3, visible=True)))

diff --git a/taxontabletools/site_occupancy.py b/taxontabletools/site_occupancy.py
@@ -7,6 +7,7 @@
 import numpy as np
 from plotly.subplots import make_subplots
 from taxontabletools.taxontable_manipulation import strip_metadata
+from taxontabletools.taxontable_manipulation import aggregate_taxontable
 
 def site_occupancy_barchart(TaXon_table_xlsx, meta_data_to_test, taxonomic_level, path_to_outdirs, x_site_occ, y_site_occ, template, theme, font_size):
 
@@ -173,7 +174,7 @@ def site_occupancy_barchart(TaXon_table_xlsx, meta_data_to_test, taxonomic_level
     else:
         sg.PopupError("Please check your Metadata file and Taxon table file: The samples do not match or the metadata is unique for all samples!", keep_on_top=True)
 
-def site_occupancy_heatmap(TaXon_table_xlsx, path_to_outdirs, template, height, width, meta_data_to_test, taxonomic_level, font_size, color_discrete_sequence, add_categories_sum):
+def site_occupancy_heatmap_pa(TaXon_table_xlsx, path_to_outdirs, template, height, width, meta_data_to_test, taxonomic_level, font_size, color_discrete_sequence, add_categories_sum):
 
     ## load TaxonTable
     TaXon_table_xlsx = Path(TaXon_table_xlsx)
@@ -325,3 +326,158 @@ def site_occupancy_heatmap(TaXon_table_xlsx, path_to_outdirs, template, height,
 
         else:
             sg.Popup("The metadata table and taXon table are not matching!")
+
+def site_occupancy_heatmap_reads(TaXon_table_xlsx, path_to_outdirs, template, height, width, meta_data_to_test, taxonomic_level, font_size, color_discrete_sequence, add_categories_sum):
+    """Plot log-transformed read abundances as one heatmap per metadata category.
+
+    Reads the TaXon table and the matching '<stem>_metadata.xlsx' table, drops
+    samples without metadata, aggregates the reads to `taxonomic_level` and
+    writes a PDF and an HTML heatmap to 'Site_occupancy_plots/<stem>'.
+
+    Parameters:
+        TaXon_table_xlsx: path to the TaXon table Excel file.
+        path_to_outdirs: project folder holding 'Meta_data_table' and 'Site_occupancy_plots'.
+        template: unused; the layout is always drawn with the "seaborn" template.
+        height, width: figure size, converted with int().
+        meta_data_to_test: metadata column used to group the samples.
+        taxonomic_level: column to aggregate on; "ASVs", "ESVs", "OTUs" and "zOTUs" map to "ID".
+        font_size: font size applied to the figure.
+        color_discrete_sequence: unused in this function.
+        add_categories_sum: if True, append a heatmap of the summed reads per category.
+
+    Returns None; problems with the input tables are reported through `sg` popups.
+
+    Side effects:
+        Creates the output folder if required, writes the PDF/HTML files, asks whether
+        the HTML plot should be opened in the browser and calls ttt_log for the analysis.
+    """
+
+    ## load TaxonTable
+    TaXon_table_xlsx = Path(TaXon_table_xlsx)
+    TaXon_table_df = pd.read_excel(TaXon_table_xlsx).fillna('')
+    TaXon_table_df = strip_metadata(TaXon_table_df)
+    TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
+
+    ## load the metadata table that belongs to the TaXon table
+    Meta_data_table_xlsx = Path(str(path_to_outdirs) + "/" + "Meta_data_table" + "/" + TaXon_table_xlsx.stem + "_metadata.xlsx")
+    Meta_data_table_df = pd.read_excel(Meta_data_table_xlsx, header=0).fillna("nan")
+    Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()
+
+    ## drop samples with metadata called nan (= empty)
+    ## NOTE(review): this tests the second metadata column (i[1]), whatever meta_data_to_test is -- confirm this is intended
+    drop_samples = [i[0] for i in Meta_data_table_df.values.tolist() if i[1] == "nan"]
+
+    if drop_samples:
+        ## filter the TaXon table
+        TaXon_table_df = TaXon_table_df.drop(drop_samples, axis=1)
+        TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
+        ## also remove empty OTUs
+        columns = TaXon_table_df.columns.tolist()
+        row_filter_list = [row for row in TaXon_table_df.values.tolist() if set(row[10:]) != {0}]
+        TaXon_table_df = pd.DataFrame(row_filter_list, columns=columns)
+        Meta_data_table_df = pd.DataFrame([i for i in Meta_data_table_df.values.tolist() if i[0] not in drop_samples], columns=Meta_data_table_df.columns.tolist())
+        Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()
+
+    metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()
+
+    ## adjust taxonomic level if neccessary
+    if taxonomic_level in ["ASVs", "ESVs", "OTUs", "zOTUs"]:
+        taxonomic_level = "ID"
+
+    ## guard clauses: nothing is plotted for a single category or for mismatching tables
+    if len(set(metadata_list)) == 1:
+        sg.PopupError("Please choose more than one meta data category.")
+        return
+    if sorted(TaXon_table_samples) != sorted(Meta_data_table_samples):
+        sg.Popup("The metadata table and taXon table are not matching!")
+        return
+
+    ## sample columns that are kept next to the taxon column after the aggregation
+    samples = TaXon_table_samples
+
+    ## aggregate taxontable, sort, and extract relevant taxa
+    TaXon_table_df = aggregate_taxontable(TaXon_table_df, taxonomic_level)
+    TaXon_table_df = TaXon_table_df.sort_values(['Phylum', taxonomic_level], ascending=[True, True])
+    TaXon_table_df = TaXon_table_df.loc[TaXon_table_df[taxonomic_level] != ''][[taxonomic_level] + samples]
+
+    ## create a list of samples for each category
+    category_dict = {}
+    for sample, category in zip(Meta_data_table_samples, metadata_list):
+        category_dict.setdefault(category, []).append(sample)
+
+    ## collect all available taxa
+    taxa = TaXon_table_df[taxonomic_level].values.tolist()
+
+    ## keep the raw reads for the category sums
+    TaXon_table_df_copy = TaXon_table_df.copy(deep=True)
+
+    ## convert table to log reads
+    ## NOTE(review): a single read also maps to 0 (log(1) == 0) and cannot be told apart from an absence
+    for col in samples:
+        TaXon_table_df[col] = [np.log(i) if i != 0 else 0 for i in TaXon_table_df[col].values.tolist()]
+
+    ## calculate log max, used as the shared upper color limit of all category heatmaps
+    global_max = max([max(i) for i in TaXon_table_df[samples].values.tolist()])
+
+    colorscales = ['blues', 'reds', 'greens', 'oranges', 'BuPu', 'Greys']
+
+    ## species and genus names are written in italics on the x axis
+    if (taxonomic_level == "Species" or taxonomic_level == "Genus"):
+        x_values = ["<i>" + taxon + "</i>" for taxon in taxa]
+    else:
+        x_values = taxa
+
+    ## one subplot row per category (height relative to its number of samples), plus an optional sum row
+    n_categories = len(set(metadata_list))
+    row_heights = [len(category_samples) for category_samples in category_dict.values()]
+    if add_categories_sum == True:
+        row_heights.append(n_categories)
+    fig = make_subplots(rows=len(row_heights), cols=1, shared_xaxes=True, vertical_spacing=0.05, row_heights=row_heights)
+
+    ## draw one heatmap per category; all of them share zmin/zmax
+    row = 1
+    for category_samples in category_dict.values():
+        z_values = [TaXon_table_df[sample].values.tolist() for sample in category_samples]
+        ## cycle through the colorscales so that any number of categories can be drawn
+        colorscale = colorscales[(row - 1) % len(colorscales)]
+        fig.add_trace(go.Heatmap(z=z_values, x=x_values, y=category_samples, showscale=False, xgap=1, ygap=1, hoverongaps = False, zmin=0, zmax=global_max, colorscale=colorscale), row=row, col=1)
+        row += 1
+
+    ## optional last row: log of the summed raw reads per category (categories passed in reversed order)
+    if add_categories_sum == True:
+        z_values, y_values = [], []
+        for metadata, category_samples in category_dict.items():
+            summed_reads = [sum(reads) for reads in TaXon_table_df_copy[category_samples].values.tolist()]
+            z_values.append([np.log(x) if x != 0 else 0 for x in summed_reads])
+            y_values.append(metadata)
+        fig.add_trace(go.Heatmap(z=z_values[::-1], x=x_values, y=y_values[::-1], showscale=False, xgap=1, ygap=1, hoverongaps = False, colorscale='gray_r'), row=row, col=1)
+
+    fig.update_layout(width=int(width), height=int(height), template="seaborn", font_size=font_size, yaxis_nticks=5, title_font_size=font_size)
+    fig.update_xaxes(tickmode='linear')
+    fig.update_yaxes(tickmode='linear')
+    fig.update_xaxes(tickangle=-90)
+
+    ## makedirs also creates a missing 'Site_occupancy_plots' parent folder
+    occupancy_plot_directory = Path(str(path_to_outdirs) + "/" + "Site_occupancy_plots" + "/" + TaXon_table_xlsx.stem)
+    os.makedirs(occupancy_plot_directory, exist_ok=True)
+
+    ## define output files
+    output_pdf = Path(str(occupancy_plot_directory) + "/" + taxonomic_level + "_" + meta_data_to_test + "_heatmap_reads.pdf")
+    output_html = Path(str(occupancy_plot_directory) + "/" + taxonomic_level + "_" + meta_data_to_test + "_heatmap_reads.html")
+
+    ## write output files
+    fig.write_image(str(output_pdf))
+    fig.write_html(str(output_html))
+
+    ## ask to show file
+    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
+    if answer == "Yes":
+        webbrowser.open('file://' + str(output_html))
+
+    ## print closing text
+    closing_text = "Site occupancy heatmaps are found under:\n" + '/'.join(str(output_pdf).split("/")[-4:])
+    sg.Popup(closing_text, title="Finished", keep_on_top=True)
+
+    ## write to log
+    from taxontabletools.create_log import ttt_log
+    ttt_log("site occupancy", "analysis", TaXon_table_xlsx.name, "", meta_data_to_test, path_to_outdirs)
diff --git a/taxontabletools/table_comparison.py b/taxontabletools/table_comparison.py
@@ -69,6 +69,7 @@ def tc_alpha_diversity(TaXon_table_xlsx_1, TaXon_table_xlsx_2, path_to_outdirs,
     fig.add_trace(go.Box(y=y2, name=name_2, text=text_values2, marker_color=color_discrete_sequence[1]))
     fig.update_yaxes(title=taxon_title)
     fig.update_traces(boxpoints='all', jitter=0.5)
+    fig.update_yaxes(rangemode="tozero")
     fig.update_layout(width=int(width_value), height=int(height_value), template=template, showlegend=False, font_size=font_size)
 
     ## create a folder if neccessary
@@ -411,20 +412,20 @@ def tc_pairwise_sample_comparison(TaXon_table_xlsx_1, TaXon_table_xlsx_2, path_t
         df_out["Only " + name_2] = y_table_2_list
 
         ## Shared / Only barchart
-        fig.add_trace(go.Bar(name=name_1, orientation='h', y=x_samples, x=y_table_1_list, marker_color="rgb(141,160,203)"), row=1, col=1)
         fig.add_trace(go.Bar(name="Shared", orientation='h', y=x_samples, x=y_shared_list, marker_color="rgb(102,194,164)"), row=1, col=1)
+        fig.add_trace(go.Bar(name=name_1, orientation='h', y=x_samples, x=y_table_1_list, marker_color="rgb(141,160,203)"), row=1, col=1)
         fig.add_trace(go.Bar(name=name_2, orientation='h', y=x_samples, x=y_table_2_list, marker_color="rgb(252,141,98)"), row=1, col=1)
         fig.update_layout(barmode='stack', showlegend=False, width=int(width_value), height=int(height_value), template=template, title="", font_size=font_size)
         fig.update_yaxes(tickmode = 'linear', showgrid=False, row=1, col=1)
         fig.update_xaxes(title=taxon_title + " (%)", showgrid=True, row=1, col=1)
 
         ## Jaccard plot
         y = list(jaccard_dict.keys())
-        x = list(jaccard_dict.values())
+        x = [float(i) for i in list(jaccard_dict.values())]
         df_out["Jaccard dissimilarity"] = x
         fig.add_trace(go.Bar(y=y, x=x, name="Jaccard", orientation='h', marker_color="lightgrey"), row=1, col=2)
         fig.update_yaxes(tickmode = 'linear', showticklabels=False, showgrid=False, row=1, col=2)
-        fig.update_xaxes(title="jaccard diss.", dtick = 0.5, showgrid=True, range=[0,1], row=1, col=2)
+        fig.update_xaxes(title="jaccard diss.", showgrid=True, range=[0, 1], autorange=False, tick0=0, dtick=0.5, row=1, col=2)
 
         y = x_samples
         x = y_n_taxa