Skip to content

Commit

Permalink
1.4.8
Browse files Browse the repository at this point in the history
  • Loading branch information
TillMacher committed Jan 26, 2023
1 parent 82caa91 commit c3295b5
Show file tree
Hide file tree
Showing 18 changed files with 341 additions and 269 deletions.
Binary file modified .DS_Store
Binary file not shown.
18 changes: 16 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,26 @@ all modules (OTU traits will simply be ignored if not required).

</details>

## Change log
## Change log (only for major changes)

### v 1.4.8

* New feature:
* Added support for easier read and taxonomy table conversion from APSCALE.
* Important changes:
-> Taxonomy table sheet name for the APSCALE import changed to 'Taxonomy table'
-> Read table column name for OTU sequences changed to 'Seq'
-> These are the default names generated by APSCALE, which makes importing data easier.

* Bug fixes:
* Y-axes for alpha diversity and rarefaction plots start at 0.
* Fixed crash of the venn diagram module.
* Fixed potentially remaining zero read OTUs after read-based rarefaction.

### v 1.4.5
TTT change log v 1.4.5

* Added trait import to the data conversion modules.
* Added trait import to the data conversion modules.


### v 1.4.4
Expand Down
Binary file modified _tutorial_files/tutorial_read_table_TTT.xlsx
Binary file not shown.
Binary file modified _tutorial_files/tutorial_taxonomy_table.xlsx
Binary file not shown.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="taxontabletools", # Replace with your own username
version="1.4.3",
version="1.4.8",
author="Till-Hendrik Macher",
author_email="[email protected]",
description="TaxonTableTools - A comprehensive, platform-independent graphical user interface software to explore and visualise DNA metabarcoding data",
Expand All @@ -23,7 +23,7 @@
'openpyxl>=3.0.3',
'xlsxwriter>=1.2.7',
'biopython>=1.77',
'scikit-bio>=0.5.6',
'scikit-bio==0.5.6',
'requests_html>=0.10.0',
'scipy>=1.5.1',
'shapely>=1.7.1',
Expand Down
Binary file modified taxontabletools/.DS_Store
Binary file not shown.
156 changes: 65 additions & 91 deletions taxontabletools/__main__.py
100755 → 100644

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions taxontabletools/alpha_diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def alpha_diversity_scatter_plot(TaXon_table_xlsx, meta_data_to_test, width, hei
for category, color in zip(sorted(set(categories)), color_discrete_sequence):
fig.add_trace(go.Scatter(x=samples_dict[category], y=observed_otus_dict[category], mode='markers', name=category, marker=dict(color=color, size=int(scatter_size))))
fig.update_layout(height=int(heigth), width=int(width), template=template, yaxis_title=title, showlegend=True, font_size=font_size, title_font_size=font_size)
fig.update_yaxes(rangemode="tozero")

## finish script
output_pdf = Path(str(path_to_outdirs) + "/" + "Alpha_diversity" + "/" + TaXon_table_xlsx.stem + "_" + meta_data_to_test + "_" + taxon_title + "_scatter_plot.pdf")
Expand Down Expand Up @@ -255,6 +256,7 @@ def alpha_diversity_boxplot(TaXon_table_xlsx, meta_data_to_test, width, heigth,
for category, color in zip(sorted(set(categories)), color_discrete_sequence):
fig.add_trace(go.Box(y=observed_otus_dict[category], name=category, marker_color=color, marker_line_color="Black", marker_line_width=0.2, opacity=opacity_value))
fig.update_layout(height=int(heigth), width=int(width), template=template, yaxis_title=title, showlegend=False, font_size=font_size, title_font_size=font_size)
fig.update_yaxes(rangemode="tozero")

## finish script
output_pdf = Path(str(path_to_outdirs) + "/" + "Alpha_diversity" + "/" + TaXon_table_xlsx.stem + "_" + meta_data_to_test + "_" + taxon_title + "_boxplot.pdf")
Expand Down
8 changes: 4 additions & 4 deletions taxontabletools/check_read_table_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ def check_read_table_format_TTT(read_table_xlsx):
sg.PopupError(ErrorMessage, title="Error", keep_on_top=True)
raise RuntimeError(ErrorMessage)

if header_prompt_sequences != "Sequences":
ErrorMessage = "Oops! Something is wrong with the header: " + header_prompt_sequences + "\n" + "\n" + "Prompt: Sequences"
if header_prompt_sequences != "Seq":
ErrorMessage = "Oops! Something is wrong with the header: " + header_prompt_sequences + "\n" + "\n" + "Prompt: Sequence or Seq"
sg.PopupError(ErrorMessage, title="Error", keep_on_top=True)
raise RuntimeError(ErrorMessage)

Expand Down Expand Up @@ -117,8 +117,8 @@ def check_read_table_format_qiime2(read_table_xlsx):
sg.PopupError(ErrorMessage, title="Error", keep_on_top=True)
raise RuntimeError(ErrorMessage)

if header_prompt_sequences != "Sequence":
ErrorMessage = "Oops! Something is wrong with the header: " + header_prompt_sequences + "\n" + "\n" + "Prompt: Sequence"
if header_prompt_sequences != "Seq":
ErrorMessage = "Oops! Something is wrong with the header: " + header_prompt_sequences + "\n" + "\n" + "Prompt: Sequence or Seq"
sg.PopupError(ErrorMessage, title="Error", keep_on_top=True)
raise RuntimeError(ErrorMessage)

Expand Down
3 changes: 3 additions & 0 deletions taxontabletools/check_taxononomy_table_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# check the input format
def check_taxononomy_table_format(taxonomy_results_xlsx, sheet_name):

if sheet_name == 'APSCALE':
sheet_name = 'Taxonomy table'

try:
taxonomy_table_df = pd.read_excel(Path(taxonomy_results_xlsx), sheet_name)
taxonomy_table_df = taxonomy_table_df.replace(np.nan, 'nan', regex=True)
Expand Down
13 changes: 13 additions & 0 deletions taxontabletools/normalize_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,19 @@ def normalize_reads(TaXon_table_xlsx, path_to_outdirs, sub_sample_size):

window_progress_bar.Close()

## remove empty OTUs
header = df_out.columns.tolist()
row_filter_list = []
for row in df_out.values.tolist():
reads = sum(row[10:])
if reads != 0:
row_filter_list.append(row)
else:
print('Removed: {}'.format(row[0]))

df_out = pd.DataFrame(row_filter_list)
df_out.columns = header

## add already existing metadata back to the df
if len(TaXon_table_df_metadata.columns) != 1:
df_out = add_metadata(df_out, TaXon_table_df_metadata)
Expand Down
5 changes: 3 additions & 2 deletions taxontabletools/rarefaction_curve.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,14 @@ def average(lst):

# draw the plot
draws = [i+1 for i in rarefaction_dict_average.keys()]
n_species = list(rarefaction_dict_average.values())
n_species = [float(i) for i in list(rarefaction_dict_average.values())]
error_bar = list(rarefaction_dict_stdef.values())
y_axis_title = "# " + taxon_title
fig = go.Figure(data=[go.Scatter(x=draws, y=n_species, error_y=dict(type='data', array=error_bar, thickness=0.5, width=3, visible=True))])
fig.update_layout(title_text="repetitions = " + str(n_reps+1), yaxis_title=y_axis_title, xaxis_title="# samples")
fig.update_traces(marker_color=color1, marker_line_color=color2, opacity=opacity_value)
fig.update_layout(height=800, width=1200, template=template, showlegend=False, font_size=font_size, title_font_size=font_size)
fig.update_yaxes(rangemode="tozero")

## write files
output_pdf = Path(str(path_to_outdirs) + "/" + "Rarefaction_curves" + "/" + TaXon_table_file.name + "_rarefaction_" + taxon_title + ".pdf")
Expand Down Expand Up @@ -246,7 +247,7 @@ def average(lst):

## add to plot
draws = [i+1 for i in rarefaction_dict_average.keys()]
n_species = list(rarefaction_dict_average.values())
n_species = [float(i) for i in list(rarefaction_dict_average.values())]
increase_dict[taxon] = n_species
error_bar = list(rarefaction_dict_stdef.values())
fig.add_trace(go.Scatter(x=draws, y=n_species, name=taxon, marker_color=color_dict[taxon], error_y=dict(type='data', array=error_bar, thickness=0.5, width=3, visible=True)))
Expand Down
158 changes: 157 additions & 1 deletion taxontabletools/site_occupancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np
from plotly.subplots import make_subplots
from taxontabletools.taxontable_manipulation import strip_metadata
from taxontabletools.taxontable_manipulation import aggregate_taxontable

def site_occupancy_barchart(TaXon_table_xlsx, meta_data_to_test, taxonomic_level, path_to_outdirs, x_site_occ, y_site_occ, template, theme, font_size):

Expand Down Expand Up @@ -173,7 +174,7 @@ def site_occupancy_barchart(TaXon_table_xlsx, meta_data_to_test, taxonomic_level
else:
sg.PopupError("Please check your Metadata file and Taxon table file: The samples do not match or the metadata is unique for all samples!", keep_on_top=True)

def site_occupancy_heatmap(TaXon_table_xlsx, path_to_outdirs, template, height, width, meta_data_to_test, taxonomic_level, font_size, color_discrete_sequence, add_categories_sum):
def site_occupancy_heatmap_pa(TaXon_table_xlsx, path_to_outdirs, template, height, width, meta_data_to_test, taxonomic_level, font_size, color_discrete_sequence, add_categories_sum):

## load TaxonTable
TaXon_table_xlsx = Path(TaXon_table_xlsx)
Expand Down Expand Up @@ -325,3 +326,158 @@ def site_occupancy_heatmap(TaXon_table_xlsx, path_to_outdirs, template, height,

else:
sg.Popup("The metadata table and taXon table are not matching!")

def site_occupancy_heatmap_reads(TaXon_table_xlsx, path_to_outdirs, template, height, width, meta_data_to_test, taxonomic_level, font_size, color_discrete_sequence, add_categories_sum):
    """Draw a site occupancy heatmap based on log-transformed read numbers.

    One heatmap row-block is drawn per metadata category (samples on the
    y axis, taxa on the x axis). Optionally a final block with the summed
    reads per category is appended. The figure is written as PDF and HTML to
    ``<path_to_outdirs>/Site_occupancy_plots/<TaXon table stem>/``.

    Parameters:
        TaXon_table_xlsx: path to the TaXon table (.xlsx). The first ten
            columns are treated as taxonomy columns, the rest as samples.
        path_to_outdirs: project output directory. The metadata table is read
            from ``Meta_data_table/<TaXon table stem>_metadata.xlsx`` inside it.
        template: accepted for interface compatibility; the layout below
            currently hard-codes the "seaborn" template.
        height, width: figure size, converted with ``int()``.
        meta_data_to_test: name of the metadata column used to group samples.
        taxonomic_level: taxonomy column to aggregate on. "ASVs", "ESVs",
            "OTUs" and "zOTUs" are mapped to the "ID" column.
        font_size: font size passed to the figure layout.
        color_discrete_sequence: accepted for interface compatibility; not
            used by this function.
        add_categories_sum: if True, add a row-block with the summed reads of
            each category.

    Returns:
        None. Results are written to disk and reported through GUI popups.
    """

    ## load TaxonTable
    TaXon_table_xlsx = Path(TaXon_table_xlsx)
    TaXon_table_df = pd.read_excel(TaXon_table_xlsx).fillna('')
    TaXon_table_df = strip_metadata(TaXon_table_df)
    TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]

    Meta_data_table_xlsx = Path(str(path_to_outdirs) + "/" + "Meta_data_table" + "/" + TaXon_table_xlsx.stem + "_metadata.xlsx")
    Meta_data_table_df = pd.read_excel(Meta_data_table_xlsx, header=0).fillna("nan")
    Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    ## drop samples with metadata called nan (= empty)
    ## NOTE(review): this inspects the second column of the metadata table (index 1),
    ## not necessarily the column named by meta_data_to_test - confirm this is intended.
    drop_samples = [i[0] for i in Meta_data_table_df.values.tolist() if i[1] == "nan"]

    if drop_samples:
        ## filter the TaXon table
        TaXon_table_df = TaXon_table_df.drop(drop_samples, axis=1)
        TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
        ## also remove empty OTUs (rows whose remaining samples all hold 0 reads)
        columns = TaXon_table_df.columns.tolist()
        row_filter_list = [row for row in TaXon_table_df.values.tolist() if set(row[10:]) != {0}]
        TaXon_table_df = pd.DataFrame(row_filter_list, columns=columns)
        Meta_data_table_df = pd.DataFrame([i for i in Meta_data_table_df.values.tolist() if i[0] not in drop_samples], columns=Meta_data_table_df.columns.tolist())
        Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()

    ## adjust taxonomic level if neccessary
    if taxonomic_level in ["ASVs", "ESVs", "OTUs", "zOTUs"]:
        taxonomic_level = "ID"

    ## at least two categories are required
    if len(set(metadata_list)) == 1:
        sg.PopupError("Please choose more than one meta data category.")
        return

    ## both tables must contain the same samples
    if sorted(TaXon_table_samples) != sorted(Meta_data_table_samples):
        sg.Popup("The metadata table and taXon table are not matching!")
        return

    ## keep the full sample list under its own name (it is not rebound by the loops below)
    all_samples = TaXon_table_samples

    ## aggregate taxontable, sort, and extract relevant taxa
    TaXon_table_df = aggregate_taxontable(TaXon_table_df, taxonomic_level)
    TaXon_table_df = TaXon_table_df.sort_values(['Phylum', taxonomic_level], ascending=[True, True])
    TaXon_table_df = TaXon_table_df.loc[TaXon_table_df[taxonomic_level] != ''][[taxonomic_level] + all_samples]

    ## create a list of samples for each category (insertion order = order of first appearance)
    category_dict = {}
    for sample, category in zip(Meta_data_table_samples, metadata_list):
        category_dict.setdefault(category, []).append(sample)

    ## collect all available taxa
    taxa = TaXon_table_df[taxonomic_level].values.tolist()

    ## make a copy of the original df (raw reads are needed for the category sums)
    TaXon_table_df_copy = TaXon_table_df.copy(deep=True)

    ## convert table to log reads (zero reads stay 0; note that log(1) is 0 as well)
    for col in all_samples:
        TaXon_table_df[col] = [np.log(i) if i != 0 else 0 for i in TaXon_table_df[col].values.tolist()]

    ## calculate log max, used as shared upper colour limit of all category heatmaps
    global_max = max(max(i) for i in TaXon_table_df[all_samples].values.tolist())

    ## one colour scale per category; cycled if there are more categories than scales
    colorscales = ['blues', 'reds', 'greens', 'oranges', 'BuPu', 'Greys']

    if taxonomic_level in ("Species", "Genus"):
        x_values = ["<i>" + taxon + "</i>" for taxon in taxa]
    else:
        x_values = taxa

    ## the height of each subplot is proportional to its number of rows
    with_category_sum = add_categories_sum == True
    n_categories = len(category_dict)
    row_heights = [len(category_samples) for category_samples in category_dict.values()]
    n_rows = n_categories
    if with_category_sum:
        row_heights.append(n_categories)
        n_rows += 1
    fig = make_subplots(rows=n_rows, cols=1, shared_xaxes=True, vertical_spacing=0.05, row_heights=row_heights)

    ## one heatmap per category
    row = 1
    for category_samples in category_dict.values():
        z_values = [TaXon_table_df[sample].values.tolist() for sample in category_samples]
        colorscale = colorscales[(row - 1) % len(colorscales)]
        fig.add_trace(go.Heatmap(z=z_values, x=x_values, y=category_samples, showscale=False, xgap=1, ygap=1, hoverongaps = False, zmin=0, zmax=global_max, colorscale=colorscale), row=row, col=1)
        row += 1

    ## optional: log of the summed raw reads per category
    if with_category_sum:
        z_values, y_values = [], []
        for metadata, category_samples in category_dict.items():
            summed_reads = [sum(reads) for reads in TaXon_table_df_copy[category_samples].values.tolist()]
            z_values.append([np.log(x) if x != 0 else 0 for x in summed_reads])
            y_values.append(metadata)
        fig.add_trace(go.Heatmap(z=z_values[::-1], x=x_values, y=y_values[::-1], showscale=False, xgap=1, ygap=1, hoverongaps = False, colorscale='gray_r'), row=row, col=1)
        row += 1

    ## NOTE(review): the `template` argument is not applied here - "seaborn" is hard-coded.
    fig.update_layout(width=int(width), height=int(height), template="seaborn", font_size=font_size, yaxis_nticks=5, title_font_size=font_size)
    fig.update_xaxes(tickmode='linear')
    fig.update_yaxes(tickmode='linear')
    fig.update_xaxes(tickangle=-90)

    ## create the output folder (including missing parent folders) if neccessary
    occupancy_plot_directory = Path(str(path_to_outdirs) + "/" + "Site_occupancy_plots" + "/" + TaXon_table_xlsx.stem)
    os.makedirs(occupancy_plot_directory, exist_ok=True)

    ## define output files
    output_pdf = Path(str(occupancy_plot_directory) + "/" + taxonomic_level + "_" + meta_data_to_test + "_heatmap_reads.pdf")
    output_html = Path(str(occupancy_plot_directory) + "/" + taxonomic_level + "_" + meta_data_to_test + "_heatmap_reads.html")

    ## write output files
    fig.write_image(str(output_pdf))
    fig.write_html(str(output_html))

    ## ask to show file
    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
    if answer == "Yes":
        webbrowser.open('file://' + str(output_html))

    ## print closing text
    closing_text = "Site occupancy heatmaps are found under:\n" + '/'.join(str(output_pdf).split("/")[-4:])
    sg.Popup(closing_text, title="Finished", keep_on_top=True)

    ## write to log
    from taxontabletools.create_log import ttt_log
    ttt_log("site occupancy", "analysis", TaXon_table_xlsx.name, "", meta_data_to_test, path_to_outdirs)
7 changes: 4 additions & 3 deletions taxontabletools/table_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def tc_alpha_diversity(TaXon_table_xlsx_1, TaXon_table_xlsx_2, path_to_outdirs,
fig.add_trace(go.Box(y=y2, name=name_2, text=text_values2, marker_color=color_discrete_sequence[1]))
fig.update_yaxes(title=taxon_title)
fig.update_traces(boxpoints='all', jitter=0.5)
fig.update_yaxes(rangemode="tozero")
fig.update_layout(width=int(width_value), height=int(height_value), template=template, showlegend=False, font_size=font_size)

## create a folder if neccessary
Expand Down Expand Up @@ -411,20 +412,20 @@ def tc_pairwise_sample_comparison(TaXon_table_xlsx_1, TaXon_table_xlsx_2, path_t
df_out["Only " + name_2] = y_table_2_list

## Shared / Only barchart
fig.add_trace(go.Bar(name=name_1, orientation='h', y=x_samples, x=y_table_1_list, marker_color="rgb(141,160,203)"), row=1, col=1)
fig.add_trace(go.Bar(name="Shared", orientation='h', y=x_samples, x=y_shared_list, marker_color="rgb(102,194,164)"), row=1, col=1)
fig.add_trace(go.Bar(name=name_1, orientation='h', y=x_samples, x=y_table_1_list, marker_color="rgb(141,160,203)"), row=1, col=1)
fig.add_trace(go.Bar(name=name_2, orientation='h', y=x_samples, x=y_table_2_list, marker_color="rgb(252,141,98)"), row=1, col=1)
fig.update_layout(barmode='stack', showlegend=False, width=int(width_value), height=int(height_value), template=template, title="", font_size=font_size)
fig.update_yaxes(tickmode = 'linear', showgrid=False, row=1, col=1)
fig.update_xaxes(title=taxon_title + " (%)", showgrid=True, row=1, col=1)

## Jaccard plot
y = list(jaccard_dict.keys())
x = list(jaccard_dict.values())
x = [float(i) for i in list(jaccard_dict.values())]
df_out["Jaccard dissimilarity"] = x
fig.add_trace(go.Bar(y=y, x=x, name="Jaccard", orientation='h', marker_color="lightgrey"), row=1, col=2)
fig.update_yaxes(tickmode = 'linear', showticklabels=False, showgrid=False, row=1, col=2)
fig.update_xaxes(title="jaccard diss.", dtick = 0.5, showgrid=True, range=[0,1], row=1, col=2)
fig.update_xaxes(title="jaccard diss.", showgrid=True, range=[0, 1], autorange=False, tick0=0, dtick=0.5, row=1, col=2)

y = x_samples
x = y_n_taxa
Expand Down
Loading

0 comments on commit c3295b5

Please sign in to comment.