diff --git a/docs/_includes/plotly/rq6_tropes.html b/docs/_includes/plotly/rq6_tropes.html index b6f3bef..f383962 100644 --- a/docs/_includes/plotly/rq6_tropes.html +++ b/docs/_includes/plotly/rq6_tropes.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/_includes/plotly/rq7_movie_clusters.html b/docs/_includes/plotly/rq7_movie_clusters.html new file mode 100644 index 0000000..7ce2fc7 --- /dev/null +++ b/docs/_includes/plotly/rq7_movie_clusters.html @@ -0,0 +1,2 @@ +
+
\ No newline at end of file diff --git a/docs/_includes/plotly/rq7_trope_combinations.html b/docs/_includes/plotly/rq7_trope_combinations.html new file mode 100644 index 0000000..2e4787a --- /dev/null +++ b/docs/_includes/plotly/rq7_trope_combinations.html @@ -0,0 +1,2 @@ +
+
\ No newline at end of file diff --git a/docs/_includes/plotly/rq7_trope_network.html b/docs/_includes/plotly/rq7_trope_network.html new file mode 100644 index 0000000..49a190c --- /dev/null +++ b/docs/_includes/plotly/rq7_trope_network.html @@ -0,0 +1,2 @@ +
+
\ No newline at end of file diff --git a/docs/_includes/plotly/rq7_tropes_boxplot.html b/docs/_includes/plotly/rq7_tropes_boxplot.html index 52ba760..37c7171 100644 --- a/docs/_includes/plotly/rq7_tropes_boxplot.html +++ b/docs/_includes/plotly/rq7_tropes_boxplot.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/_includes/plotly/rq7_worst_clusters.html b/docs/_includes/plotly/rq7_worst_clusters.html new file mode 100644 index 0000000..08e8d7a --- /dev/null +++ b/docs/_includes/plotly/rq7_worst_clusters.html @@ -0,0 +1,2 @@ +
+
\ No newline at end of file diff --git a/docs/_includes/plotlyrq7_movie_clusters.html b/docs/_includes/plotlyrq7_movie_clusters.html deleted file mode 100644 index e494e61..0000000 --- a/docs/_includes/plotlyrq7_movie_clusters.html +++ /dev/null @@ -1,2 +0,0 @@ -
-
\ No newline at end of file diff --git a/docs/_includes/plotlyrq7_trope_combinations.html b/docs/_includes/plotlyrq7_trope_combinations.html deleted file mode 100644 index cd2a0d7..0000000 --- a/docs/_includes/plotlyrq7_trope_combinations.html +++ /dev/null @@ -1,2 +0,0 @@ -
-
\ No newline at end of file diff --git a/docs/_includes/plotlyrq7_trope_network.html b/docs/_includes/plotlyrq7_trope_network.html deleted file mode 100644 index d100e93..0000000 --- a/docs/_includes/plotlyrq7_trope_network.html +++ /dev/null @@ -1,2 +0,0 @@ -
-
\ No newline at end of file diff --git a/docs/_includes/plotlyrq7_worst_clusters.html b/docs/_includes/plotlyrq7_worst_clusters.html deleted file mode 100644 index d5bf3b0..0000000 --- a/docs/_includes/plotlyrq7_worst_clusters.html +++ /dev/null @@ -1,2 +0,0 @@ -
-
\ No newline at end of file diff --git a/src/utils/plot_settings.py b/src/utils/plot_settings.py index c6b995f..72ca3d9 100644 --- a/src/utils/plot_settings.py +++ b/src/utils/plot_settings.py @@ -97,4 +97,10 @@ def apply_bar_style(fig): font_family='Arial, sans-serif' ) ) + fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray') + fig.update_yaxes( + showgrid=False, + automargin=True, + tickfont={'size': 12} + ) return fig \ No newline at end of file diff --git a/src/utils/trope_analysis.py b/src/utils/trope_analysis.py index 8a20d09..77e8fb8 100644 --- a/src/utils/trope_analysis.py +++ b/src/utils/trope_analysis.py @@ -1,5 +1,6 @@ from collections import defaultdict from pathlib import Path +import math import numpy as np import pandas as pd @@ -28,7 +29,7 @@ get_subplot_settings ) -OUTPUT_PATH = "data/preprocessed/" +OUTPUT_PATH = "docs/_includes/plotly/" def get_unique_genres(df_tropes_filtered): @@ -201,10 +202,15 @@ def rq6(df_cmu_tropes, threshold=6.0, k=10, min_votes=100): title=default_title, yaxis=dict( automargin=True, - showline=True, + showline=False, autorange="reversed", **AXIS_STYLE ), + xaxis=dict( + showgrid=True, + gridwidth=1, + gridcolor='LightGray', + ), xaxis_title='Ratio of low-rated movies to high-rated movies', yaxis_title='Tropes', annotations=[dict( @@ -266,6 +272,7 @@ def rq7(df_cmu_tropes, show_plotly_charts=True): showline=True, **AXIS_STYLE ), + height=500, ) fig.show() @@ -290,6 +297,7 @@ def rq7(df_cmu_tropes, show_plotly_charts=True): plt.title("Top 10 tropes with lowest average rating") plt.show() + def cluster_movies(df_cmu_tropes, df_tropes, n_clusters, n_tropes): trope_to_idx = {trope_id: idx for idx, trope_id in enumerate(df_tropes["TropeID"])} idx_to_trope = {idx: trope_id for idx, trope_id in enumerate(df_tropes["TropeID"])} @@ -315,6 +323,7 @@ def cluster_movies(df_cmu_tropes, df_tropes, n_clusters, n_tropes): return X_normalized, kmeans, movie_embeddings + def plot_movie_clusters(X_normalized, kmeans): tsne = TSNE(n_components=2, random_state=42, perplexity=30, n_iter=1000) X_2d = tsne.fit_transform(X_normalized) @@ -345,11 +354,13 @@ def plot_movie_clusters(X_normalized, kmeans): fig.update_layout( title='Movie clusters by tropes', + title_x=0.5, xaxis_title='First t-SNE Component', yaxis_title='Second t-SNE Component', showlegend=True, - width=1000, - height=800 + **COMMON_LAYOUT, + width=800, + height=600 ) fig.show() @@ -398,11 +409,13 @@ def plot_worst_clusters(df_cmu_tmdb_filtered, X_normalized, kmeans, top_k): fig.update_layout( title='Top 10 Worst-Rated Clusters Visualized with t-SNE', + title_x=0.5, xaxis_title='First t-SNE Component', yaxis_title='Second t-SNE Component', showlegend=True, - width=1000, - height=800 + **COMMON_LAYOUT, + width=800, + height=600 ) fig.show() @@ -412,6 +425,7 @@ def plot_worst_clusters(df_cmu_tmdb_filtered, X_normalized, kmeans, top_k): full_html=False, ) + def compute_worst_clusters_tropes(df_cmu_tmdb_filtered, df_cmu_tropes): cluster_avg_vote_average = df_cmu_tmdb_filtered.groupby("cluster")["vote_average"].mean() worst_clusters_by_rating = cluster_avg_vote_average.sort_values(ascending=True)[:10] @@ -424,6 +438,7 @@ def compute_worst_clusters_tropes(df_cmu_tmdb_filtered, df_cmu_tropes): return worst_clusters_tropes + def plot_trope_combinations(worst_clusters_tropes): tab10_colors = cm.tab10.colors color = f'rgb({int(tab10_colors[0][0]*255)},{int(tab10_colors[0][1]*255)},{int(tab10_colors[0][2]*255)})' @@ -448,15 +463,13 @@ def get_top_tropes(tropes_dict, n=3): fig = go.Figure() - colors = [f'rgb({int(tab10_colors[i][0]*255)},{int(tab10_colors[i][1]*255)},{int(tab10_colors[i][2]*255)})' for i in range(3)] - fig.add_trace(go.Bar( x=weights, y=combinations, orientation='h', marker=dict( - color=weights, - colorscale=colors, + color=COLORS[0], + line=dict(color=COLORS[0], width=1), ), hovertemplate='Weight: %{x}
%{y}' )) @@ -472,17 +485,15 @@ def get_top_tropes(tropes_dict, n=3): }, xaxis_title='Total Weight', yaxis_title=None, - height=500, - width=1000, - margin=dict(l=20, r=20, t=60, b=40), - yaxis={'categoryorder': 'total ascending'} + yaxis={'categoryorder': 'total ascending'}, + **COMMON_LAYOUT ) fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray') fig.update_yaxes( showgrid=False, automargin=True, - tickfont={'size': 12} + tickfont={'size': 14} ) fig.show() @@ -579,10 +590,10 @@ def plot_trope_network(worst_clusters_tropes): fig = go.Figure(data=edge_trace + [node_trace], layout=go.Layout( title='Trope Co-occurrence Network in Movies', + title_x=0.5, titlefont=dict(size=16), showlegend=False, hovermode='closest', - margin=dict(b=20,l=5,r=5,t=40), annotations=[ dict( text="Node size represents trope frequency
Edge thickness represents co-occurrence strength", @@ -593,8 +604,7 @@ def plot_trope_network(worst_clusters_tropes): ], xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), - width=1200, - height=1000 + **COMMON_LAYOUT )) fig.show()