-
Notifications
You must be signed in to change notification settings - Fork 0
/
descriptives.py
80 lines (58 loc) · 2.34 KB
/
descriptives.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
Author: Harrison Curtis
"""
# Import necessary libraries.
import pandas as pd
import ast
def main():
    """Print descriptive statistics for the transformer sentiment results.

    Reads ``data/tranformers_analysis_results.csv``, expands its sentiment
    scores with ``sentiment_split``, then prints the average review length,
    the total number of reviews and the number labelled ``POSITIVE``.
    Finally writes the bodies of the ``NEGATIVE`` reviews to
    ``data/negativereview.txt`` and prints their sentiment scores.
    """
    # Read in the analysis results for the descriptive statistics.
    transformer_df = sentiment_split(
        pd.read_csv("data/tranformers_analysis_results.csv")
    )

    # Get the average length of the reviews.
    print(avg_review_length(transformer_df, reviews_col="reviewBody"))

    # Number of reviews, in total and labelled positive. The comparison gives
    # a boolean Series, so its vectorized sum is the count of matches.
    n_reviews = len(transformer_df)
    n_positive_reviews = (transformer_df["label"] == "POSITIVE").sum()
    print(n_reviews)
    print(n_positive_reviews)

    # Extract the negative reviews. Per the original author's note these are
    # kept as an example of a misclassification by the model with a high
    # score.
    negative_review = transformer_df[transformer_df["label"] == "NEGATIVE"]
    negative_review_body = negative_review["reviewBody"]
    negative_review_body.to_csv('data/negativereview.txt', sep=' ', index=False)
    print(negative_review["sentiment_scores"])
# Functions to compute basic descriptive statistics.
def count_words(text: str) -> int:
    """Return the number of whitespace-separated words in ``text``.

    Args:
        text (str): The text whose words are counted.

    Returns:
        int: The number of pieces produced by ``text.split()``; 0 for an
            empty or whitespace-only string.
    """
    # str.split() without arguments splits on runs of whitespace and drops
    # empty pieces, so repeated or leading/trailing spaces are not counted.
    return len(text.split())
def avg_review_length(df: pd.DataFrame, reviews_col: str) -> int:
    """Calculate the average length, in words, of the reviews in a DataFrame.

    As a side effect, a ``text_length`` column holding the per-review word
    count is added to (or overwritten in) ``df``.

    Args:
        df (pandas.DataFrame): The DataFrame containing the text data.
        reviews_col (str): The name of the column containing the review text.

    Returns:
        int: The average review length in ``reviews_col``, rounded to a whole
            number. Missing reviews are ignored; 0 is returned when there is
            no review to average over.
    """
    # Get the length of each review. The vectorized string methods split on
    # whitespace and leave missing reviews as NaN instead of raising.
    word_counts = df[reviews_col].str.split().str.len()
    df['text_length'] = word_counts

    # mean() skips NaN and is itself NaN when nothing is left to average
    # (empty frame or only missing reviews); int(NaN) would raise.
    mean_length = word_counts.mean()
    if pd.isna(mean_length):
        return 0

    # Round the answer to a whole number.
    return int(round(mean_length))
# Functions for transformer model descriptives.
def convert_to_dict(string):
    """Parse the text form of a Python literal (such as a dict) into an object.

    Args:
        string (str or dict): The literal's source text, e.g.
            ``"{'label': 'POSITIVE', 'score': 0.99}"``. A value that is
            already a dict is returned unchanged, so data that has been
            parsed once can be passed through again.

    Returns:
        The object described by the literal (a dict for dict literals).

    Raises:
        ValueError, SyntaxError: If ``string`` is not a valid Python literal.
    """
    if isinstance(string, dict):
        return string
    # literal_eval only accepts literal syntax, so unlike eval() it cannot
    # execute code embedded in the text.
    return ast.literal_eval(string)
def sentiment_split(df, col="sentiment_scores"):
    """Expand a column of stringified score dicts into separate columns.

    Each value in ``df[col]`` is parsed from its text form into a Python
    object, and the keys of the parsed dicts become new columns appended to
    the right of the original ones. The input DataFrame is not modified.

    Args:
        df (pandas.DataFrame): The DataFrame holding the score column.
        col (str): The name of the column whose values are the text form of
            a dict. Defaults to ``"sentiment_scores"``.

    Returns:
        pandas.DataFrame: A new DataFrame with ``col`` replaced by the parsed
            objects, followed by one column per key of the parsed dicts.

    Raises:
        ValueError, SyntaxError: If a value in ``col`` is not a valid Python
            literal.
    """
    # Convert the column's text values to Python objects. literal_eval only
    # accepts literal syntax, so the stored text cannot execute code.
    parsed_scores = df[col].apply(ast.literal_eval)

    # Split the parsed dicts into separate columns, keeping the original
    # index so the rows line up when concatenated.
    scores_df = pd.DataFrame(parsed_scores.tolist(), index=df.index)

    # Work on a copy so the caller's DataFrame keeps its original text column.
    result = df.copy()
    result[col] = parsed_scores

    # Concatenate the sentiment score columns with the original columns.
    return pd.concat([result, scores_df], axis=1)
# Run the descriptive analysis only when executed as a script, not on import.
if __name__ == "__main__":
    main()