-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
72 lines (48 loc) · 2.31 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from sklearn.metrics import f1_score, accuracy_score
from datasets import Dataset
import os
import pandas as pd
from PIL import Image
# Path of the tab-separated test split that read_test_dataset() loads.
TEST_FILE = os.path.join("data", "test", "Test.csv")
def compute_metrics(y_true, y_pred):
    """Score a single set of predictions against its reference labels.

    Args:
        y_true: Reference labels, passed straight to the sklearn scorers.
        y_pred: Predicted labels, passed straight to the sklearn scorers.

    Returns:
        A dict holding the accuracy under ``"acc"`` and the macro- and
        weighted-averaged F1 scores under ``"f1_macro"`` and
        ``"f1_weighted"``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    macro_f1 = f1_score(y_true, y_pred, average="macro")
    weighted_f1 = f1_score(y_true, y_pred, average="weighted")
    return {
        "acc": accuracy,
        "f1_macro": macro_f1,
        "f1_weighted": weighted_f1,
    }
def compute_metrics_multi_task(predictions, targets, names):
    """Compute the per-task metric dicts for a multi-task prediction set.

    Args:
        predictions: Predicted labels; column ``i`` (taken as
            ``predictions[:, i]``) belongs to ``names[i]``.
        targets: Reference labels, indexed the same way as ``predictions``.
        names: Task names, one per column.

    Returns:
        A dict mapping each task name to the result of ``compute_metrics``
        for that task's column.
    """
    return {
        task: compute_metrics(targets[:, col], predictions[:, col])
        for col, task in enumerate(names)
    }
def read_test_dataset():
    """Load the test split and wrap it in a ``Dataset``.

    The file at ``TEST_FILE`` is parsed as tab-separated values with pandas
    and the resulting frame is handed to ``Dataset.from_pandas``.
    """
    return Dataset.from_pandas(pd.read_csv(TEST_FILE, sep="\t"))
# Column names applied to the header-less test label file read in VizHelper.__init__.
NAMES = ["file_name", "misogynous", "shaming", "stereotype", "objectification", "violence"]
class VizHelper:
    """Load the dataset tables and display individual samples.

    Construction reads every table under ``./data`` and combines the training
    and test rows into ``self.data``, indexed by ``file_name``.
    """

    def __init__(self):
        """Read all input tables and assemble the combined ``self.data`` frame."""
        data_dir = "./data"
        self.data_dir = data_dir

        self.train_df = pd.read_csv(f"{data_dir}/training/training.tsv", sep="\t")

        # The label file has no header row, so its columns are named from NAMES.
        self.test_df = pd.read_csv(
            f"{data_dir}/test_labels.txt", sep="\t", header=None, names=NAMES
        )
        # NAMES has no transcription column, so it is copied over from the
        # original test file. The assignment aligns on the row index.
        original_test = pd.read_csv(f"{data_dir}/test/Test.csv", sep="\t")
        self.test_df["Text Transcription"] = original_test["Text Transcription"]

        cat_df = pd.concat([self.train_df, self.test_df])

        self.web_df = pd.read_csv(f"{data_dir}/web_entities.tsv", sep="\t")
        self.nsfw = pd.read_csv(f"{data_dir}/nsfw.tsv", sep="\t")
        # Rename so the captions table shares the "file_name" merge key.
        self.captions = pd.read_csv(
            f"{data_dir}/image_captions.tsv", sep="\t"
        ).rename(columns={"image": "file_name"})
        self.ff = pd.read_csv(f"{data_dir}/fairface.tsv", sep="\t")

        cat_df = cat_df.merge(self.web_df, on="file_name").merge(
            self.captions, on="file_name"
        )
        # NOTE(review): this aligns on the row index rather than on file_name,
        # so it presumably assumes nsfw.tsv rows are in the same order as the
        # merged frame - confirm against the data files.
        cat_df["nsfw"] = self.nsfw["is_safe_bool"]

        self.data = cat_df.set_index("file_name")

    def show_sample(self, file_name=None):
        """Display a sample's image and its rows from the fairface table.

        Args:
            file_name: Index label of the sample to show. When omitted (or
                otherwise falsy), one row is drawn at random from
                ``self.data`` and that row's file name is used.

        Returns:
            The selected entry of ``self.data``: a one-row DataFrame for a
            random draw, or ``self.data.loc[file_name]`` for an explicit name.
        """
        if not file_name:
            sample = self.data.sample(1)
            # Use the drawn row's index label so the image and face lookups
            # below target the sampled item instead of a missing name.
            file_name = sample.index[0]
        else:
            sample = self.data.loc[file_name]

        # Open inside a context manager so the file handle is released as
        # soon as the RGB copy has been made.
        with Image.open(f"{self.data_dir}/images/{file_name}") as raw:
            img = raw.convert("RGB")
        # NOTE(review): `display` is not imported in this file; presumably it
        # is supplied by an IPython/Jupyter session - confirm.
        display(img)

        faces = self.ff.loc[self.ff.file_name == file_name]
        faces = faces[["race", "age", "gender"]]
        display(faces)

        return sample