-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
68 lines (53 loc) · 2.08 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pandas as pd
import numpy as np
import numpy as np
import pandas as pd
from colorama import Back, Fore, Style
import time
from sklearn.cluster import AgglomerativeClustering
import sklearn.metrics as sm
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from colorama import Back, Fore, Style
from sklearn import metrics
# Palette of colour names, presumably meant for plotting one colour per
# cluster (nothing in the visible code reads it, and it is rebound to a
# five-entry version further down the module). Entries must not contain
# stray whitespace: ' cyan' is not a recognised colour name, 'cyan' is.
colors = np.array(['green', 'orange', 'blue', 'cyan', 'black', 'red'])
from sklearn.metrics import r2_score, mean_squared_error, confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
# Load the data set once at import time; every function below reads this
# module-level frame. The separator is a regular expression, so the
# whitespace surrounding each comma is consumed as part of the delimiter;
# regex separators are only supported by the python parsing engine.
df = pd.read_csv(
    "./buddymove_holidayiq.csv",
    sep=r"\s*,\s*",  # raw string: "\s" is an invalid escape in a plain literal
    header=0,
    encoding="ascii",
    engine="python",
)
def get_features():
    """Return the column names of the module-level ``df`` minus the first one.

    The leading column is always discarded (presumably a row identifier
    rather than a feature -- confirm against the CSV header).

    Returns:
        list: the remaining column labels, in their original order.

    Raises:
        IndexError: if ``df`` has no columns at all.
    """
    column_names = df.columns.tolist()
    column_names.pop(0)  # drop the leading column in place
    return column_names
# Five-entry palette of colour names, presumably meant for plotting one colour
# per cluster (nothing in the visible code reads it). Entries must not contain
# stray whitespace: ' cyan' is not a recognised colour name, 'cyan' is.
colors = np.array(['green', 'orange', 'blue', 'cyan', 'black'])
def save_kmeans(X, n_clusters=6):
    """Fit k-means on ``X`` and record its silhouette score.

    The score is printed to stdout and written, together with the cluster
    count, to ``./results/kmeans.txt`` (overwritten on every call).

    Args:
        X: Feature matrix accepted by ``KMeans.fit`` and
            ``metrics.silhouette_score``; the script passes the feature
            columns of the loaded DataFrame.
        n_clusters: Number of clusters to fit. Defaults to 6, the value the
            script has always used.

    Returns:
        None.
    """
    import os  # local import: keeps this function self-contained

    report_path = "./results/kmeans.txt"

    model = KMeans(n_clusters=n_clusters, max_iter=500)
    model.fit(X)
    score = metrics.silhouette_score(X, model.labels_, metric='euclidean')
    print(score)

    # Create the output directory if needed so the run does not fail after
    # the model has already been fitted, and let the context manager flush
    # and close the file even if the write raises.
    os.makedirs(os.path.dirname(report_path), exist_ok=True)
    with open(report_path, "w") as report:
        report.write(f"n_clusters : {n_clusters}\nsilhouette_score : {score}\n")
def save_linkage(
    X,
    linkages=("ward", "average", "single", "complete"),
    cluster_counts=range(2, 10),
):
    """Sweep agglomerative-clustering settings and record silhouette scores.

    For every linkage criterion and every cluster count, an
    ``AgglomerativeClustering`` model is fitted on ``X`` and its euclidean
    silhouette score is appended to a text report, which is finally written
    to ``./results/linkage.txt`` (overwritten on every call).

    Args:
        X: Feature matrix accepted by ``AgglomerativeClustering.fit`` and
            ``metrics.silhouette_score``.
        linkages: Linkage criteria to evaluate, in report order. Defaults to
            the four criteria the script has always used.
        cluster_counts: Cluster counts to evaluate for each linkage.
            Defaults to 2 through 9 inclusive.

    Returns:
        None.
    """
    import os  # local import: keeps this function self-contained

    report_path = "./results/linkage.txt"

    # Collect the fragments and join once instead of growing a string with +=.
    sections = []
    for linkage in linkages:
        sections.append(f"linkage : {linkage}\n\n")
        for n_clusters in cluster_counts:
            model = AgglomerativeClustering(linkage=linkage, n_clusters=n_clusters)
            model.fit(X)
            score = metrics.silhouette_score(X, model.labels_, metric='euclidean')
            sections.append(
                f"n_clusters : {n_clusters}\nsilhouette_score : {score}\n\n"
            )
        # NOTE(review): one separator is emitted per linkage section; confirm
        # this matches the intended report layout.
        sections.append("\n****************\n")

    # Create the output directory if needed so the sweep's results are not
    # lost at the very end, and let the context manager flush and close the
    # file even if the write raises.
    os.makedirs(os.path.dirname(report_path), exist_ok=True)
    with open(report_path, "w") as report:
        report.write("".join(sections))
# Script driver: select every column returned by get_features() as the
# feature matrix shared by both experiments.
X = df.loc[:, get_features()]

# Run the agglomerative sweep first, then the single k-means fit; each call
# writes its own report file.
save_linkage(X)
save_kmeans(X)