-
Notifications
You must be signed in to change notification settings - Fork 0
/
graph_generation.py
executable file
·34 lines (25 loc) · 1.3 KB
/
graph_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pickle
from copy import deepcopy

import networkx as nx
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine

from ephen_utils import difference
from ephen_utils import inner_connections
from ephen_utils import make_hin
# Script body: for every target listed in the stats file, select the rows of
# the event frame that are related to that target (positive week difference
# and cosine distance of the embeddings within MAX_COSINE_DISTANCE), build a
# graph from them with the ephen_utils helpers and pickle it to GRAPH_DIR.

DATA_DIR = '/media/pauloricardo/basement/projeto'
GRAPH_DIR = f'{DATA_DIR}/graphs'

# Rows whose embedding is farther than this (cosine distance) from the
# target's embedding are discarded.
MAX_COSINE_DISTANCE = 0.5

# The event data is stored as two parquet shards; stack them into one frame.
df = pd.concat([
    pd.read_parquet(f'{DATA_DIR}/df01-10_1.parquet'),
    pd.read_parquet(f'{DATA_DIR}/df01-10_2.parquet'),
])
stats = (
    pd.read_csv(f'{DATA_DIR}/stats_filtered01-10.csv')
    .drop(columns='Unnamed: 0')
    .reset_index(drop=True)
)

# Keep the raw DATE values in a separate column before converting DATE to
# datetime.
df['date_str'] = df['DATE']
df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%d')

# Loop-invariant column lookups.
dates = df['DATE']
embeddings = df['embedding']

# Iterate the 'target' column directly instead of stats.iterrows():
# iterrows() builds one Series per row and upcasts it to a common dtype, so a
# float column elsewhere in stats would turn the integer target into a float
# that .iloc rejects.  int() keeps the positional lookup and the output file
# name well-formed either way.
for target in stats['target']:
    target = int(target)  # positional index of the target row in df

    # NOTE(review): `difference` comes from ephen_utils; presumably it returns
    # the number of weeks between each row's date and the target's date --
    # confirm there.  Only rows with a strictly positive value are kept.
    week_diff = dates.apply(difference, end=dates.iloc[target], interval='week')

    # .copy() makes the subset an independent frame, so adding a column below
    # does not raise SettingWithCopyWarning.
    candidates = df[week_diff > 0].copy()

    # Compute the distance only for the retained rows.  The result shares the
    # subset's index, so the assignment needs no re-alignment against the full
    # frame (which would fail if the concatenated shards repeat index labels).
    candidates['dis_cos'] = candidates['embedding'].apply(
        cosine, v=embeddings.iloc[target]
    )
    candidates = candidates[candidates['dis_cos'] <= MAX_COSINE_DISTANCE]

    # Embeddings of the surviving rows, in the same row order as `candidates`.
    features = candidates['embedding'].to_numpy()
    candidates = candidates.reset_index(drop=True)

    G = make_hin(features, candidates)
    G = inner_connections(G)

    # nx.write_gpickle was removed in NetworkX 3.0; it was a thin wrapper over
    # pickle.dump with the highest protocol, so this writes the same format on
    # every NetworkX version.
    with open(f'{GRAPH_DIR}/graph_{target}.gpickle', 'wb') as fh:
        pickle.dump(G, fh, pickle.HIGHEST_PROTOCOL)