-
Notifications
You must be signed in to change notification settings - Fork 0
/
physionet_challenge_utility_script.py
78 lines (69 loc) · 2.6 KB
/
physionet_challenge_utility_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import numpy as np, sys,os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.io import loadmat
import wfdb
import tarfile
from sklearn.preprocessing import MultiLabelBinarizer
import math
import matplotlib.pyplot as plt
import seaborn as sns
def load_challenge_data(filename):
    """Load one recording and the text header stored next to it.

    Args:
        filename: Path to the ``.mat`` file of the recording. The file must
            contain a ``'val'`` entry holding the signal samples.

    Returns:
        A ``(data, header_data)`` tuple where ``data`` is the ``'val'`` entry
        converted to a ``float64`` NumPy array and ``header_data`` is the list
        of lines (line endings preserved) read from the companion ``.hea``
        file.

    Raises:
        KeyError: If the ``.mat`` file has no ``'val'`` entry.
        OSError: If the ``.mat`` file or the ``.hea`` header cannot be opened.
    """
    mat_contents = loadmat(filename)
    data = np.asarray(mat_contents['val'], dtype=np.float64)

    # Swap only the final extension. A plain str.replace('.mat', '.hea') would
    # also rewrite '.mat' occurring anywhere else in the path (e.g. inside a
    # directory name) and point at a header file that does not exist.
    root, _ext = os.path.splitext(filename)
    input_header_file = root + '.hea'

    with open(input_header_file, 'r') as f:
        header_data = f.readlines()
    return data, header_data
def import_key_data_Georgia(path):
    """Gather header metadata for recordings under *path* with exactly 5000 samples.

    Walks *path* recursively (directories visited in sorted order), loads every
    file whose path ends in ``.mat`` through ``load_challenge_data`` and keeps
    only those whose data array has a second dimension of exactly 5000.

    Args:
        path: Root directory to search.

    Returns:
        A ``(gender, age, labels, ecg_filenames)`` tuple of four parallel
        lists: one entry per kept recording.
    """
    gender, age, labels, ecg_filenames = [], [], [], []

    for folder, _subfolders, names in sorted(os.walk(path)):
        for name in names:
            filepath = os.sep.join((folder, name))
            if not filepath.endswith(".mat"):
                continue

            data, header_data = load_challenge_data(filepath)
            if data.shape[1] != 5000:
                continue

            # Fixed header layout: the values are sliced out of header lines
            # 15, 14 and 13, dropping a short prefix and the final character.
            # Presumably these are the '#Dx:', '#Sex:' and '#Age:' lines with a
            # trailing newline -- TODO confirm against the dataset headers.
            labels.append(header_data[15][5:-1])
            ecg_filenames.append(filepath)
            gender.append(header_data[14][6:-1])
            age.append(header_data[13][6:-1])

    return gender, age, labels, ecg_filenames
def import_key_data_China(path):
    """Gather header metadata for recordings under *path* longer than 5000 samples.

    Walks *path* recursively (directories visited in sorted order), loads every
    file whose path ends in ``.mat`` through ``load_challenge_data`` and keeps
    only those whose data array has a second dimension strictly greater than
    5000.

    Args:
        path: Root directory to search.

    Returns:
        A ``(gender, age, labels, ecg_filenames)`` tuple of four parallel
        lists: one entry per kept recording.
    """
    gender, age, labels, ecg_filenames = [], [], [], []

    for folder, _subfolders, names in sorted(os.walk(path)):
        for name in names:
            filepath = os.sep.join((folder, name))
            if not filepath.endswith(".mat"):
                continue

            data, header_data = load_challenge_data(filepath)
            if data.shape[1] <= 5000:
                continue

            # Fixed header layout: the values are sliced out of header lines
            # 15, 14 and 13, dropping a short prefix and the final character.
            # Presumably these are the '#Dx:', '#Sex:' and '#Age:' lines with a
            # trailing newline -- TODO confirm against the dataset headers.
            labels.append(header_data[15][5:-1])
            ecg_filenames.append(filepath)
            gender.append(header_data[14][6:-1])
            age.append(header_data[13][6:-1])

    return gender, age, labels, ecg_filenames
def make_undefined_class(labels, df_unscored):
    """Replace every unscored code in *labels* with ``"undefined class"``.

    Args:
        labels: Sequence of label strings, one per recording. Each string may
            hold several codes (presumably comma-separated -- verify against
            the caller).
        df_unscored: DataFrame whose second column (positional index 1) holds
            the codes to mask. Each value is converted with ``str()``.

    Returns:
        A DataFrame built from *labels* (single column ``0`` for a flat list
        of strings) in which every whole-token occurrence of an unscored code
        has been replaced by the text ``"undefined class"``.
    """
    import re  # local import so the block does not depend on file-level imports

    df_labels = pd.DataFrame(labels)
    for code in df_unscored.iloc[:, 1]:
        # Match the code only as a complete token. A bare substring regex
        # would also rewrite part of a longer code that merely contains this
        # one (e.g. masking "123" inside "4123"). The code is escaped so it is
        # treated as literal text rather than as a pattern.
        pattern = r"(?<!\w)" + re.escape(str(code)) + r"(?!\w)"
        df_labels = df_labels.replace(
            to_replace=pattern, value="undefined class", regex=True
        )

    # NOTE: merging of equivalent classes (713427006 -> 59118001,
    # 284470004 -> 63593006, 427172004 -> 17338001) was disabled in the
    # original code and is intentionally still not applied here.
    return df_labels
def onehot_encode(df_labels):
    """Multi-hot encode the comma-separated labels in column ``0`` of *df_labels*.

    Args:
        df_labels: DataFrame whose column ``0`` holds one string of
            comma-separated labels per row.

    Returns:
        A ``(y, classes)`` tuple: ``y`` is the binary indicator matrix produced
        by ``MultiLabelBinarizer`` with its last column removed, and
        ``classes`` is the binarizer's class array without its last entry.
    """
    binarizer = MultiLabelBinarizer()
    label_lists = df_labels[0].str.split(pat=',')
    encoded = binarizer.fit_transform(label_lists)

    # The last class is always discarded, whatever it is.
    # NOTE(review): presumably this targets "undefined class", assuming it is
    # present and is the last entry of classes_ -- confirm with callers.
    trimmed = np.delete(encoded, -1, axis=1)
    kept_classes = binarizer.classes_[:-1]
    return trimmed, kept_classes