# -*- coding: utf-8 -*-
import numpy
from scipy.sparse import diags
from sklearn.neighbors import kneighbors_graph as kng
def construct_W(X, **kwargs):
    """Build a symmetric heat-kernel affinity matrix W from a kNN graph."""
    k = kwargs['neighbour_size']
    t = kwargs['t_param']
    # k + 1 because kneighbors_graph counts each sample as its own neighbour;
    # mode='distance' stores the Euclidean distance to each neighbour
    # (mode='connectivity' gave absurd results here)
    S = kng(X, k + 1, mode='distance', metric='euclidean')
    # heat kernel: W_ij = exp(-d_ij^2 / (2 t^2)), applied to the stored
    # nonzero distances only so that non-neighbour pairs keep weight 0
    # (this is the elementwise exponential, not scipy's matrix exponential)
    S.data = numpy.exp(-(S.data ** 2) / (2 * t * t))
    # Symmetrise W by keeping max(W_ij, W_ji), following
    # [1] M. Belkin and P. Niyogi, "Laplacian Eigenmaps and Spectral Techniques
    # for Embedding and Clustering," Advances in Neural Information Processing
    # Systems, Vol. 14, 2001.
    bigger = S.T > S
    S = S - S.multiply(bigger) + S.T.multiply(bigger)
    return S
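
# A minimal usage sketch for construct_W on synthetic data; the sample
# counts and parameter values below are illustrative assumptions, not
# values this module prescribes.
def _demo_construct_W():
    rng = numpy.random.RandomState(0)
    X = rng.rand(50, 10)  # 50 samples, 10 features
    W = construct_W(X, t_param=1, neighbour_size=5)
    # W is a sparse (50, 50) affinity matrix; after the max-symmetrisation
    # above, the asymmetry below should come out as 0
    print(W.shape, abs(W - W.T).sum())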
def lap_score(X, **kwargs):
    if 'W' not in kwargs:
        t = kwargs.get('t_param', 1)
        n = kwargs.get('neighbour_size', 5)
        W = construct_W(X, t_param=t, neighbour_size=n)
    else:
        W = kwargs['W']
    # build the degree vector from the affinity matrix W
    D = numpy.array(W.sum(axis=1))
    # the score uses W directly, since Lr = 1 - fr' W fr / fr' D fr
    L = W
    tmp = numpy.dot(numpy.transpose(D), X)
    D = diags(numpy.transpose(D), [0])
    Xt = numpy.transpose(X)
    t1 = numpy.transpose(numpy.dot(Xt, D.todense()))
    t2 = numpy.transpose(numpy.dot(Xt, L.todense()))
    # weighted-mean correction term (fr' D 1)^2 / (1' D 1)
    tmp = numpy.multiply(tmp, tmp) / D.sum()
    # denominator of the score: fr' D fr with the weighted mean removed
    D_prime = numpy.sum(numpy.multiply(t1, X), 0) - tmp
    # numerator-related term: fr' W fr with the weighted mean removed
    L_prime = numpy.sum(numpy.multiply(t2, X), 0) - tmp
    # guard against a zero denominator
    D_prime[D_prime < 1e-12] = 10000
    # Laplacian score for every feature: 1 - fr' W fr / fr' D fr
    score = 1 - numpy.array(numpy.multiply(L_prime, 1 / D_prime))[0, :]
    return numpy.transpose(score)
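
# A minimal sketch of lap_score on synthetic data with one planted
# informative feature; the parameters are illustrative, and the planted
# feature is expected (not guaranteed) to receive the smallest score.
def _demo_lap_score():
    rng = numpy.random.RandomState(0)
    X = rng.rand(100, 5)
    X[:50, 0] += 2.0  # feature 0 separates two clusters
    scores = lap_score(X, t_param=1, neighbour_size=5)
    print(scores)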
"""
Rank features in ascending order according to their laplacian scores, the smaller the laplacian score is, the more
important the feature is
"""
def feature_ranking(score):
idx = numpy.argsort(score, 0)
return idx + 1
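
# feature_ranking pairs with lap_score; a tiny sketch with hand-made
# scores (remember the returned indices are 1-based):
def _demo_feature_ranking():
    scores = numpy.array([0.9, 0.2, 0.5])
    print(feature_ranking(scores))  # -> [2 3 1]: feature 2 ranks first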
def LaplacianScore(X, **kwargs):
    n_samples, n_features = numpy.shape(X)
    if 'W' not in kwargs:
        t = kwargs.get('t_param', 1)
        n = kwargs.get('neighbour_size', 5)
        # construct the affinity matrix W
        W = construct_W(X, t_param=t, neighbour_size=n)
    else:
        W = kwargs['W']
    # construct the diagonal degree matrix D
    D = numpy.array(W.sum(axis=1))
    D = diags(numpy.transpose(D), [0])
    D_dense = D.toarray()
    # construct the graph Laplacian L = D - W
    L = D_dense - W.toarray()
    # construct 1 = [1, ..., 1]'
    one = numpy.ones((n_samples, 1))
    # fr^ = fr - (fr' D 1 / 1' D 1) 1: remove each feature's weighted mean
    Xt = numpy.transpose(X)
    mean = numpy.matmul(numpy.matmul(Xt, D_dense), one) \
        / numpy.matmul(numpy.matmul(numpy.transpose(one), D_dense), one)
    fr = X - numpy.matmul(one, numpy.transpose(mean))
    # Laplacian score Lr = fr^' L fr^ / fr^' D fr^ for every feature
    fr_t = numpy.transpose(fr)
    Lr = numpy.matmul(numpy.matmul(fr_t, L), fr) \
        / numpy.matmul(numpy.matmul(fr_t, D_dense), fr)
    return numpy.diag(Lr)
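
# A minimal end-to-end sketch comparing lap_score with the dense
# LaplacianScore variant on the same synthetic data; the parameters are
# illustrative, and both variants should assign feature 0 the smallest
# score since it carries the cluster structure.
if __name__ == '__main__':
    rng = numpy.random.RandomState(0)
    X = rng.rand(100, 5)
    X[:50, 0] += 2.0  # feature 0 separates two clusters
    scores = lap_score(X, t_param=1, neighbour_size=5)
    print('lap_score:', scores)
    print('ranking (1-based):', feature_ranking(scores))
    print('LaplacianScore:', LaplacianScore(X, t_param=1, neighbour_size=5))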