-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathelliptic_Bickley_sweep.py
79 lines (61 loc) · 2.77 KB
/
elliptic_Bickley_sweep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""
An example demonstrating the computation of elliptic LCSs in a 2D system,
the Bickley jet. Here, we evaluate the number of particles in each of the
num_tracked_clusters largest groups identified by the algorithm as a function
of the parameter eps, for a given value of minPts. This provides a basis for
selecting the right values of minPts, eps, as well as the number of
physically-meaningful groups (set by the parameter num_meaningful_clusters in
elliptic_Bickley.py).
"""
import numpy as np
import pandas as pd
from scipy.spatial.distance import squareform
import matplotlib.pyplot as plt
import scipy.io as sio
import os
from tqdm import tqdm
from functions.elliptic import PairwiseDist_2D, EllipticLCS, Colors, RemoveSpuriousClusters
if __name__ == "__main__":
SavePath = './data/'
SaveName = 'elliptic_Bickley'
# Parameters
minPts = 10
eps_values = np.arange(0.1,5,0.1) # values of eps to sweep through
num_tracked_clusters = 10 # number of clusters to track
# Load data
data = sio.loadmat(SavePath+SaveName)
xP, yP, tv = data['xP'], data['yP'], data['tv'].flatten()
# Check if pairwise distances have already been computed
if os.path.isfile(SavePath+SaveName+'_Dij.mat'):
# Load precomputed pairwise distances
data = sio.loadmat(SavePath+SaveName+'_Dij')
Dij = squareform(data['Dij'].flatten())
else:
# Compute pairwise distances
Dij = PairwiseDist_2D(xP,yP,xPeriodicBC=(0.0,20.0))
# Save the pairwise distances for future use
sio.savemat(SavePath+SaveName+'_Dij.mat',{'Dij':squareform(Dij)})
print('Pairwise distances saved in %s' % SavePath+SaveName+'_Dij.mat')
cluster_sizes = np.zeros((len(eps_values),num_tracked_clusters))
for i,eps in enumerate(tqdm(eps_values)):
# Compute cluster labels
labels = EllipticLCS(Dij,minPts,eps)
# Compute and rank the size (number of particles) of each cluster
freqs = pd.Series(labels).value_counts()
# Retrieve the number of clusters (exclusing noise)
num_clusters = len(freqs[freqs.index!=-1].values)
# Store the size of the num_tracked_clusters largest clusters
num = np.min([num_tracked_clusters,num_clusters])
cluster_sizes[i,:num] = freqs[freqs.index!=-1].values[:num]
# Plot results
fig = plt.figure(figsize=[5,4])
ax = plt.gca()
ax.plot(eps_values,cluster_sizes)
ax.set_yscale('log')
ax.set_xlabel('$eps$')
ax.set_ylabel('$N_i$')
leg = ax.legend(['%g'%(i+1) for i in range(len(eps_values))],labelspacing=.2,loc='lower right')
ax.set_title('$\mathtt{minPts} = %g$'%minPts)
ax.set_title('DBSCAN clustering from t0 = %g to tf = %g \nwith minPts = %g'
% (tv[0],tv[-1],minPts))
plt.show()