-
Notifications
You must be signed in to change notification settings - Fork 0
/
facetprediction_experiment.py
229 lines (161 loc) · 8.05 KB
/
facetprediction_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# -*- coding: utf-8 -*-
"""FacetPrediction_Experiment.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1actGK3Q0j58Gg7iwpmGSAoY-RCbB2h8M
"""
# Mount Google Drive so the model and CSV paths under /content/drive resolve.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
import numpy as np
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
from scipy.interpolate import interp1d
from scipy.signal import find_peaks
from sklearn.metrics.pairwise import cosine_similarity
# Load the saved Keras model that is later used to predict facet amplitudes.
# Alternative path when the model file sits in the working directory:
# model = tf.keras.models.load_model('Model1212_CNN.h5')
model = tf.keras.models.load_model('/content/drive/MyDrive/Research/Spectroscopy/Demo8/Model1212_CNN.h5')
# Number of facet classes (one per entry of specificFrequencies). Also used as the
# last dimension when reshaping the model input and to slice the predicted amplitudes.
num_specific_frequencies = 6
# Function to generate a spectrum as a sum of Gaussians
def generate_spectru_gaussians(amplitudes, centers):
    """Build a spectrum as a sum of Gaussian-shaped bands.

    Each band is ``amplitudes[k] * exp(-(x - centers[k])**2 / 18)`` evaluated
    on the grid ``np.arange(2000, 2299.8, 0.2)``.

    Args:
        amplitudes: Sequence indexed in parallel with ``centers``; only the
            first ``len(centers)`` entries are read.
        centers: Band centre positions on the grid's axis.

    Returns:
        The element-wise sum of all bands over the grid.
    """
    gaussian_width = 18
    wavenumbers = np.arange(2000, 2299.8, 0.2)
    components = []
    for idx, center in enumerate(centers):
        profile = np.exp(-(wavenumbers - center) ** 2 / gaussian_width)
        components.append(amplitudes[idx] * profile)
    return np.sum(components, axis=0)
# Function to generate multiple spectra as a sum of Gaussians
def generate_multiple_spectra(amplitudes, centers):
    """Build a spectrum in which some centers expand into several bands.

    The grid is ``np.arange(2300, 2000, -0.2)``, i.e. it runs from high to
    low wavenumber. Every band has the shape
    ``amplitude * ratio * exp(-(x - frequency)**2 / 18)``.

    Expansion rules per center:
        * 2176 -> bands at 2176, 2168, 2147 with intensity ratios 5 : 2 : 1
        * 2168 -> bands at 2168, 2147 with intensity ratios 3 : 1
        * any other value -> a single band at that center with ratio 1

    Args:
        amplitudes: Sequence indexed in parallel with ``centers``; only the
            first ``len(centers)`` entries are read.
        centers: Band centre positions, one per amplitude.

    Returns:
        A tuple ``(final_spectrum, all_frequencies, all_intensity_ratios)``:
        the summed spectrum over the grid, and for each center the list of
        band frequencies and the list of intensity ratios that were used.
    """
    widthG = 18
    x_range = np.arange(2300, 2000, -0.2)  # descending wavenumber axis

    all_frequencies = []
    all_intensity_ratios = []
    all_spectra = []

    for i, center in enumerate(centers):
        if center == 2176:
            intensity_ratios = [5, 2, 1]
            frequencies = [2176, 2168, 2147]
        elif center == 2168:
            intensity_ratios = [3, 1]
            frequencies = [2168, 2147]
        else:
            intensity_ratios = [1]
            # Use the center that was passed in rather than looking it up in a
            # module-level table, so the function honours its own arguments.
            frequencies = [center]

        all_frequencies.append(frequencies)
        all_intensity_ratios.append(intensity_ratios)

        bands = [
            amplitudes[i] * np.exp(-(x_range - frequency) ** 2 / widthG) * ratio
            for frequency, ratio in zip(frequencies, intensity_ratios)
        ]
        all_spectra.append(np.sum(bands, axis=0))

    final_spectrum = np.sum(all_spectra, axis=0)
    return final_spectrum, all_frequencies, all_intensity_ratios
# Characteristic frequency -> facet label. The order of the pairs matters:
# the key list and value list are paired positionally with the model's
# predicted amplitudes further down in this script.
_FACET_BANDS = (
    (2175, 'CeO2(110)red'),
    (2170, 'CeO2(110)ox'),
    (2176, 'CeO2(100)ox'),
    (2168, 'CeO2(100)red'),
    (2162, 'CeO2(111)red'),
    (2154, 'CeO2(111)ox'),
)
specificFrequencies = dict(_FACET_BANDS)
# Load the measured spectrum; the CSV has no header row and its two columns
# are read as wavenumber and absorbance.
file_path = '/content/drive/MyDrive/Research/Spectroscopy/Demo8/111ox.csv'
data = pd.read_csv(file_path, header=None, names=["Wavenumber", "Absorbance"])
file_name = os.path.splitext(os.path.basename(file_path))[0]  # file name without extension (used in the plot title)

# Wavenumber window kept for the analysis
spectrum_min, spectrum_max = 2000.0, 2300.0

# Non-numeric cells become NaN so they can be filtered out below
data['Wavenumber'] = pd.to_numeric(data['Wavenumber'], errors='coerce')
data['Absorbance'] = pd.to_numeric(data['Absorbance'], errors='coerce')

# Keep rows inside the window (a NaN wavenumber fails both comparisons),
# then drop rows whose absorbance could not be parsed.
filtered_data = data[(data['Wavenumber'] >= spectrum_min) & (data['Wavenumber'] <= spectrum_max)].copy()
filtered_data = filtered_data.dropna(subset=['Absorbance'])
if filtered_data.empty:
    raise ValueError(
        f"No valid data points between {spectrum_min} and {spectrum_max} in {file_path}"
    )

# Flip the sign of the signal, then min-max scale it to [0, 1]
filtered_data['Absorbance'] *= -1
min_value = np.min(filtered_data['Absorbance'])
max_value = np.max(filtered_data['Absorbance'])
if max_value == min_value:
    # A flat signal would make the scaling below 0/0 and feed NaN to the model.
    raise ValueError(
        f"Absorbance is constant within the selected window of {file_path}; cannot normalize"
    )
spectraData_normalized = (filtered_data['Absorbance'] - min_value) / (max_value - min_value)
filtered_data['Absorbance'] = spectraData_normalized

# Estimate the background as the average of the mean signal in the two edge
# regions [spectrum_min, 2140] and [2200, spectrum_max], then subtract it.
background_data1 = filtered_data[(filtered_data['Wavenumber'] >= spectrum_min) & (filtered_data['Wavenumber'] <= 2140)]
background_data2 = filtered_data[(filtered_data['Wavenumber'] >= 2200) & (filtered_data['Wavenumber'] <= spectrum_max)]
background_value1 = background_data1['Absorbance'].mean()
background_value2 = background_data2['Absorbance'].mean()
background_value = np.mean([background_value1, background_value2])
filtered_data['Absorbance'] -= background_value

# Resample the spectrum onto 1500 evenly spaced points.
x = filtered_data['Wavenumber']
y = filtered_data['Absorbance']
# Map the wavenumber window onto [0, 1] for interpolation
x_normalized = (x - spectrum_min) / (spectrum_max - spectrum_min)
f = interp1d(x_normalized, y, kind='linear', fill_value="extrapolate")
new_x = np.linspace(0, 1, 1500)  # Adjust the number of points as needed
interpolated_absorbance = f(new_x)
yy = interpolated_absorbance  # flat copy of the resampled signal, kept for plotting

# 1500 samples become a (1, 250, 6) array for the model.
# NOTE(review): confirm this layout is what the loaded model expects.
interpolated_absorbance = interpolated_absorbance.reshape(1, -1, num_specific_frequencies)
predicted_amplitudes = model.predict(interpolated_absorbance)
predicted_amplitudes_flat = predicted_amplitudes.flatten()
print (predicted_amplitudes)

# Rebuild a spectrum from the predicted amplitudes in two ways:
# 1) with the multi-band expansion, 2) with one Gaussian per center.
predicted_spectrum_1, _, _ = generate_multiple_spectra(predicted_amplitudes_flat, list(specificFrequencies.keys()))
predicted_spectrum_2 = generate_spectru_gaussians(predicted_amplitudes_flat, list(specificFrequencies.keys()))

# Shift each rebuilt spectrum to start at zero and scale its maximum to 1
predicted_spectrum_1_shifted = predicted_spectrum_1 - predicted_spectrum_1.min()
normalized_predicted_spectrum_1 = predicted_spectrum_1_shifted / predicted_spectrum_1_shifted.max()
predicted_spectrum_2_shifted = predicted_spectrum_2 - predicted_spectrum_2.min()
normalized_predicted_spectrum_2 = predicted_spectrum_2_shifted / predicted_spectrum_2_shifted.max()
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import find_peaks
# Single wavenumber axis shared by every curve in this figure
wavenumbers_predicted = np.arange(2000, 2299.8, 0.2)

# Peak indices of the normalized one-Gaussian-per-center spectrum
peaks_predicted, _ = find_peaks(normalized_predicted_spectrum_2, height=0.02, distance=10)  # Adjust parameters accordingly

plt.figure(figsize=(10, 6))
plt.plot(wavenumbers_predicted, yy, label='Actual Absorbance')
plt.plot(wavenumbers_predicted, normalized_predicted_spectrum_2, 'g--', label='Predicted Spectrum')

# Shade one Gaussian under every detected peak, scaled to the peak height
widthG = 18
x_range = wavenumbers_predicted
for peak_index in peaks_predicted:
    x_peak = x_range[peak_index]
    y_peak = normalized_predicted_spectrum_2[peak_index]
    gaussian_curve = np.exp(-(x_range - x_peak) ** 2 / widthG)
    normalized_gaussian_curve = gaussian_curve * y_peak / gaussian_curve.max()
    plt.fill_between(x_range, normalized_gaussian_curve, alpha=0.3)

# Mark each peak position with a dashed vertical line and its own legend entry
wavenumbers_of_peaks = wavenumbers_predicted[peaks_predicted]
for peak_wavenumber in wavenumbers_of_peaks:
    plt.vlines(
        peak_wavenumber,
        ymin=0,
        ymax=1,
        colors='orange',
        linestyles='dashed',
        label=f'Peak {peak_wavenumber:.1f}',
    )

plt.title(f'Actual Absorbance vs Predicted Absorbance ({file_name})')
plt.xlabel('Wavenumber')
plt.ylabel('Absorbance')
plt.gca().invert_xaxis()  # show the axis from high to low wavenumber
plt.legend()
plt.grid(True)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# One amplitude per facet class: keep the first num_specific_frequencies model outputs
facet_amplitudes = predicted_amplitudes_flat[:num_specific_frequencies]
facets = list(specificFrequencies.values())

# Bar chart of the predicted amplitude for every facet label
plt.figure(figsize=(8, 6))
bars = plt.bar(facets, facet_amplitudes, color='skyblue')
plt.xlabel('Facets')
plt.ylabel('Distribution')
plt.title('Predicted Facets')
plt.xticks(rotation=45, ha='right')  # Rotate x-labels for better visibility

# Write the amplitude, rounded to two decimals, just above each bar
for facet_bar, facet_amplitude in zip(bars, facet_amplitudes):
    label_x = facet_bar.get_x() + facet_bar.get_width() / 2
    label_y = facet_bar.get_height() + 0.01
    plt.text(label_x, label_y, f'{facet_amplitude:.2f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()