[Example] Add battery-electrochemical-performance prediction model #967
base: develop
Changes from 250 commits
Review comment: Why does the lithium-ion battery example need to change this file?
ppsci/arch/MLPModel.py
@@ -0,0 +1,133 @@
from ppsci.arch import base
import tensorflow as tf
from tensorflow import keras
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import functools
import csv
import math
import os


class MLPModel(base.Arch):
    def __init__(self, input_shape, learning_rate, nodes1, nodes2, nodes3, dropout_rate1, dropout_rate2, dropout_rate3):
        super(MLPModel, self).__init__()
        self.model = keras.Sequential([
            keras.layers.Input(shape=input_shape),
            keras.layers.Dense(nodes1, activation="relu"),
            keras.layers.Dropout(dropout_rate1),
            keras.layers.Dense(nodes2, activation="relu"),
            keras.layers.Dropout(dropout_rate2),
            keras.layers.Dense(nodes3, activation="relu"),
            keras.layers.Dropout(dropout_rate3),
            keras.layers.Dense(3, activation="sigmoid"),
        ])
        self.model.compile(
            optimizer=keras.optimizers.RMSprop(
                learning_rate=learning_rate, momentum=0.9, centered=True
            ),
            loss="mse",
        )

    def forward(self, x):
        return self.model(x)

    def train(self, x_train, y_train, x_test, y_test, epochs=1000):
        history = self.model.fit(
            x_train, y_train, epochs=epochs, validation_data=(x_test, y_test), verbose=0
        )
        self.visualize_loss(history, "Training and Validation Loss")

    def visualize_loss(self, history, title):
        loss = history.history["loss"]
        val_loss = history.history["val_loss"]
        epochs = range(len(loss))
        plt.figure(figsize=(6, 4))
        plt.plot(epochs, loss, "b", label="Training loss")
        plt.plot(epochs, val_loss, "r", label="Validation loss")
        plt.title(title)
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.show(block=False)  # do not block program execution

    def evaluate(self, x_test, y_test):
        loss = self.model.evaluate(x_test, y_test)
        return loss


# Example code for data preprocessing and model training
filePath = "C:/Users/ssm18/new0811/PaddleScience/ppsci/data/dataset/MP_data_down_loading(train+validate).csv"

df = pd.read_csv(filePath, header=0)

# Data processing: one-hot encode the space-group-number columns
df_charge_space_group_number = pd.get_dummies(df["charge_space_group_number"], prefix="charge_space_group_number")
df = df.join(df_charge_space_group_number)
df_discharge_space_group_number = pd.get_dummies(df["discharge_space_group_number"], prefix="discharge_space_group_number")
df = df.join(df_discharge_space_group_number)

df = df.drop(
    [
        "battery_id",
        "battery_formula",
        "framework_formula",
        "adj_pairs",
        "capacity_vol",
        "energy_vol",
        "formula_charge",
        "formula_discharge",
        "id_charge",
        "id_discharge",
        "working_ion",
        "num_steps",
        "stability_charge",
        "stability_discharge",
        "charge_crystal_system",
        "charge_energy_per_atom",
        "charge_formation_energy_per_atom",
        "charge_band_gap",
        "charge_efermi",
        "discharge_crystal_system",
        "discharge_energy_per_atom",
        "discharge_formation_energy_per_atom",
        "discharge_band_gap",
        "discharge_efermi",
    ],
    axis=1,
)

x_df = df.drop(["average_voltage", "capacity_grav", "energy_grav"], axis=1)
y_df = df[["average_voltage", "capacity_grav", "energy_grav"]]

# Reduce the feature space while keeping 99% of the variance
pca = PCA(0.99)
x_df = pca.fit_transform(x_df)
x_df = pd.DataFrame(x_df)

min_max_scaler = MinMaxScaler()
x_df.columns = x_df.columns.astype(str)
x_df = min_max_scaler.fit_transform(x_df)

# Min-max normalize the three targets to [0, 1] to match the sigmoid output layer
y_min = y_df.min()
y_max = y_df.max()
y_df = (y_df - y_min) / (y_max - y_min)

# 90/10 train/test split
len_train_test = int(x_df.shape[0] * 0.9)
x_train, x_test = x_df[:len_train_test], x_df[len_train_test:]
y_train, y_test = y_df[:len_train_test], y_df[len_train_test:]

# Initialize and train the model
model = MLPModel(
    input_shape=(x_train.shape[1],),
    learning_rate=0.0001,
    nodes1=40,
    nodes2=30,
    nodes3=15,
    dropout_rate1=0.2,
    dropout_rate2=0.2,
    dropout_rate3=0.2,
)

model.train(x_train, y_train, x_test, y_test, epochs=1000)
model.evaluate(x_test, y_test)
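The three regression targets are min-max normalized before training, so the network's sigmoid outputs live in [0, 1]. Below is a minimal sketch of mapping predictions back to physical units, reusing the y_min and y_max computed in the script above; this denormalization step is an illustration and is not part of the diff.

# Sketch only, not part of this PR: undo the target normalization.
# Assumes `model`, `x_test`, `y_min`, and `y_max` from the script above.
import pandas as pd

preds_normalized = model.forward(x_test).numpy()  # sigmoid outputs in [0, 1]
preds_physical = pd.DataFrame(
    preds_normalized,
    columns=["average_voltage", "capacity_grav", "energy_grav"],
)
# Broadcast the per-target min/max back onto the three output columns.
preds_physical = preds_physical * (y_max.values - y_min.values) + y_min.values
print(preds_physical.head())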
ppsci/arch/__init__.py
@@ -53,6 +53,9 @@
from ppsci.arch.vae import AutoEncoder  # isort:skip
from ppsci.utils import logger  # isort:skip

from ppsci.arch.xg_optuna import MyPaddleScienceXGBoostModel

from .xg_optuna import MyPaddleScienceXGBoostModel
from .MLPModel import MLPModel

__all__ = [
    "AFNONet",

@@ -93,6 +96,10 @@
    "UNetEx",
    "UNONet",
    "USCNN",
    "MyPaddleScienceXGBoostModel",
    "MLPModel",
]
Review comment: Same issue as MLPModel.py: do not put an entire example script under arch/.
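Setting the placement question aside, here is a minimal usage sketch of what the registration above exposes. The array shapes and epoch count are illustrative assumptions, and only MLPModel (whose constructor signature appears in the first file of this diff) is exercised.

# Usage sketch only, not part of this PR.
import numpy as np
from ppsci.arch import MLPModel  # exported via __all__ above

# Illustrative shapes: 100 samples, 20 features, 3 targets.
x_demo = np.random.rand(100, 20).astype("float32")
y_demo = np.random.rand(100, 3).astype("float32")

model = MLPModel(
    input_shape=(x_demo.shape[1],),
    learning_rate=1e-4,
    nodes1=40,
    nodes2=30,
    nodes3=15,
    dropout_rate1=0.2,
    dropout_rate2=0.2,
    dropout_rate3=0.2,
)
model.train(x_demo[:90], y_demo[:90], x_demo[90:], y_demo[90:], epochs=10)
print(model.evaluate(x_demo[90:], y_demo[90:]))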
ppsci/arch/xg_optuna.py
@@ -0,0 +1,134 @@
import paddle
# Import required modules
import optuna
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
import warnings
from pathlib import Path
from sklearn.model_selection import train_test_split

warnings.filterwarnings("ignore")
OUTPUT_TEST = True

# Get the directory containing the current script
current_dir = Path(__file__).resolve().parent

# Build full paths to the data files
X_train_path = "C:/Users/ssm18/new0811/PaddleScience/examples/ML_Pipeline/data/data/cleaned/training.csv"
y_train_path = "C:/Users/ssm18/new0811/PaddleScience/examples/ML_Pipeline/data/data/cleaned/training_labels.csv"
X_val_path = "C:/Users/ssm18/new0811/PaddleScience/examples/ML_Pipeline/data/data/cleaned/validation.csv"
y_val_path = "C:/Users/ssm18/new0811/PaddleScience/examples/ML_Pipeline/data/data/cleaned/validation_labels.csv"
X_test_path = "C:/Users/ssm18/new0811/PaddleScience/examples/ML_Pipeline/data/data/cleaned/test.csv"
y_test_path = "C:/Users/ssm18/new0811/PaddleScience/examples/ML_Pipeline/data/data/cleaned/test_labels.csv"

import os

print("X_train_path exists:", os.path.exists(X_train_path))

# Read and process the data
X_train = pd.read_csv(X_train_path)
y_train = pd.read_csv(y_train_path)
X_val = pd.read_csv(X_val_path)
y_val = pd.read_csv(y_val_path)

# XGBoost rejects '[' and ']' in feature names, so replace them with parentheses
columns = X_train.columns
for col in columns:
    if "[" in col or "]" in col:
        old_name = col
        col = col.replace("[", "(")
        col = col.replace("]", ")")
        X_train = X_train.rename(columns={old_name: col})
        X_val = X_val.rename(columns={old_name: col})

X_train, X_verif, y_train, y_verif = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42
)
X_train = X_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
X_verif = X_verif.reset_index(drop=True)
y_verif = y_verif.reset_index(drop=True)
X_val = X_val.reset_index(drop=True)
y_val = y_val.reset_index(drop=True)

# Hyperparameter optimization with Optuna
def objective(trial):
    params = {
        "max_depth": trial.suggest_int("max_depth", 1, 15),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.01, 1.0),
        "n_estimators": trial.suggest_int("n_estimators", 50, 1000),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "gamma": trial.suggest_loguniform("gamma", 1e-08, 1.0),
        "subsample": trial.suggest_loguniform("subsample", 0.5, 0.9),
        "colsample_bytree": trial.suggest_loguniform("colsample_bytree", 0.5, 0.9),
    }
    params["tree_method"] = "hist"
    optuna_model = XGBRegressor(**params)
    optuna_model.fit(X_train, y_train)
    verif_pred = optuna_model.predict(X_verif)
    verif_loss = mean_absolute_percentage_error(y_verif, verif_pred) * 100
    verif_error = mean_squared_error(y_verif, verif_pred, squared=False)
    error = verif_loss + verif_error
    return error

# Run the hyperparameter search
sampler = optuna.samplers.CmaEsSampler()
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=50)

# Retrieve the best parameters
best_params = study.best_trial.params
print("Best trial:")
print("  Value: ", study.best_trial.value)
print("  Params: ")
for key, value in best_params.items():
    print(f"    {key}: {value}")

# Initialize an XGBoost model with the best parameters
model = XGBRegressor(**best_params)
model.fit(X_train, y_train)

# Predict on the validation set
val_preds = model.predict(X_val)
val_loss = mean_squared_error(y_val, val_preds, squared=False)
print(f"Validation RMSE: {val_loss}")

# Load the test data
X_test = pd.read_csv(X_test_path)
y_test = pd.read_csv(y_test_path)
# Apply the same column-name cleanup to the test data so feature names match
columns_test = X_test.columns
for col in columns_test:
    if "[" in col or "]" in col:
        old_name = col
        col = col.replace("[", "(")
        col = col.replace("]", ")")
        X_test = X_test.rename(columns={old_name: col})

# Align the test columns with the training columns
X_test = X_test[X_train.columns]

# Convert the test data to Paddle tensors
test_inputs = {"x": paddle.to_tensor(X_test.values).astype("float32")}
test_labels = {"y": paddle.to_tensor(y_test.values).astype("float32")}
# Predict on the test set
test_preds = model.predict(X_test)

# Compute evaluation metrics on the test set
test_rmse = mean_squared_error(y_test, test_preds, squared=False)
test_r2 = r2_score(y_test, test_preds)
adjusted_percent_error = test_rmse / y_test.mean() * 100

# Print the test-set results
print(f"Test RMSE: {test_rmse}")
print(f"Test R2 Score: {test_r2}")
print(f"Adjusted Percent Error: {adjusted_percent_error}")

# Save the predictions
predictions_dir = current_dir / "data" / "predictions" / "XG"
predictions_dir.mkdir(parents=True, exist_ok=True)

# Save predictions and ground truth
pd.DataFrame(test_preds).to_csv(predictions_dir / "test_pred_xg.csv", index=False, header=False)
pd.DataFrame(y_test).to_csv(predictions_dir / "test_true_xg.csv", index=False, header=False)
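The bracket-to-parenthesis renaming above is applied twice, once for the training/validation frames and once for the test frame. A small helper, sketched here as a hypothetical refactor rather than part of the PR, would keep all three frames consistent:

# Hypothetical helper, not part of this PR: XGBoost rejects '[' and ']'
# in feature names, so replace them identically in every frame.
import pandas as pd

def sanitize_columns(df: pd.DataFrame) -> pd.DataFrame:
    return df.rename(columns=lambda c: c.replace("[", "(").replace("]", ")"))

# Usage with the frames loaded in the script above:
# X_train, X_val, X_test = map(sanitize_columns, (X_train, X_val, X_test))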
Review comment: This empty file should be deleted.