diff --git a/changelog.md b/changelog.md index 6f55f718..694e0752 100644 --- a/changelog.md +++ b/changelog.md @@ -2,14 +2,14 @@ v3.0.1 * Removed deprecated molecules. * Fix error in ``kgcnn.data.transform.scaler.serial`` -* Fix error in ``QMDataset`` for if attributes have been chosen. Now `set_attributes` does not cause an error. +* Fix error in ``QMDataset`` if attributes have been chosen. Now `set_attributes` does not cause an error. * Fix error in ``QMDataset`` with labels without SDF file. * Fix error in ``kgcnn.layers.conv.GraphSageNodeLayer`` . * Add ``reverse_edge_indices`` option to `GraphDict.from_networkx` . Fixed error in connection with `kgcnn.crystal` . -* Started with ``kgcnn.io.file`` . Experimental. +* Started with ``kgcnn.io.file`` . Experimental. Will get more updates. * Fix error with `StandardLabelScaler` inheritance. * Added workflow notebook examples. -* +* Fix error in import of ``kgcnn.crystal.periodic_table`` so that package data is now properly included. v3.0.0 diff --git a/kgcnn/io/file.py b/kgcnn/io/file.py index 03871078..93b87086 100644 --- a/kgcnn/io/file.py +++ b/kgcnn/io/file.py @@ -29,6 +29,7 @@ def _check_for_inner_shape(array_list: List[np.ndarray]) -> Union[None, tuple, l class RaggedTensorNumpyFile: + """Class representing a NumPy '.npz' file to store a ragged tensor on disk.""" _device = '/cpu:0' @@ -105,6 +106,7 @@ def exists(self): class RaggedTensorHDFile: + """Class representing an HDF '.hdf5' file to store a ragged tensor on disk.""" _device = '/cpu:0' diff --git a/training/hyper/hyper_mp_jdft2d.py b/training/hyper/hyper_mp_jdft2d.py index 6b427422..4de6289f 100644 --- a/training/hyper/hyper_mp_jdft2d.py +++ b/training/hyper/hyper_mp_jdft2d.py @@ -511,16 +511,27 @@ "cross_validation": {"class_name": "KFold", "config": {"n_splits": 5, "random_state": 42, "shuffle": True}}, "fit": { - "batch_size": 32, "epochs": 1000, "validation_freq": 10, "verbose": 2, + "batch_size": 64, "epochs": 800, "validation_freq": 10, "verbose": 2, 
"callbacks": [ - {"class_name": "kgcnn>LinearLearningRateScheduler", "config": { - "learning_rate_start": 0.0005, "learning_rate_stop": 0.5e-05, "epo_min": 100, "epo": 1000, - "verbose": 0} - } + # {"class_name": "kgcnn>LinearLearningRateScheduler", "config": { + # "learning_rate_start": 0.0005, "learning_rate_stop": 0.5e-05, "epo_min": 0, "epo": 800, + # "verbose": 0} + # } ] }, "compile": { - "optimizer": {"class_name": "Adam", "config": {"lr": 0.0005}}, + "optimizer": { + "class_name": "Adam", + "config": { + "learning_rate": { + "class_name": "kgcnn>KerasPolynomialDecaySchedule", + "config": { + "dataset_size": 106.201, "batch_size": 64, "epochs": 800, + "lr_start": 0.0005, "lr_stop": 1.0e-05 + } + } + } + }, "loss": "mean_absolute_error" }, "scaler": { diff --git a/training/results/MatProjectEFormDataset/coGN/coGN_MatProjectEFormDataset_score.yaml b/training/results/MatProjectEFormDataset/coGN/coGN_MatProjectEFormDataset_score.yaml new file mode 100644 index 00000000..7942b081 --- /dev/null +++ b/training/results/MatProjectEFormDataset/coGN/coGN_MatProjectEFormDataset_score.yaml @@ -0,0 +1,124 @@ +data_unit: eV/atom +date_time: '2023-05-18 22:54:14' +epochs: +- 800 +- 800 +- 800 +- 800 +- 800 +execute_folds: +- 4 +kgcnn_version: 3.0.1 +loss: +- 0.0008860895759426057 +- 0.0008905278518795967 +- 0.0009073111577890813 +- 0.0009844396263360977 +- 0.0009256843477487564 +max_loss: +- 0.17345885932445526 +- 0.17859020829200745 +- 0.1676129251718521 +- 0.17198680341243744 +- 0.17687316238880157 +max_scaled_mean_absolute_error: +- 0.20188768208026886 +- 0.20766201615333557 +- 0.1948785036802292 +- 0.20020614564418793 +- 0.20556315779685974 +max_scaled_root_mean_squared_error: +- 0.32694771885871887 +- 0.33746638894081116 +- 0.3113785982131958 +- 0.3249269425868988 +- 0.339813232421875 +max_val_loss: +- 0.04921264201402664 +- 0.04789257422089577 +- 0.04781835153698921 +- 0.051487576216459274 +- 0.046009182929992676 +max_val_scaled_mean_absolute_error: +- 
0.057278186082839966 +- 0.055688779801130295 +- 0.05559692159295082 +- 0.059935588389635086 +- 0.053472232073545456 +max_val_scaled_root_mean_squared_error: +- 0.0925888791680336 +- 0.09510543197393417 +- 0.09771958738565445 +- 0.09067843109369278 +- 0.10363566875457764 +min_loss: +- 0.0008860895759426057 +- 0.0008905278518795967 +- 0.0009073111577890813 +- 0.0009844396263360977 +- 0.0009256843477487564 +min_scaled_mean_absolute_error: +- 0.0010313139064237475 +- 0.0010354919359087944 +- 0.0010549012804403901 +- 0.0011459658853709698 +- 0.0010758370626717806 +min_scaled_root_mean_squared_error: +- 0.009279688820242882 +- 0.009094045497477055 +- 0.008167327381670475 +- 0.008846502751111984 +- 0.008383971638977528 +min_val_loss: +- 0.01455119252204895 +- 0.01417975127696991 +- 0.014507561922073364 +- 0.014709792099893093 +- 0.014524188823997974 +min_val_scaled_mean_absolute_error: +- 0.016936026513576508 +- 0.016488006338477135 +- 0.01686747744679451 +- 0.01712336204946041 +- 0.016880128532648087 +min_val_scaled_root_mean_squared_error: +- 0.041658543050289154 +- 0.04964727163314819 +- 0.04548873007297516 +- 0.04854537919163704 +- 0.05381939187645912 +model_class: make_model +model_name: coGN +model_version: '' +multi_target_indices: null +number_histories: 5 +scaled_mean_absolute_error: +- 0.0010313139064237475 +- 0.0010354919359087944 +- 0.0010549012804403901 +- 0.0011459658853709698 +- 0.0010758370626717806 +scaled_root_mean_squared_error: +- 0.009279688820242882 +- 0.009094045497477055 +- 0.008167327381670475 +- 0.008849424310028553 +- 0.008383971638977528 +val_loss: +- 0.014554254710674286 +- 0.014186271466314793 +- 0.014507561922073364 +- 0.014712574891746044 +- 0.014524188823997974 +val_scaled_mean_absolute_error: +- 0.01693958416581154 +- 0.01649557054042816 +- 0.01686747744679451 +- 0.017126601189374924 +- 0.016880128532648087 +val_scaled_root_mean_squared_error: +- 0.04193956032395363 +- 0.05034559220075607 +- 0.045685552060604095 +- 0.04965636134147644 +- 
0.054531652480363846 diff --git a/training/results/MatProjectEFormDataset/coGN/coGN_hyper.json b/training/results/MatProjectEFormDataset/coGN/coGN_hyper.json new file mode 100644 index 00000000..98a80a4a --- /dev/null +++ b/training/results/MatProjectEFormDataset/coGN/coGN_hyper.json @@ -0,0 +1 @@ +{"model": {"module_name": "kgcnn.literature.coGN", "class_name": "make_model", "config": {"name": "coGN", "inputs": {"offset": {"shape": [null, 3], "name": "offset", "dtype": "float32", "ragged": true}, "cell_translation": null, "affine_matrix": null, "voronoi_ridge_area": null, "atomic_number": {"shape": [null], "name": "atomic_number", "dtype": "int32", "ragged": true}, "frac_coords": null, "coords": null, "multiplicity": {"shape": [null], "name": "multiplicity", "dtype": "int32", "ragged": true}, "lattice_matrix": null, "edge_indices": {"shape": [null, 2], "name": "edge_indices", "dtype": "int32", "ragged": true}, "line_graph_edge_indices": null}}}, "training": {"cross_validation": {"class_name": "KFold", "config": {"n_splits": 5, "random_state": 42, "shuffle": true}}, "fit": {"batch_size": 64, "epochs": 800, "validation_freq": 10, "verbose": 2, "callbacks": []}, "compile": {"optimizer": {"class_name": "Adam", "config": {"learning_rate": {"class_name": "kgcnn>KerasPolynomialDecaySchedule", "config": {"dataset_size": 106201, "batch_size": 64, "epochs": 800, "lr_start": 0.0005, "lr_stop": 1e-05}}}}, "loss": "mean_absolute_error"}, "scaler": {"class_name": "StandardScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": true, "with_mean": true, "copy": true}}, "multi_target_indices": null}, "data": {"dataset": {"class_name": "MatProjectEFormDataset", "module_name": "kgcnn.data.datasets.MatProjectEFormDataset", "config": {}, "methods": [{"set_representation": {"pre_processor": {"class_name": "KNNAsymmetricUnitCell", "module_name": "kgcnn.crystal.preprocessor", "config": {"k": 24}}, "reset_graphs": false}}]}, "data_unit": "eV/atom"}, "info": 
{"postfix": "", "postfix_file": "", "kgcnn_version": "3.0.1"}} \ No newline at end of file diff --git a/training/results/MatProjectJdft2dDataset/coGN/coGN_MatProjectJdft2dDataset_score.yaml b/training/results/MatProjectJdft2dDataset/coGN/coGN_MatProjectJdft2dDataset_score.yaml index c6b3bf10..a1772dae 100644 --- a/training/results/MatProjectJdft2dDataset/coGN/coGN_MatProjectJdft2dDataset_score.yaml +++ b/training/results/MatProjectJdft2dDataset/coGN/coGN_MatProjectJdft2dDataset_score.yaml @@ -1,141 +1,123 @@ data_unit: meV/atom -date_time: '2023-04-14 12:49:57' +date_time: '2023-05-22 15:56:06' epochs: -- 1000 -- 1000 -- 1000 -- 1000 -- 1000 +- 800 +- 800 +- 800 +- 800 +- 800 execute_folds: null kgcnn_version: 3.0.1 loss: -- 0.004769620951265097 -- 0.005449751392006874 -- 0.003921336028724909 -- 0.00518146762624383 -- 0.00355949136428535 -lr: -- 5.549999968934571e-06 -- 5.549999968934571e-06 -- 5.549999968934571e-06 -- 5.549999968934571e-06 -- 5.549999968934571e-06 +- 0.024050358682870865 +- 0.022292375564575195 +- 0.04148034378886223 +- 0.02464066818356514 +- 0.026128828525543213 max_loss: -- 0.684315025806427 -- 0.6869133114814758 -- 0.8229182958602905 -- 0.5697979927062988 -- 0.7870522737503052 -max_lr: -- 0.0005000000237487257 -- 0.0005000000237487257 -- 0.0005000000237487257 -- 0.0005000000237487257 -- 0.0005000000237487257 +- 0.9322065114974976 +- 0.6759727597236633 +- 0.9919328093528748 +- 1.1867258548736572 +- 0.9813737869262695 max_scaled_mean_absolute_error: -- 99.2149887084961 -- 95.75604248046875 -- 112.2119369506836 -- 73.36629486083984 -- 94.12844848632812 +- 135.1553955078125 +- 94.23090362548828 +- 135.2584991455078 +- 152.8009033203125 +- 117.36857604980469 max_scaled_root_mean_squared_error: -- 176.85372924804688 -- 201.79014587402344 -- 205.59689331054688 -- 147.19546508789062 -- 212.60919189453125 +- 204.7133331298828 +- 176.1343994140625 +- 208.70352172851562 +- 223.9566650390625 +- 178.64132690429688 max_val_loss: -- 0.32587647438049316 -- 
0.29785603284835815 -- 0.332061767578125 -- 0.522292971611023 -- 0.4799647331237793 +- 0.19805526733398438 +- 0.2823616862297058 +- 0.30979153513908386 +- 0.422086238861084 +- 0.5009438395500183 max_val_scaled_mean_absolute_error: -- 47.24700164794922 -- 41.521263122558594 -- 45.27945327758789 -- 67.2496109008789 -- 57.40195846557617 +- 28.714923858642578 +- 39.36134338378906 +- 42.24272155761719 +- 54.34714889526367 +- 59.910980224609375 max_val_scaled_root_mean_squared_error: -- 77.296875 -- 108.43400573730469 -- 116.89693450927734 -- 171.8570098876953 -- 169.56040954589844 +- 65.96587371826172 +- 103.17406463623047 +- 120.74960327148438 +- 137.4087677001953 +- 174.64865112304688 min_loss: -- 0.004769620951265097 -- 0.005449751392006874 -- 0.0039031626656651497 -- 0.005170580931007862 -- 0.00355949136428535 -min_lr: -- 5.549999968934571e-06 -- 5.549999968934571e-06 -- 5.549999968934571e-06 -- 5.549999968934571e-06 -- 5.549999968934571e-06 +- 0.02174421027302742 +- 0.022292375564575195 +- 0.041175033897161484 +- 0.023035436868667603 +- 0.025760240852832794 min_scaled_mean_absolute_error: -- 0.6915203928947449 -- 0.7596978545188904 -- 0.5322293043136597 -- 0.6657556891441345 -- 0.4257014989852905 +- 3.152570962905884 +- 3.107567310333252 +- 5.61456823348999 +- 2.9660065174102783 +- 3.080826759338379 min_scaled_root_mean_squared_error: -- 6.465406894683838 -- 6.189540863037109 -- 5.289906978607178 -- 5.318819046020508 -- 3.567173957824707 +- 14.776631355285645 +- 13.112813949584961 +- 65.00386047363281 +- 12.033204078674316 +- 16.18703269958496 min_val_loss: -- 0.17681992053985596 -- 0.2272680252790451 -- 0.2653597593307495 -- 0.32978519797325134 -- 0.3783282935619354 +- 0.16276854276657104 +- 0.20488880574703217 +- 0.272601842880249 +- 0.34563127160072327 +- 0.39677026867866516 min_val_scaled_mean_absolute_error: -- 25.636127471923828 -- 31.68126106262207 -- 36.1840705871582 -- 42.462608337402344 -- 45.24662780761719 +- 23.598899841308594 +- 28.561588287353516 +- 
37.17158508300781 +- 44.50292205810547 +- 47.45221710205078 min_val_scaled_root_mean_squared_error: -- 50.430694580078125 -- 76.94483184814453 -- 108.50737762451172 -- 113.26813507080078 -- 157.44790649414062 +- 48.27088165283203 +- 79.85539245605469 +- 109.40503692626953 +- 123.80563354492188 +- 157.54769897460938 model_class: make_model model_name: coGN model_version: '' multi_target_indices: null number_histories: 5 scaled_mean_absolute_error: -- 0.6915203928947449 -- 0.7596978545188904 -- 0.5347076058387756 -- 0.6671573519706726 -- 0.4257014989852905 +- 3.4869260787963867 +- 3.107567310333252 +- 5.656198501586914 +- 3.1726930141448975 +- 3.124908447265625 scaled_root_mean_squared_error: -- 6.484260559082031 -- 6.207010269165039 -- 5.373446464538574 -- 5.321227550506592 -- 3.575141191482544 +- 14.840513229370117 +- 13.131678581237793 +- 65.00386047363281 +- 12.261222839355469 +- 16.18703269958496 val_loss: -- 0.2100907266139984 -- 0.24597060680389404 -- 0.28280526399612427 -- 0.40292635560035706 -- 0.40379568934440613 +- 0.19038069248199463 +- 0.27222388982772827 +- 0.30869820713996887 +- 0.3786124885082245 +- 0.44899311661720276 val_scaled_mean_absolute_error: -- 30.459877014160156 -- 34.28841018676758 -- 38.56290817260742 -- 51.88015365600586 -- 48.29242706298828 +- 27.60222816467285 +- 37.948123931884766 +- 42.0936393737793 +- 48.74953842163086 +- 53.697872161865234 val_scaled_root_mean_squared_error: -- 64.46647644042969 -- 94.47454071044922 -- 113.42539978027344 -- 141.520263671875 -- 161.07005310058594 +- 60.50534439086914 +- 99.77079772949219 +- 120.40348815917969 +- 129.3235321044922 +- 174.33416748046875 diff --git a/training/results/MatProjectJdft2dDataset/coGN/coGN_hyper.json b/training/results/MatProjectJdft2dDataset/coGN/coGN_hyper.json index a7d237d8..994cc27a 100644 --- a/training/results/MatProjectJdft2dDataset/coGN/coGN_hyper.json +++ b/training/results/MatProjectJdft2dDataset/coGN/coGN_hyper.json @@ -1 +1 @@ -{"model": {"module_name": 
"kgcnn.literature.coGN", "class_name": "make_model", "config": {"name": "coGN", "inputs": {"offset": {"shape": [null, 3], "name": "offset", "dtype": "float32", "ragged": true}, "cell_translation": null, "affine_matrix": null, "voronoi_ridge_area": null, "atomic_number": {"shape": [null], "name": "atomic_number", "dtype": "int32", "ragged": true}, "frac_coords": null, "coords": null, "multiplicity": {"shape": [null], "name": "multiplicity", "dtype": "int32", "ragged": true}, "lattice_matrix": null, "edge_indices": {"shape": [null, 2], "name": "edge_indices", "dtype": "int32", "ragged": true}, "line_graph_edge_indices": null}}}, "training": {"cross_validation": {"class_name": "KFold", "config": {"n_splits": 5, "random_state": 42, "shuffle": true}}, "fit": {"batch_size": 32, "epochs": 1000, "validation_freq": 10, "verbose": 2, "callbacks": [{"class_name": "kgcnn>LinearLearningRateScheduler", "config": {"learning_rate_start": 0.0005, "learning_rate_stop": 5e-06, "epo_min": 100, "epo": 1000, "verbose": 0}}]}, "compile": {"optimizer": {"class_name": "Adam", "config": {"lr": 0.0005}}, "loss": "mean_absolute_error"}, "scaler": {"class_name": "StandardScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": true, "with_mean": true, "copy": true}}, "multi_target_indices": null}, "data": {"dataset": {"class_name": "MatProjectJdft2dDataset", "module_name": "kgcnn.data.datasets.MatProjectJdft2dDataset", "config": {}, "methods": [{"set_representation": {"pre_processor": {"class_name": "KNNAsymmetricUnitCell", "module_name": "kgcnn.crystal.preprocessor", "config": {"k": 24}}, "reset_graphs": false}}]}, "data_unit": "meV/atom"}, "info": {"postfix": "", "postfix_file": "", "kgcnn_version": "3.0.1"}} \ No newline at end of file +{"model": {"module_name": "kgcnn.literature.coGN", "class_name": "make_model", "config": {"name": "coGN", "inputs": {"offset": {"shape": [null, 3], "name": "offset", "dtype": "float32", "ragged": true}, "cell_translation": null, 
"affine_matrix": null, "voronoi_ridge_area": null, "atomic_number": {"shape": [null], "name": "atomic_number", "dtype": "int32", "ragged": true}, "frac_coords": null, "coords": null, "multiplicity": {"shape": [null], "name": "multiplicity", "dtype": "int32", "ragged": true}, "lattice_matrix": null, "edge_indices": {"shape": [null, 2], "name": "edge_indices", "dtype": "int32", "ragged": true}, "line_graph_edge_indices": null}}}, "training": {"cross_validation": {"class_name": "KFold", "config": {"n_splits": 5, "random_state": 42, "shuffle": true}}, "fit": {"batch_size": 64, "epochs": 800, "validation_freq": 10, "verbose": 2, "callbacks": []}, "compile": {"optimizer": {"class_name": "Adam", "config": {"learning_rate": {"class_name": "kgcnn>KerasPolynomialDecaySchedule", "config": {"dataset_size": 106.201, "batch_size": 64, "epochs": 800, "lr_start": 0.0005, "lr_stop": 1e-05}}}}, "loss": "mean_absolute_error"}, "scaler": {"class_name": "StandardScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": true, "with_mean": true, "copy": true}}, "multi_target_indices": null}, "data": {"dataset": {"class_name": "MatProjectJdft2dDataset", "module_name": "kgcnn.data.datasets.MatProjectJdft2dDataset", "config": {}, "methods": [{"set_representation": {"pre_processor": {"class_name": "KNNAsymmetricUnitCell", "module_name": "kgcnn.crystal.preprocessor", "config": {"k": 24}}, "reset_graphs": false}}]}, "data_unit": "meV/atom"}, "info": {"postfix": "", "postfix_file": "", "kgcnn_version": "3.0.1"}} \ No newline at end of file diff --git a/training/results/README.md b/training/results/README.md index ca3c4e27..929aec60 100644 --- a/training/results/README.md +++ b/training/results/README.md @@ -89,7 +89,7 @@ Materials Project dataset from Matbench with 636 crystal structures and their co | model | kgcnn | epochs | MAE [meV/atom] | RMSE [meV/atom] | 
|:-----------------------------|:--------|---------:|:------------------------|:--------------------------| | CGCNN.make_crystal_model | 2.2.2 | 1000 | 42.6352 ± 9.6715 | **112.4714 ± 37.9213** | -| coGN | 3.0.1 | 1000 | **40.6968 ± 8.1630** | 114.9913 ± 34.0681 | +| coGN | 3.0.1 | 800 | **42.0183 ± 9.0124** | 116.8675 ± 37.2534 | | DimeNetPP.make_crystal_model | 2.2.2 | 780 | 49.2113 ± 12.7431 | 124.7198 ± 38.4492 | | Megnet.make_crystal_model | 2.2.2 | 1000 | 56.5205 ± 10.8723 | 136.3116 ± 31.2617 | | PAiNN.make_crystal_model | 2.2.2 | 800 | 50.5886 ± 9.9009 | 117.7118 ± 33.4786 | @@ -188,11 +188,12 @@ Materials Project dataset from Matbench with 132752 crystal structures and their | model | kgcnn | epochs | MAE [eV/atom] | RMSE [eV/atom] | |:-----------------------------|:--------|---------:|:-----------------------|:-----------------------| | CGCNN.make_crystal_model | 2.1.1 | 1000 | 0.0369 ± 0.0003 | 0.0873 ± 0.0026 | +| coGN | 3.0.1 | 800 | **0.0169 ± 0.0002** | **0.0484 ± 0.0043** | | DimeNetPP.make_crystal_model | 2.1.1 | 780 | 0.0233 ± 0.0005 | 0.0644 ± 0.0020 | | MEGAN | 2.1.1 | 800 | 0.0397 ± 0.0009 | 0.0902 ± 0.0041 | | Megnet.make_crystal_model | 2.1.0 | 1000 | 0.0247 ± 0.0006 | 0.0639 ± 0.0028 | | PAiNN.make_crystal_model | 2.1.1 | 800 | 0.0244 ± 0.0002 | 0.0568 ± 0.0032 | -| Schnet.make_crystal_model | 2.1.1 | 800 | **0.0215 ± 0.0003** | **0.0525 ± 0.0030** | +| Schnet.make_crystal_model | 2.1.1 | 800 | 0.0215 ± 0.0003 | 0.0525 ± 0.0030 | #### MutagenicityDataset