# a simple example config file
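# Note: with this config saved to a file, training would typically be launched with the
# NequIP command-line tool, e.g. `nequip-train path/to/this_config.yaml` (a sketch; the
# exact invocation depends on your nequip/allegro installation).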
# Two folders will be used during the training: 'root'/process and 'root'/'run_name'
# 'run_name' contains logfiles and saved models
# 'process' contains processed data sets
# if 'root'/'run_name' exists, 'root'/'run_name'_'year'-'month'-'day'-'hour'-'min'-'s' will be used instead.
root: output_dir_sl/allegro/3bpa_lips
run_name: run1
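# with the settings above, logfiles and saved models would go to
# output_dir_sl/allegro/3bpa_lips/run1 and processed data sets to
# output_dir_sl/allegro/3bpa_lips/process (or to a timestamped run1_... folder if run1 already exists)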
seed: 123 # model seed
dataset_seed: 456 # data set seed
append: true # set true if a restarted run should append to the previous log file
default_dtype: float32 # type of float to use, e.g. float32 or float64

# -- network --
# tell nequip which modules to build
model_builders:
  - allegro.model.Allegro
  # the typical model builders from `nequip` can still be used:
  - PerSpeciesRescale
  - ForceOutput
  - RescaleEnergyEtc

# radial cutoff in length units
r_max: 5.0

# average number of neighbors in an environment, used to normalize the sum; 'auto' precomputes it automatically
avg_num_neighbors: auto

# radial basis
# set true to train the bessel roots
BesselBasis_trainable: true

# p-parameter in envelope function, as proposed in Klicpera, J. et al., arXiv:2003.03123
# sets it BOTH for the RadialBasisProjection AND the Allegro_Module
PolynomialCutoff_p: 6

# symmetry
# maximum order l to use in spherical harmonics embedding; 1 is baseline (fast), 2 is more accurate but slower, 3 is highly accurate but slow
l_max: 2

# whether to include E(3)-symmetry / parity
# allowed: o3_full, o3_restricted, so3
parity: o3_full

# number of tensor product layers, 1-3 usually best, more is more accurate but slower
num_layers: 2

# number of features, more is more accurate but slower; 1, 4, 8, 16, 64, 128 are good options to try depending on data set
env_embed_multiplicity: 64

# whether or not to embed the initial edge, true often works best
embed_initial_edge: true

# hidden layer dimensions of the 2-body embedding MLP
two_body_latent_mlp_latent_dimensions: [128, 256, 512, 1024]
# nonlinearity used in the 2-body embedding MLP
two_body_latent_mlp_nonlinearity: silu
# weight initialization of the 2-body embedding MLP
two_body_latent_mlp_initialization: uniform

# hidden layer dimensions of the latent MLP
# these MLPs are cheap if you have large l/env_embed_multiplicity, so a good place to put model capacity if you can afford it
# only if you are in the ultra-fast/scalable regime, make these smaller
latent_mlp_latent_dimensions: [1024, 1024, 1024]

# nonlinearity used in the latent MLP
latent_mlp_nonlinearity: silu

# weight initialization of the latent MLP
latent_mlp_initialization: uniform

# whether to use a resnet update in the scalar latent space, true works best usually
latent_resnet: true

# hidden layer dimensions of the environment embedding MLP, none works best (will build a single linear layer)
env_embed_mlp_latent_dimensions: []

# nonlinearity used in the environment embedding MLP
env_embed_mlp_nonlinearity: null

# weight initialization of the environment embedding MLP
env_embed_mlp_initialization: uniform

# - end allegro layers -

# Final MLP to go from Allegro latent space to edge energies:

# hidden layer dimensions of the per-edge energy final MLP
edge_eng_mlp_latent_dimensions: [128]

# nonlinearity used in the per-edge energy final MLP
edge_eng_mlp_nonlinearity: null

# weight initialization of the per-edge energy final MLP
edge_eng_mlp_initialization: uniform

# data set
# the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
# key_mapping is used to map the keys in the npz file to the NequIP default names (see data/_key.py)
# all arrays are expected to have the shape (nframe, natom, ?) except the fixed fields
# note that if your data set uses pbc, you need to also pass an array that maps to the nequip "pbc" key
# for an extxyz file:
dataset: ase
dataset_file_name: data_sl/mdsim_data/mixed_check/train/botnet.xyz
ase_args:
  format: extxyz # type of data set, can be npz or ase
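# for an npz file, a sketch of the equivalent block might look like the commented lines below;
# the left-hand key names (z, E, F, R) and the file path are assumptions and must match your file:
# dataset: npz
# dataset_file_name: path/to/your_data.npz
# key_mapping:
#   z: atomic_numbers # atomic species, as integers
#   E: total_energy # total potential energies
#   F: forces # atomic forces
#   R: pos # atomic positions
# npz_fixed_field_keys: # fields that are the same for every frame
#   - atomic_numbers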

validation_dataset: ase
validation_dataset_file_name: data_sl/mdsim_data/mixed_check/val/botnet.xyz # path to data set file

# A list of atomic types to be found in the data. The NequIP types will be named with the chemical symbols, and inputs with the correct atomic numbers will be mapped to the corresponding types.
chemical_symbols:
  - H
  - C
  - N
  - O
  - Li
  - P
  - S

# logging
wandb: True # we recommend using wandb for logging
wandb_project: mdbench # project name used in wandb

verbose: info # the same as python logging, e.g. warning, info, debug, error; case insensitive
log_batch_freq: 10 # batch frequency, how often to print training errors within the same epoch
log_epoch_freq: 1 # epoch frequency, how often to print
save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving of intermediate checkpoints when the value is not positive.
save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving of intermediate checkpoints when the value is not positive.

# training
n_train: 19000 # number of training data
n_val: 1000 # number of validation data
learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best - this is often one of the most important hyperparameters to tune
batch_size: 1 # batch size, we found it important to keep this small for most applications including forces (1-5); for energy-only training, higher batch sizes work better
max_epochs: 10000 # stop training after this many epochs; we set a small number here to have an example that finishes within a few minutes, but in practice we recommend a very large number, e.g. 1 million, and then relying on early stopping rather than training the full number of epochs
train_val_split: random # can be random or sequential. if sequential, the first n_train elements are training and the next n_val are validation; usually random is the right choice
shuffle: true # if true, the data loader will shuffle the data, usually a good idea
metrics_key: validation_loss # metric used for scheduling and saving the best model. Options: `set`_`quantity`; set can be "train" or "validation", quantity can be loss or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse (e.g. validation_f_mae)
use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
ema_decay: 0.99 # ema weight, typically set to 0.99 or 0.999
ema_use_num_updates: true # whether to use the number of updates when computing averages
report_init_validation: true # if true, report the validation error for the just-initialized model

# early stopping based on metrics values.
early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs
  validation_loss: 50

early_stopping_lower_bounds: # stop early if a metric value is lower than the bound
  LR: 1.0e-6

early_stopping_upper_bounds: # stop early if a metric value is higher than the bound
  cumulative_wall: 604800

# loss function
loss_coeffs:
  forces: 6889 # if using PerAtomMSELoss, a default weight of 1:1 on each should work well
  total_energy:
    - 1
    - PerAtomMSELoss
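# as the comment above suggests, a 1:1 weighting with a per-atom energy loss could look like
# the commented sketch below (an illustration only, not used in this run):
# loss_coeffs:
#   forces: 1
#   total_energy:
#     - 1
#     - PerAtomMSELoss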

# output metrics
metrics_components:
  - - forces # key
    - mae # "rmse" or "mae"
  - - forces
    - rmse
  - - forces
    - mae
    - PerSpecies: False # if true, per species contribution is counted separately
      report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
  - - forces
    - rmse
    - PerSpecies: False
      report_per_component: False
  - - total_energy
    - mae
  - - total_energy
    - mae
    - PerAtom: True # if true, energy is normalized by the number of atoms

# optimizer, may be any optimizer defined in torch.optim
# the name `optimizer_name` is case sensitive
optimizer_name: Adam # default optimizer is Adam
optimizer_amsgrad: true
optimizer_betas: !!python/tuple
  - 0.9
  - 0.999
optimizer_eps: 1.0e-08
optimizer_weight_decay: 0
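# any other torch.optim optimizer can be selected the same way, with its keyword arguments given
# under the `optimizer_` prefix as above; for example (a hedged sketch, not used in this run):
# optimizer_name: AdamW
# optimizer_weight_decay: 0.01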

# gradient clipping using torch.nn.utils.clip_grad_norm_
# see https://pytorch.org/docs/stable/generated/torch.nn.utils.clip_grad_norm_.html#torch.nn.utils.clip_grad_norm_
# setting to inf or null disables it
max_gradient_norm: null

# lr scheduler, currently only supports the two options listed in full.yaml, i.e. on-plateau and cosine annealing with warm restarts; if you need more please file an issue
# here: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs
lr_scheduler_name: ReduceLROnPlateau
lr_scheduler_patience: 5
lr_scheduler_factor: 0.8
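# the other supported option mentioned above is cosine annealing with warm restarts; a sketch
# (the parameter key below is an assumption, check full.yaml for the exact names):
# lr_scheduler_name: CosineAnnealingWarmRestarts
# lr_scheduler_T_0: 10000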

global_rescale_scale_trainable: false
per_species_rescale_trainable: true
per_species_rescale_shifts: dataset_per_atom_total_energy_mean
per_species_rescale_scales: dataset_forces_rms

# # we provide a series of options to shift and scale the data
# # these are for advanced use and usually the defaults work very well
# # the default is to scale the atomic energies and forces by the force standard deviation and to shift the energy by the mean atomic energy
# # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom

# # whether the shifts and scales are trainable. Defaults to False. Optional
# per_species_rescale_shifts_trainable: false
# per_species_rescale_scales_trainable: false

# # initial atomic energy shift for each species. defaults to the mean per-atom energy. Optional
# # the value can be a constant float, an array with one value per species, or a string that selects a statistic computed over the training dataset
# per_species_rescale_shifts: dataset_per_atom_total_energy_mean

# # initial atomic energy scale for each species. Optional.
# # the value can be a constant float, an array with one value per species, or a string
# per_species_rescale_scales: dataset_forces_rms
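# # for explicit per-species values, a commented sketch (the numbers are hypothetical placeholders
# # only; provide one entry per type listed in chemical_symbols):
# # per_species_rescale_shifts: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]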

# # if explicit numbers are given for the shifts/scales, this parameter must specify whether the given numbers are unitless shifts/scales or are in the units of the dataset. If ``True``, any global rescalings will correctly be applied to the per-species values.
# per_species_rescale_arguments_in_dataset_units: True