update: mixed configs
utkarshp1161 committed Jul 15, 2023
1 parent e0ad1f2 commit 2a3055a
Showing 4 changed files with 537 additions and 55 deletions.
228 changes: 228 additions & 0 deletions configs/allegro/3bpa_lips/config.yaml
@@ -0,0 +1,228 @@
# a simple example config file

# Two folders will be used during the training: 'root'/process and 'root'/'run_name'
# run_name contains logfiles and saved models
# process contains processed data sets
# if 'root'/'run_name' exists, 'root'/'run_name'_'year'-'month'-'day'-'hour'-'min'-'s' will be used instead.
root: output_dir_sl/allegro/3bpa_lips
run_name: run1
seed: 123 # model seed
dataset_seed: 456 # data set seed
append: true # set true if a restarted run should append to the previous log file
default_dtype: float32 # type of float to use, e.g. float32 or float64

# -- network --
# tell nequip which modules to build
model_builders:
  - allegro.model.Allegro
  # the typical model builders from `nequip` can still be used:
  - PerSpeciesRescale
  - ForceOutput
  - RescaleEnergyEtc

# radial cutoff in length units
r_max: 5.0

# average number of neighbors in an environment, used to normalize the sum; `auto` precomputes it automatically
avg_num_neighbors: auto

# radial basis
# set true to train the bessel roots
BesselBasis_trainable: true

# p-parameter in envelope function, as proposed in Klicpera, J. et al., arXiv:2003.03123
# sets it BOTH for the RadialBasisProjection AND the Allegro_Module
PolynomialCutoff_p: 6

# symmetry
# maximum order l to use in the spherical harmonics embedding; 1 is the baseline (fast), 2 is more accurate but slower, 3 is highly accurate but slow
l_max: 2

# whether to include E(3)-symmetry / parity
# allowed: o3_full, o3_restricted, so3
parity: o3_full

# number of tensor product layers, 1-3 usually best, more is more accurate but slower
num_layers: 2

# number of features, more is more accurate but slower, 1, 4, 8, 16, 64, 128 are good options to try depending on data set
env_embed_multiplicity: 64

# whether or not to embed the initial edge, true often works best
embed_initial_edge: true

# hidden layer dimensions of the 2-body embedding MLP
two_body_latent_mlp_latent_dimensions: [128, 256, 512, 1024]
# nonlinearity used in the 2-body embedding MLP
two_body_latent_mlp_nonlinearity: silu
# weight initialization of the 2-body embedding MLP
two_body_latent_mlp_initialization: uniform

# hidden layer dimensions of the latent MLP
# these MLPs are cheap if you have large l/env_embed_multiplicity, so they are a good place to put model capacity if you can afford it
# make these smaller only if you are in the ultra-fast/scalable regime
latent_mlp_latent_dimensions: [1024, 1024, 1024]

# nonlinearity used in the latent MLP
latent_mlp_nonlinearity: silu

# weight initialization of the latent MLP
latent_mlp_initialization: uniform

# whether to use a resnet update in the scalar latent space; true usually works best
latent_resnet: true

# hidden layer dimensions of the environment embedding mlp; leaving it empty works best (builds a single linear layer)
env_embed_mlp_latent_dimensions: []

# nonlinearity used in the environment embedding mlp
env_embed_mlp_nonlinearity: null

# weight initialization of the environment embedding mlp
env_embed_mlp_initialization: uniform

# - end allegro layers -

# Final MLP to go from Allegro latent space to edge energies:

# hidden layer dimensions of the per-edge energy final MLP
edge_eng_mlp_latent_dimensions: [128]

# nonlinearity used in the per-edge energy final MLP
edge_eng_mlp_nonlinearity: null

# weight initialization of the per-edge energy final MLP
edge_eng_mlp_initialization: uniform
# data set
# the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
# key_mapping is used to map the keys in an npz file to the NequIP default names (see data/_key.py)
# all arrays are expected to have the shape (nframe, natom, ?), except for the fixed fields
# note that if your data set uses pbc, you also need to pass an array that maps to the nequip "pbc" key
# (a commented npz sketch follows the extxyz block below)
# for an extxyz file:
dataset: ase                                                         # type of data set, can be npz or ase
dataset_file_name: data_sl/mdsim_data/mixed_check/train/botnet.xyz
ase_args:
  format: extxyz
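
# an npz data set could instead be declared roughly as below (a hedged sketch; the npz keys z/E/F/R and the file path are assumptions, not part of this run):
# dataset: npz
# dataset_file_name: path/to/data.npz
# key_mapping:
#   z: atomic_numbers        # atomic species, integers
#   E: total_energy          # total potential energies to train to
#   F: forces                # atomic forces to train to
#   R: pos                   # raw atomic positions
# npz_fixed_field_keys:      # fields that are constant across frames
#   - atomic_numbers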




validation_dataset: ase
validation_dataset_file_name: data_sl/mdsim_data/mixed_check/val/botnet.xyz # path to data set file

# A list of atomic types to be found in the data. The NequIP types will be named with the chemical symbols, and inputs with the correct atomic numbers will be mapped to the corresponding types.
chemical_symbols:
  - H
  - C
  - N
  - O
  - Li
  - P
  - S

# logging
wandb: True # we recommend using wandb for logging
wandb_project: mdbench # project name used in wandb

verbose: info # the same as python logging, e.g. warning, info, debug, error; case insensitive
log_batch_freq: 10 # batch frequency, how often to print training errors within the same epoch
log_epoch_freq: 1 # epoch frequency, how often to print
save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving of intermediate checkpoints when the value is not positive.
save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving of intermediate checkpoints when the value is not positive.

# training
n_train: 19000 # number of training data
n_val: 1000 # number of validation data
learning_rate: 0.005 # learning rate, we found values between 0.01 and 0.005 to work best - this is often one of the most important hyperparameters to tune
batch_size: 1 # batch size, we found it important to keep this small for most applications including forces (1-5); for energy-only training, higher batch sizes work better
max_epochs: 10000 # stop training after this number of epochs; we set a small number here so the example finishes within a few minutes, but in practice we recommend a very large number, e.g. 1 million, and relying on early stopping rather than training the full number of epochs
train_val_split: random # can be random or sequential; if sequential, the first n_train elements are training and the next n_val are validation; random is usually the right choice
shuffle: true # if true, the data loader will shuffle the data, usually a good idea
metrics_key: validation_loss # metrics used for scheduling and saving the best model. Options: `set`_`quantity`; set can be either "train" or "validation"; quantity can be loss or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
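# for example (an illustration only, not used in this run), metrics_key: validation_f_mae would select the best model by validation force MAE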
use_ema: true # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
ema_decay: 0.99 # ema weight, typically set to 0.99 or 0.999
ema_use_num_updates: true # whether to use number of updates when computing averages
report_init_validation: true # if true, report the validation error for the just-initialized model

# early stopping based on metrics values.
early_stopping_patiences: # stop early if a metric value stopped decreasing for n epochs
  validation_loss: 50

early_stopping_lower_bounds: # stop early if a metric value is lower than the bound
  LR: 1.0e-6

early_stopping_upper_bounds: # stop early if a metric value is higher than the bound
  cumulative_wall: 604800

# loss function
loss_coeffs:
  forces: 6889 # if using PerAtomMSELoss, a default weight of 1:1 on each should work well
  total_energy:
    - 1
    - PerAtomMSELoss
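
# a hedged alternative (not used in this run), following the 1:1 weighting noted above:
# loss_coeffs:
#   forces: 1
#   total_energy:
#     - 1
#     - PerAtomMSELoss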

# output metrics
metrics_components:
  - - forces                      # key
    - mae                         # "rmse" or "mae"
  - - forces
    - rmse
  - - forces
    - mae
    - PerSpecies: False           # if true, per species contribution is counted separately
      report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
  - - forces
    - rmse
    - PerSpecies: False
      report_per_component: False
  - - total_energy
    - mae
  - - total_energy
    - mae
    - PerAtom: True               # if true, energy is normalized by the number of atoms
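
# a per-species force MAE could be added as an extra entry (a sketch, not enabled in this run):
#   - - forces
#     - mae
#     - PerSpecies: True
#       report_per_component: False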

# optimizer, may be any optimizer defined in torch.optim
# the name `optimizer_name` is case sensitive
optimizer_name: Adam # default optimizer is Adam
optimizer_amsgrad: true
optimizer_betas: !!python/tuple
  - 0.9
  - 0.999
optimizer_eps: 1.0e-08
optimizer_weight_decay: 0

# gradient clipping using torch.nn.utils.clip_grad_norm_
# see https://pytorch.org/docs/stable/generated/torch.nn.utils.clip_grad_norm_.html#torch.nn.utils.clip_grad_norm_
# setting to inf or null disables it
max_gradient_norm: null
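# as a hedged illustration (the value is an arbitrary assumption), setting e.g. max_gradient_norm: 100.0 would clip gradient norms above 100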

# lr scheduler, currently only supports the two options listed in full.yaml, i.e. on-plateau and cosine annealing with warm restarts; if you need more, please file an issue
# here: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs
lr_scheduler_name: ReduceLROnPlateau
lr_scheduler_patience: 5
lr_scheduler_factor: 0.8

global_rescale_scale_trainable: false
per_species_rescale_trainable: true
per_species_rescale_shifts: dataset_per_atom_total_energy_mean
per_species_rescale_scales: dataset_forces_rms

# # we provide a series of options to shift and scale the data
# # these are for advanced use and usually the defaults work very well
# # the default is to scale the atomic energy and forces by scaling them by the force standard deviation and to shift the energy by the mean atomic energy
# # in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom

# # whether the shifts and scales are trainable. Defaults to False. Optional
# per_species_rescale_shifts_trainable: false
# per_species_rescale_scales_trainable: false

# # initial atomic energy shift for each species. Defaults to the mean per-atom energy. Optional.
# # the value can be a constant float value, an array for each species, or a string that defines a statistics over the training dataset
# per_species_rescale_shifts: dataset_per_atom_total_energy_mean

# # initial atomic energy scale for each species. Optional.
# # the value can be a constant float value, an array for each species, or a string
# per_species_rescale_scales: dataset_forces_rms

# # if explicit numbers are given for the shifts/scales, this parameter must specify whether the given numbers are unitless shifts/scales or are in the units of the dataset. If ``True``, any global rescalings will correctly be applied to the per-species values.
# # per_species_rescale_arguments_in_dataset_units: True