diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..998ae250 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,46 @@ +name: Linting and code formatting + +on: [] + # Trigger the workflow on push or pull request, + # but only for the main branch + # push: + # branches: [] + # pull_request: + # branches: [] + + +jobs: + build-linux: + runs-on: ubuntu-latest + + steps: + # Setup + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8.10 + - name: Get cache + uses: actions/cache@v2 + with: + path: /opt/hostedtoolcache/Python/3.8.10/x64/lib/python3.8/site-packages + # Look to see if there is a cache hit for the corresponding requirements file + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} + + # Install packages + - name: Install packages required for installation + run: python -m pip install --upgrade pip setuptools wheel + - name: Install dependencies + run: pip install -r requirements.txt + + # Check code + - name: Check formatting with yapf + run: python -m yapf --style=.style.yapf --diff --recursive . +# - name: Lint with flake8 +# run: flake8 --config=.flake8 . +# - name: Check type annotations with mypy +# run: mypy --config-file=.mypy.ini . + + - name: Test with pytest + run: python -m pytest tests diff --git a/.gitignore b/.gitignore index 8b5f3f64..3817d9f3 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,9 @@ dist/ # DS_Store .DS_Store +*.models +*.pt /wandb +*.xyz /checkpoints *.model diff --git a/README.md b/README.md index 31d466af..c628eeb8 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ A partial documentation is available at: https://mace-docs.readthedocs.io ### 1. Requirements: - Python >= 3.7 (for openMM, use Python = 3.9) -- [PyTorch](https://pytorch.org/) >= 1.12 **(training with float64 is not supported with PyTorch 2.1 but is supported with 2.2 and later)** +- [PyTorch](https://pytorch.org/) >= 1.12 **(training with float64 is not supported with PyTorch 2.1 but is supported with 2.2 and later, Pytorch 2.4.1 is not supported)** **Make sure to install PyTorch.** Please refer to the [official PyTorch installation](https://pytorch.org/get-started/locally/) for the installation instructions. Select the appropriate options for your system. diff --git a/mace/__version__.py b/mace/__version__.py index d7b30e12..47e8e016 100644 --- a/mace/__version__.py +++ b/mace/__version__.py @@ -1 +1,3 @@ -__version__ = "0.3.6" +__version__ = "0.3.7" + +__all__ = ["__version__"] diff --git a/mace/calculators/foundations_models.py b/mace/calculators/foundations_models.py index cacbaddb..a9611816 100644 --- a/mace/calculators/foundations_models.py +++ b/mace/calculators/foundations_models.py @@ -62,9 +62,9 @@ def mace_mp( try: # checkpoints release: https://github.com/ACEsuit/mace-mp/releases/tag/mace_mp_0 urls = dict( - small="https://tinyurl.com/46jrkm3v", # 2023-12-10-mace-128-L0_energy_epoch-249.model - medium="https://tinyurl.com/5yyxdm76", # 2023-12-03-mace-128-L1_epoch-199.model - large="https://tinyurl.com/5f5yavf3", # MACE_MPtrj_2022.9.model + small="https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0/2023-12-10-mace-128-L0_energy_epoch-249.model", # 2023-12-10-mace-128-L0_energy_epoch-249.model + medium="https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0/2023-12-03-mace-128-L1_epoch-199.model", # 2023-12-03-mace-128-L1_epoch-199.model + large="https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0/MACE_MPtrj_2022.9.model", # MACE_MPtrj_2022.9.model ) checkpoint_url = ( urls.get(model, urls["medium"]) diff --git a/mace/calculators/foundations_models/mp_vasp_e0.json b/mace/calculators/foundations_models/mp_vasp_e0.json new file mode 100644 index 00000000..01771879 --- /dev/null +++ b/mace/calculators/foundations_models/mp_vasp_e0.json @@ -0,0 +1,91 @@ +{ + "pbe": { + "1": -1.11734008, + "2": 0.00096759, + "3": -0.29754725, + "4": -0.01781697, + "5": -0.26885011, + "6": -1.26173507, + "7": -3.12438806, + "8": -1.54838784, + "9": -0.51882044, + "10": -0.01241601, + "11": -0.22883163, + "12": -0.00951015, + "13": -0.21630193, + "14": -0.8263903, + "15": -1.88816619, + "16": -0.89160769, + "17": -0.25828273, + "18": -0.04925973, + "19": -0.22697913, + "20": -0.0927795, + "21": -2.11396364, + "22": -2.50054871, + "23": -3.70477179, + "24": -5.60261985, + "25": -5.32541181, + "26": -3.52004933, + "27": -1.93555024, + "28": -0.9351969, + "29": -0.60025846, + "30": -0.1651332, + "31": -0.32990651, + "32": -0.77971828, + "33": -1.68367812, + "34": -0.76941032, + "35": -0.22213843, + "36": -0.0335879, + "37": -0.1881724, + "38": -0.06826294, + "39": -2.17084228, + "40": -2.28579303, + "41": -3.13429753, + "42": -4.60211419, + "43": -3.45201492, + "44": -2.38073513, + "45": -1.46855515, + "46": -1.4773126, + "47": -0.33954585, + "48": -0.16843877, + "49": -0.35470981, + "50": -0.83642657, + "51": -1.41101987, + "52": -0.65740879, + "53": -0.18964571, + "54": -0.00857582, + "55": -0.13771876, + "56": -0.03457659, + "57": -0.45580806, + "58": -1.3309175, + "59": -0.29671824, + "60": -0.30391193, + "61": -0.30898427, + "62": -0.25470891, + "63": -8.38001538, + "64": -10.38896525, + "65": -0.3059505, + "66": -0.30676216, + "67": -0.30874667, + "69": -0.25190039, + "70": -0.06431414, + "71": -0.31997586, + "72": -3.52770927, + "73": -3.54492209, + "75": -4.70108713, + "76": -2.88257209, + "77": -1.46779304, + "78": -0.50269936, + "79": -0.28801193, + "80": -0.12454674, + "81": -0.31737194, + "82": -0.77644932, + "83": -1.32627283, + "89": -0.26827152, + "90": -0.90817426, + "91": -2.47653193, + "92": -4.90438537, + "93": -7.63378961, + "94": -10.77237713 + } +} \ No newline at end of file diff --git a/mace/calculators/lammps_mace.py b/mace/calculators/lammps_mace.py index 8ad7e984..4211c37f 100644 --- a/mace/calculators/lammps_mace.py +++ b/mace/calculators/lammps_mace.py @@ -8,12 +8,22 @@ @compile_mode("script") class LAMMPS_MACE(torch.nn.Module): - def __init__(self, model): + def __init__(self, model, **kwargs): super().__init__() self.model = model self.register_buffer("atomic_numbers", model.atomic_numbers) self.register_buffer("r_max", model.r_max) self.register_buffer("num_interactions", model.num_interactions) + if not hasattr(model, "heads"): + model.heads = [None] + self.register_buffer( + "head", + torch.tensor( + self.model.heads.index(kwargs.get("head", self.model.heads[-1])), + dtype=torch.long, + ).unsqueeze(0), + ) + for param in self.model.parameters(): param.requires_grad = False @@ -27,6 +37,7 @@ def forward( compute_displacement = False if compute_virials: compute_displacement = True + data["head"] = self.head out = self.model( data, training=False, diff --git a/mace/calculators/mace.py b/mace/calculators/mace.py index 7a9d8c4e..06b1afba 100644 --- a/mace/calculators/mace.py +++ b/mace/calculators/mace.py @@ -171,6 +171,10 @@ def __init__( [int(z) for z in self.models[0].atomic_numbers] ) self.charges_key = charges_key + try: + self.heads = self.models[0].heads + except AttributeError: + self.heads = ["Default"] model_dtype = get_model_dtype(self.models[0]) if default_dtype == "": print( @@ -224,7 +228,7 @@ def _atoms_to_batch(self, atoms): data_loader = torch_geometric.dataloader.DataLoader( dataset=[ data.AtomicData.from_config( - config, z_table=self.z_table, cutoff=self.r_max + config, z_table=self.z_table, cutoff=self.r_max, heads=self.heads ) ], batch_size=1, @@ -257,7 +261,11 @@ def calculate(self, atoms=None, properties=None, system_changes=all_changes): if self.model_type in ["MACE", "EnergyDipoleMACE"]: batch = self._clone_batch(batch_base) - node_e0 = self.models[0].atomic_energies_fn(batch["node_attrs"]) + node_heads = batch["head"][batch["batch"]] + num_atoms_arange = torch.arange(batch["positions"].shape[0]) + node_e0 = self.models[0].atomic_energies_fn(batch["node_attrs"])[ + num_atoms_arange, node_heads + ] compute_stress = not self.use_compile else: compute_stress = False diff --git a/mace/cli/active_learning_md.py b/mace/cli/active_learning_md.py index 1899f7c1..a26be698 100644 --- a/mace/cli/active_learning_md.py +++ b/mace/cli/active_learning_md.py @@ -144,7 +144,6 @@ def main() -> None: def run(args: argparse.Namespace) -> None: - mace_fname = args.model atoms_fname = args.config atoms_index = args.config_index diff --git a/mace/cli/create_lammps_model.py b/mace/cli/create_lammps_model.py index 4cae618f..3f647906 100644 --- a/mace/cli/create_lammps_model.py +++ b/mace/cli/create_lammps_model.py @@ -1,4 +1,4 @@ -import sys +import argparse import torch from e3nn.util import jit @@ -6,13 +6,68 @@ from mace.calculators import LAMMPS_MACE -def main(): - assert len(sys.argv) == 2, f"Usage: {sys.argv[0]} model_path" +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "model_path", + type=str, + help="Path to the model to be converted to LAMMPS", + ) + parser.add_argument( + "--head", + type=str, + nargs="?", + help="Head of the model to be converted to LAMMPS", + default=None, + ) + return parser.parse_args() + + +def select_head(model): + if hasattr(model, "heads"): + heads = model.heads + else: + heads = [None] + + if len(heads) == 1: + print(f"Only one head found in the model: {heads[0]}. Skipping selection.") + return heads[0] + + print("Available heads in the model:") + for i, head in enumerate(heads): + print(f"{i + 1}: {head}") + + # Ask the user to select a head + selected = input( + f"Select a head by number (Defaulting to head: {len(heads)}, press Enter to accept): " + ) - model_path = sys.argv[1] # takes model name as command-line input + if selected.isdigit() and 1 <= int(selected) <= len(heads): + return heads[int(selected) - 1] + if selected == "": + print("No head selected. Proceeding without specifying a head.") + return None + print(f"No valid selection made. Defaulting to the last head: {heads[-1]}") + return heads[-1] + + +def main(): + args = parse_args() + model_path = args.model_path # takes model name as command-line input model = torch.load(model_path) model = model.double().to("cpu") - lammps_model = LAMMPS_MACE(model) + + if args.head is None: + head = select_head(model) + else: + head = args.head + print( + f"Selected head: {head} from command line in the list available heads: {model.heads}" + ) + + lammps_model = ( + LAMMPS_MACE(model, head=head) if head is not None else LAMMPS_MACE(model) + ) lammps_model_compiled = jit.compile(lammps_model) lammps_model_compiled.save(model_path + "-lammps.pt") diff --git a/mace/cli/eval_configs.py b/mace/cli/eval_configs.py index bf53ef88..b5700bc4 100644 --- a/mace/cli/eval_configs.py +++ b/mace/cli/eval_configs.py @@ -62,7 +62,6 @@ def main() -> None: def run(args: argparse.Namespace) -> None: - torch_tools.set_default_dtype(args.default_dtype) device = torch_tools.init_device(args.device) diff --git a/mace/cli/fine_tuning_select.py b/mace/cli/fine_tuning_select.py new file mode 100644 index 00000000..2fa5f644 --- /dev/null +++ b/mace/cli/fine_tuning_select.py @@ -0,0 +1,346 @@ +########################################################################################### +# This program is distributed under the MIT License (see MIT.md) +########################################################################################### + +import argparse +import logging +from typing import List + +import ase.data +import ase.io +import numpy as np +import torch + +from mace.calculators import MACECalculator, mace_mp + +try: + import fpsample +except ImportError: + pass + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument( + "--configs_pt", + help="path to XYZ configurations for the pretraining", + required=True, + ) + parser.add_argument( + "--configs_ft", + help="path or list of paths to XYZ configurations for the finetuning", + required=True, + ) + parser.add_argument( + "--num_samples", + help="number of samples to select for the pretraining", + type=int, + required=False, + default=None, + ) + parser.add_argument( + "--subselect", + help="method to subselect the configurations of the pretraining set", + type=str, + choices=["fps", "random"], + default="fps", + ) + parser.add_argument( + "--model", help="path to model", default="small", required=False + ) + parser.add_argument("--output", help="output path", required=True) + parser.add_argument( + "--descriptors", help="path to descriptors", required=False, default=None + ) + parser.add_argument( + "--device", + help="select device", + type=str, + choices=["cpu", "cuda"], + default="cpu", + ) + parser.add_argument( + "--default_dtype", + help="set default dtype", + type=str, + choices=["float32", "float64"], + default="float64", + ) + parser.add_argument( + "--head_pt", + help="level of head for the pretraining set", + type=str, + default=None, + ) + parser.add_argument( + "--head_ft", + help="level of head for the finetuning set", + type=str, + default=None, + ) + parser.add_argument( + "--filtering_type", + help="filtering type", + type=str, + choices=[None, "combinations", "exclusive", "inclusive"], + default=None, + ) + parser.add_argument( + "--weight_ft", + help="weight for the finetuning set", + type=float, + default=1.0, + ) + parser.add_argument( + "--weight_pt", + help="weight for the pretraining set", + type=float, + default=1.0, + ) + parser.add_argument("--seed", help="random seed", type=int, default=42) + return parser.parse_args() + + +def calculate_descriptors(atoms: List[ase.Atoms], calc: MACECalculator) -> None: + logging.info("Calculating descriptors") + for mol in atoms: + descriptors = calc.get_descriptors(mol.copy(), invariants_only=True) + # average descriptors over atoms for each element + descriptors_dict = { + element: np.mean(descriptors[mol.symbols == element], axis=0) + for element in np.unique(mol.symbols) + } + mol.info["mace_descriptors"] = descriptors_dict + + +def filter_atoms( + atoms: ase.Atoms, element_subset: List[str], filtering_type: str +) -> bool: + """ + Filters atoms based on the provided filtering type and element subset. + + Parameters: + atoms (ase.Atoms): The atoms object to filter. + element_subset (list): The list of elements to consider during filtering. + filtering_type (str): The type of filtering to apply. Can be 'none', 'exclusive', or 'inclusive'. + 'none' - No filtering is applied. + 'combinations' - Return true if `atoms` is composed of combinations of elements in the subset, false otherwise. I.e. does not require all of the specified elements to be present. + 'exclusive' - Return true if `atoms` contains *only* elements in the subset, false otherwise. + 'inclusive' - Return true if `atoms` contains all elements in the subset, false otherwise. I.e. allows additional elements. + + Returns: + bool: True if the atoms pass the filter, False otherwise. + """ + if filtering_type == "none": + return True + if filtering_type == "combinations": + atom_symbols = np.unique(atoms.symbols) + return all( + x in element_subset for x in atom_symbols + ) # atoms must *only* contain elements in the subset + if filtering_type == "exclusive": + atom_symbols = set(list(atoms.symbols)) + return atom_symbols == set(element_subset) + if filtering_type == "inclusive": + atom_symbols = np.unique(atoms.symbols) + return all( + x in atom_symbols for x in element_subset + ) # atoms must *at least* contain elements in the subset + raise ValueError( + f"Filtering type {filtering_type} not recognised. Must be one of 'none', 'exclusive', or 'inclusive'." + ) + + +class FPS: + def __init__(self, atoms_list: List[ase.Atoms], n_samples: int): + self.n_samples = n_samples + self.atoms_list = atoms_list + self.species = np.unique([x.symbol for atoms in atoms_list for x in atoms]) + self.species_dict = {x: i for i, x in enumerate(self.species)} + # start from a random configuration + self.list_index = [np.random.randint(0, len(atoms_list))] + self.assemble_descriptors() + + def run( + self, + ) -> List[int]: + """ + Run the farthest point sampling algorithm. + """ + descriptor_dataset_reshaped = ( + self.descriptors_dataset.reshape( # pylint: disable=E1121 + (len(self.atoms_list), -1) + ) + ) + logging.info(f"{descriptor_dataset_reshaped.shape}") + logging.info(f"n_samples: {self.n_samples}") + self.list_index = fpsample.fps_npdu_kdtree_sampling( + descriptor_dataset_reshaped, + self.n_samples, + ) + return self.list_index + + def assemble_descriptors(self) -> np.ndarray: + """ + Assemble the descriptors for all the configurations. + """ + self.descriptors_dataset: np.ndarray = 10e10 * np.ones( + ( + len(self.atoms_list), + len(self.species), + len(list(self.atoms_list[0].info["mace_descriptors"].values())[0]), + ), + dtype=np.float32, + ).astype(np.float32) + + for i, atoms in enumerate(self.atoms_list): + descriptors = atoms.info["mace_descriptors"] + for z in descriptors: + self.descriptors_dataset[i, self.species_dict[z]] = np.array( + descriptors[z] + ).astype(np.float32) + + +def select_samples( + args: argparse.Namespace, +) -> None: + np.random.seed(args.seed) + torch.manual_seed(args.seed) + if args.model in ["small", "medium", "large"]: + calc = mace_mp(args.model, device=args.device, default_dtype=args.default_dtype) + else: + calc = MACECalculator( + model_paths=args.model, device=args.device, default_dtype=args.default_dtype + ) + if isinstance(args.configs_ft, str): + atoms_list_ft = ase.io.read(args.configs_ft, index=":") + else: + atoms_list_ft = [] + for path in args.configs_ft: + atoms_list_ft += ase.io.read(path, index=":") + + if args.filtering_type is not None: + all_species_ft = np.unique([x.symbol for atoms in atoms_list_ft for x in atoms]) + logging.info( + "Filtering configurations based on the finetuning set, " + f"filtering type: combinations, elements: {all_species_ft}" + ) + if args.subselect != "random": + if args.descriptors is not None: + logging.info("Loading descriptors") + descriptors = np.load(args.descriptors, allow_pickle=True) + atoms_list_pt = ase.io.read(args.configs_pt, index=":") + for i, atoms in enumerate(atoms_list_pt): + atoms.info["mace_descriptors"] = descriptors[i] + atoms_list_pt_filtered = [ + x + for x in atoms_list_pt + if filter_atoms(x, all_species_ft, "combinations") + ] + else: + atoms_list_pt = ase.io.read(args.configs_pt, index=":") + atoms_list_pt_filtered = [ + x + for x in atoms_list_pt + if filter_atoms(x, all_species_ft, "combinations") + ] + else: + atoms_list_pt = ase.io.read(args.configs_pt, index=":") + atoms_list_pt_filtered = [ + x + for x in atoms_list_pt + if filter_atoms(x, all_species_ft, "combinations") + ] + if len(atoms_list_pt_filtered) <= args.num_samples: + logging.info( + f"Number of configurations after filtering {len(atoms_list_pt_filtered)} " + f"is less than the number of samples {args.num_samples}, " + "selecting random configurations for the rest." + ) + atoms_list_pt_minus_filtered = [ + x for x in atoms_list_pt if x not in atoms_list_pt_filtered + ] + atoms_list_pt_random_inds = np.random.choice( + list(range(len(atoms_list_pt_minus_filtered))), + args.num_samples - len(atoms_list_pt_filtered), + replace=False, + ) + atoms_list_pt = atoms_list_pt_filtered + [ + atoms_list_pt_minus_filtered[ind] for ind in atoms_list_pt_random_inds + ] + else: + atoms_list_pt = atoms_list_pt_filtered + + else: + atoms_list_pt = ase.io.read(args.configs_pt, index=":") + if args.descriptors is not None: + logging.info( + f"Loading descriptors for the pretraining set from {args.descriptors}" + ) + descriptors = np.load(args.descriptors, allow_pickle=True) + for i, atoms in enumerate(atoms_list_pt): + atoms.info["mace_descriptors"] = descriptors[i] + + if args.num_samples is not None and args.num_samples < len(atoms_list_pt): + if args.subselect == "fps": + if args.descriptors is None: + logging.info("Calculating descriptors for the pretraining set") + calculate_descriptors(atoms_list_pt, calc) + descriptors_list = [ + atoms.info["mace_descriptors"] for atoms in atoms_list_pt + ] + logging.info( + f"Saving descriptors at {args.output.replace('.xyz', '_descriptors.npy')}" + ) + np.save( + args.output.replace(".xyz", "_descriptors.npy"), descriptors_list + ) + logging.info("Selecting configurations using Farthest Point Sampling") + try: + fps_pt = FPS(atoms_list_pt, args.num_samples) + idx_pt = fps_pt.run() + logging.info(f"Selected {len(idx_pt)} configurations") + except Exception as e: # pylint: disable=W0703 + logging.error( + f"FPS failed, selecting random configurations instead: {e}" + ) + idx_pt = np.random.choice( + list(range(len(atoms_list_pt))), args.num_samples, replace=False + ) + atoms_list_pt = [atoms_list_pt[i] for i in idx_pt] + else: + logging.info("Selecting random configurations") + idx_pt = np.random.choice( + list(range(len(atoms_list_pt))), args.num_samples, replace=False + ) + atoms_list_pt = [atoms_list_pt[i] for i in idx_pt] + for atoms in atoms_list_pt: + # del atoms.info["mace_descriptors"] + atoms.info["pretrained"] = True + atoms.info["config_weight"] = args.weight_pt + atoms.info["mace_descriptors"] = None + if args.head_pt is not None: + atoms.info["head"] = args.head_pt + + logging.info("Saving the selected configurations") + ase.io.write(args.output, atoms_list_pt, format="extxyz") + logging.info("Saving a combined XYZ file") + for atoms in atoms_list_ft: + atoms.info["pretrained"] = False + atoms.info["config_weight"] = args.weight_ft + atoms.info["mace_descriptors"] = None + if args.head_ft is not None: + atoms.info["head"] = args.head_ft + atoms_fps_pt_ft = atoms_list_pt + atoms_list_ft + ase.io.write( + args.output.replace(".xyz", "_combined.xyz"), atoms_fps_pt_ft, format="extxyz" + ) + + +def main(): + args = parse_args() + select_samples(args) + + +if __name__ == "__main__": + main() diff --git a/mace/cli/run_train.py b/mace/cli/run_train.py index f98c7a04..41f9f4a4 100644 --- a/mace/cli/run_train.py +++ b/mace/cli/run_train.py @@ -12,32 +12,43 @@ import os from copy import deepcopy from pathlib import Path -from typing import Optional +from typing import List, Optional -import numpy as np import torch.distributed import torch.nn.functional -from e3nn import o3 from e3nn.util import jit from torch.nn.parallel import DistributedDataParallel as DDP -from torch.optim.swa_utils import SWALR, AveragedModel +from torch.utils.data import ConcatDataset from torch_ema import ExponentialMovingAverage import mace -from mace import data, modules, tools +from mace import data, tools from mace.calculators.foundations_models import mace_mp, mace_off from mace.tools import torch_geometric -from mace.tools.finetuning_utils import load_foundations +from mace.tools.model_script_utils import configure_model +from mace.tools.multihead_tools import ( + HeadConfig, + assemble_mp_data, + dict_head_to_dataclass, + prepare_default_head, +) from mace.tools.scripts_utils import ( LRScheduler, convert_to_json_format, create_error_table, + dict_to_array, extract_config_mace_model, get_atomic_energies, + get_avg_num_neighbors, get_config_type_weights, get_dataset_from_xyz, get_files_with_suffix, + get_loss_fn, + get_optimizer, + get_params_options, + get_swa, print_git_commit, + setup_wandb, ) from mace.tools.slurm_distributed import DistributedEnvironment from mace.tools.utils import AtomicNumberTable @@ -55,8 +66,16 @@ def run(args: argparse.Namespace) -> None: """ This script runs the training/fine tuning for mace """ - args, input_log_messages = tools.check_args(args) tag = tools.get_tag(name=args.name, seed=args.seed) + args, input_log_messages = tools.check_args(args) + + if args.device == "xpu": + try: + import intel_extension_for_pytorch as ipex + except ImportError as e: + raise ImportError( + "Error: Intel extension for PyTorch not found, but XPU device was specified" + ) from e if args.distributed: try: distr_env = DistributedEnvironment() @@ -93,7 +112,12 @@ def run(args: argparse.Namespace) -> None: tools.set_default_dtype(args.default_dtype) device = tools.init_device(args.device) commit = print_git_commit() + model_foundation: Optional[torch.nn.Module] = None if args.foundation_model is not None: + if args.multiheads_finetuning: + assert ( + args.E0s != "average" + ), "average atomic energies cannot be used for multiheads finetuning" if args.foundation_model in ["small", "medium", "large"]: logging.info( f"Using foundation model mace-mp-0 {args.foundation_model} as initial checkpoint." @@ -116,154 +140,314 @@ def run(args: argparse.Namespace) -> None: ) model_foundation = calc.models[0] else: - model_foundation = torch.load(args.foundation_model, map_location=device) + model_foundation = torch.load( + args.foundation_model, map_location=args.device + ) logging.info( f"Using foundation model {args.foundation_model} as initial checkpoint." ) args.r_max = model_foundation.r_max.item() + else: + args.multiheads_finetuning = False - if args.statistics_file is not None: - with open(args.statistics_file, "r") as f: # pylint: disable=W1514 - statistics = json.load(f) - logging.info("Using statistics json file") - args.r_max = ( - statistics["r_max"] if args.foundation_model is None else args.r_max - ) - args.atomic_numbers = statistics["atomic_numbers"] - args.mean = statistics["mean"] - args.std = statistics["std"] - args.avg_num_neighbors = statistics["avg_num_neighbors"] - args.compute_avg_num_neighbors = False - args.E0s = statistics["atomic_energies"] + if args.heads is not None: + args.heads = ast.literal_eval(args.heads) + else: + args.heads = prepare_default_head(args) - logging.info("") logging.info("===========LOADING INPUT DATA===========") - # Data preparation - if args.train_file.endswith(".xyz"): - if args.valid_file is not None: - assert args.valid_file.endswith( - ".xyz" - ), "valid_file if given must be same format as train_file" - config_type_weights = get_config_type_weights(args.config_type_weights) - collections, atomic_energies_dict = get_dataset_from_xyz( - work_dir=args.work_dir, - train_path=args.train_file, - valid_path=args.valid_file, - valid_fraction=args.valid_fraction, - config_type_weights=config_type_weights, - test_path=args.test_file, - seed=args.seed, - energy_key=args.energy_key, - forces_key=args.forces_key, - stress_key=args.stress_key, - virials_key=args.virials_key, - dipole_key=args.dipole_key, - charges_key=args.charges_key, - keep_isolated_atoms=args.keep_isolated_atoms, + heads = list(args.heads.keys()) + logging.info(f"Using heads: {heads}") + head_configs: List[HeadConfig] = [] + for head, head_args in args.heads.items(): + logging.info(f"============= Processing head {head} ===========") + head_config = dict_head_to_dataclass(head_args, head, args) + if head_config.statistics_file is not None: + with open(head_config.statistics_file, "r") as f: # pylint: disable=W1514 + statistics = json.load(f) + logging.info("Using statistics json file") + head_config.r_max = ( + statistics["r_max"] if args.foundation_model is None else args.r_max + ) + head_config.atomic_numbers = statistics["atomic_numbers"] + head_config.mean = statistics["mean"] + head_config.std = statistics["std"] + head_config.avg_num_neighbors = statistics["avg_num_neighbors"] + head_config.compute_avg_num_neighbors = False + if isinstance(statistics["atomic_energies"], str) and statistics[ + "atomic_energies" + ].endswith(".json"): + with open(statistics["atomic_energies"], "r", encoding="utf-8") as f: + atomic_energies = json.load(f) + head_config.E0s = atomic_energies + head_config.atomic_energies_dict = ast.literal_eval(atomic_energies) + else: + head_config.E0s = statistics["atomic_energies"] + head_config.atomic_energies_dict = ast.literal_eval( + statistics["atomic_energies"] + ) + + # Data preparation + if head_config.train_file.endswith(".xyz"): + if head_config.valid_file is not None: + assert head_config.valid_file.endswith( + ".xyz" + ), "valid_file if given must be same format as train_file" + config_type_weights = get_config_type_weights( + head_config.config_type_weights + ) + collections, atomic_energies_dict = get_dataset_from_xyz( + work_dir=args.work_dir, + train_path=head_config.train_file, + valid_path=head_config.valid_file, + valid_fraction=head_config.valid_fraction, + config_type_weights=config_type_weights, + test_path=head_config.test_file, + seed=args.seed, + energy_key=head_config.energy_key, + forces_key=head_config.forces_key, + stress_key=head_config.stress_key, + virials_key=head_config.virials_key, + dipole_key=head_config.dipole_key, + charges_key=head_config.charges_key, + head_name=head_config.head_name, + keep_isolated_atoms=head_config.keep_isolated_atoms, + ) + head_config.collections = collections + head_config.atomic_energies_dict = atomic_energies_dict + logging.info( + f"Total number of configurations: train={len(collections.train)}, valid={len(collections.valid)}, " + f"tests=[{', '.join([name + ': ' + str(len(test_configs)) for name, test_configs in collections.tests])}]," + ) + head_configs.append(head_config) + + if all(head_config.train_file.endswith(".xyz") for head_config in head_configs): + size_collections_train = sum( + len(head_config.collections.train) for head_config in head_configs + ) + size_collections_valid = sum( + len(head_config.collections.valid) for head_config in head_configs ) - if len(collections.train) < args.batch_size: + if size_collections_train < args.batch_size: logging.error( - f"Batch size ({args.batch_size}) is larger than the number of training data ({len(collections.train)})" + f"Batch size ({args.batch_size}) is larger than the number of training data ({size_collections_train})" ) - if len(collections.valid) < args.valid_batch_size: + if size_collections_valid < args.valid_batch_size: logging.warning( - f"Validation batch size ({args.valid_batch_size}) is larger than the number of validation data ({len(collections.valid)})" + f"Validation batch size ({args.valid_batch_size}) is larger than the number of validation data ({size_collections_valid})" ) - args.valid_batch_size = len(collections.valid) - else: - atomic_energies_dict = None + if args.multiheads_finetuning: + logging.info( + "==================Using multiheads finetuning mode==================" + ) + args.loss = "universal" + if ( + args.foundation_model in ["small", "medium", "large"] + or "mp" in args.foundation_model + or args.pt_train_file is None + ): + logging.info( + "Using foundation model for multiheads finetuning with Materials Project data" + ) + heads = list(dict.fromkeys(["pt_head"] + heads)) + head_config_pt = HeadConfig( + head_name="pt_head", + E0s="foundation", + statistics_file=args.statistics_file, + compute_avg_num_neighbors=False, + avg_num_neighbors=model_foundation.interactions[0].avg_num_neighbors, + ) + collections = assemble_mp_data(args, tag, head_configs) + head_config_pt.collections = collections + head_config_pt.train_file = f"mp_finetuning-{tag}.xyz" + head_configs.append(head_config_pt) + else: + logging.info( + f"Using foundation model for multiheads finetuning with {args.pt_train_file}" + ) + heads = list(dict.fromkeys(["pt_head"] + heads)) + collections, atomic_energies_dict = get_dataset_from_xyz( + work_dir=args.work_dir, + train_path=args.pt_train_file, + valid_path=args.pt_valid_file, + valid_fraction=args.valid_fraction, + config_type_weights=None, + test_path=None, + seed=args.seed, + energy_key=args.energy_key, + forces_key=args.forces_key, + stress_key=args.stress_key, + virials_key=args.virials_key, + dipole_key=args.dipole_key, + charges_key=args.charges_key, + head_name="pt_head", + keep_isolated_atoms=args.keep_isolated_atoms, + ) + head_config_pt = HeadConfig( + head_name="pt_head", + train_file=args.pt_train_file, + valid_file=args.pt_valid_file, + E0s="foundation", + statistics_file=args.statistics_file, + valid_fraction=args.valid_fraction, + config_type_weights=None, + energy_key=args.energy_key, + forces_key=args.forces_key, + stress_key=args.stress_key, + virials_key=args.virials_key, + dipole_key=args.dipole_key, + charges_key=args.charges_key, + keep_isolated_atoms=args.keep_isolated_atoms, + collections=collections, + avg_num_neighbors=model_foundation.interactions[0].avg_num_neighbors, + compute_avg_num_neighbors=False, + ) + head_config_pt.collections = collections + head_configs.append(head_config_pt) + logging.info( + f"Total number of configurations: train={len(collections.train)}, valid={len(collections.valid)}" + ) # Atomic number table # yapf: disable - if args.atomic_numbers is None: - assert args.train_file.endswith(".xyz"), "Must specify atomic_numbers when using .h5 train_file input" - z_table = tools.get_atomic_number_table_from_zs( - z - for configs in (collections.train, collections.valid) - for config in configs - for z in config.atomic_numbers - ) - else: - if args.statistics_file is None: - logging.info("Using atomic numbers from command line argument") + for head_config in head_configs: + if head_config.atomic_numbers is None: + assert head_config.train_file.endswith(".xyz"), "Must specify atomic_numbers when using .h5 train_file input" + z_table_head = tools.get_atomic_number_table_from_zs( + z + for configs in (head_config.collections.train, head_config.collections.valid) + for config in configs + for z in config.atomic_numbers + ) + head_config.atomic_numbers = z_table_head.zs + head_config.z_table = z_table_head else: - logging.info("Using atomic numbers from statistics file") - zs_list = ast.literal_eval(args.atomic_numbers) - assert isinstance(zs_list, list) - z_table = tools.get_atomic_number_table_from_zs(zs_list) - # yapf: enable + if head_config.statistics_file is None: + logging.info("Using atomic numbers from command line argument") + else: + logging.info("Using atomic numbers from statistics file") + zs_list = ast.literal_eval(head_config.atomic_numbers) + assert isinstance(zs_list, list) + z_table_head = tools.AtomicNumberTable(zs_list) + head_config.atomic_numbers = zs_list + head_config.z_table = z_table_head + # yapf: enable + all_atomic_numbers = set() + for head_config in head_configs: + all_atomic_numbers.update(head_config.atomic_numbers) + z_table = AtomicNumberTable(sorted(list(all_atomic_numbers))) logging.info(f"Atomic Numbers used: {z_table.zs}") - if atomic_energies_dict is None or len(atomic_energies_dict) == 0: - if args.E0s.lower() == "foundation": - assert args.foundation_model is not None - z_table_foundation = AtomicNumberTable( - [int(z) for z in model_foundation.atomic_numbers] - ) - atomic_energies_dict = { - z: model_foundation.atomic_energies_fn.atomic_energies[ - z_table_foundation.z_to_index(z) - ].item() - for z in z_table.zs - } - logging.info( - f"Using Atomic Energies from foundation model [z, eV]: {', '.join([f'{z}: {atomic_energies_dict[z]}' for z in z_table_foundation.zs])}" - ) - else: - if args.train_file.endswith(".xyz"): - atomic_energies_dict = get_atomic_energies( - args.E0s, collections.train, z_table + # Atomic energies + atomic_energies_dict = {} + for head_config in head_configs: + if head_config.atomic_energies_dict is None or len(head_config.atomic_energies_dict) == 0: + if head_config.train_file.endswith(".xyz") and head_config.E0s.lower() != "foundation": + atomic_energies_dict[head_config.head_name] = get_atomic_energies( + head_config.E0s, head_config.collections.train, head_config.z_table ) + elif head_config.E0s.lower() == "foundation": + assert args.foundation_model is not None + z_table_foundation = AtomicNumberTable( + [int(z) for z in model_foundation.atomic_numbers] + ) + atomic_energies_dict[head_config.head_name] = { + z: model_foundation.atomic_energies_fn.atomic_energies[ + z_table_foundation.z_to_index(z) + ].item() + for z in z_table.zs + } else: - atomic_energies_dict = get_atomic_energies(args.E0s, None, z_table) + atomic_energies_dict[head_config.head_name] = get_atomic_energies(head_config.E0s, None, head_config.z_table) + else: + atomic_energies_dict[head_config.head_name] = head_config.atomic_energies_dict + + # Atomic energies for multiheads finetuning + if args.multiheads_finetuning: + assert ( + model_foundation is not None + ), "Model foundation must be provided for multiheads finetuning" + z_table_foundation = AtomicNumberTable( + [int(z) for z in model_foundation.atomic_numbers] + ) + atomic_energies_dict["pt_head"] = { + z: model_foundation.atomic_energies_fn.atomic_energies[ + z_table_foundation.z_to_index(z) + ].item() + for z in z_table.zs + } if args.model == "AtomicDipolesMACE": atomic_energies = None dipole_only = True - compute_dipole = True - compute_energy = False + args.compute_dipole = True + args.compute_energy = False args.compute_forces = False - compute_virials = False + args.compute_virials = False args.compute_stress = False else: dipole_only = False if args.model == "EnergyDipolesMACE": - compute_dipole = True - compute_energy = True + args.compute_dipole = True + args.compute_energy = True args.compute_forces = True - compute_virials = False + args.compute_virials = False args.compute_stress = False else: - compute_energy = True - compute_dipole = False - atomic_energies: np.ndarray = np.array( - [atomic_energies_dict[z] for z in z_table.zs] - ) - logging.info( - f"Atomic Energies used (z: eV): {{{', '.join([f'{z}: {atomic_energies_dict[z]}' for z in z_table.zs])}}}" - ) + args.compute_energy = True + args.compute_dipole = False + # atomic_energies: np.ndarray = np.array( + # [atomic_energies_dict[z] for z in z_table.zs] + # ) + atomic_energies = dict_to_array(atomic_energies_dict, heads) + for head_config in head_configs: + logging.info(f"Atomic Energies used (z: eV) for head {head_config.head_name}: " + "{" + ", ".join([f"{z}: {atomic_energies_dict[head_config.head_name][z]}" for z in head_config.z_table.zs]) + "}") + + + valid_sets = {head: [] for head in heads} + train_sets = {head: [] for head in heads} + for head_config in head_configs: + if head_config.train_file.endswith(".xyz"): + train_sets[head_config.head_name] = [ + data.AtomicData.from_config( + config, z_table=z_table, cutoff=args.r_max, heads=heads + ) + for config in head_config.collections.train + ] + valid_sets[head_config.head_name] = [ + data.AtomicData.from_config( + config, z_table=z_table, cutoff=args.r_max, heads=heads + ) + for config in head_config.collections.valid + ] - if args.train_file.endswith(".xyz"): - train_set = [ - data.AtomicData.from_config(config, z_table=z_table, cutoff=args.r_max) - for config in collections.train - ] - valid_set = [ - data.AtomicData.from_config(config, z_table=z_table, cutoff=args.r_max) - for config in collections.valid - ] - elif args.train_file.endswith(".h5"): - train_set = data.HDF5Dataset(args.train_file, r_max=args.r_max, z_table=z_table) - valid_set = data.HDF5Dataset(args.valid_file, r_max=args.r_max, z_table=z_table) - else: # This case would be for when the file path is to a directory of multiple .h5 files - train_set = data.dataset_from_sharded_hdf5( - args.train_file, r_max=args.r_max, z_table=z_table - ) - valid_set = data.dataset_from_sharded_hdf5( - args.valid_file, r_max=args.r_max, z_table=z_table + elif head_config.train_file.endswith(".h5"): + train_sets[head_config.head_name] = data.HDF5Dataset( + head_config.train_file, r_max=args.r_max, z_table=z_table, heads=heads, head=head_config.head_name + ) + valid_sets[head_config.head_name] = data.HDF5Dataset( + head_config.valid_file, r_max=args.r_max, z_table=z_table, heads=heads, head=head_config.head_name + ) + else: # This case would be for when the file path is to a directory of multiple .h5 files + train_sets[head_config.head_name] = data.dataset_from_sharded_hdf5( + head_config.train_file, r_max=args.r_max, z_table=z_table, heads=heads, head=head_config.head_name + ) + valid_sets[head_config.head_name] = data.dataset_from_sharded_hdf5( + head_config.valid_file, r_max=args.r_max, z_table=z_table, heads=heads, head=head_config.head_name + ) + train_loader_head = torch_geometric.dataloader.DataLoader( + dataset=train_sets[head_config.head_name], + batch_size=args.batch_size, + shuffle=True, + drop_last=True, + pin_memory=args.pin_memory, + num_workers=args.num_workers, + generator=torch.Generator().manual_seed(args.seed), ) - + head_config.train_loader = train_loader_head + # concatenate all the trainsets + train_set = ConcatDataset([train_sets[head] for head in heads]) train_sampler, valid_sampler = None, None if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler( @@ -274,14 +458,17 @@ def run(args: argparse.Namespace) -> None: drop_last=True, seed=args.seed, ) - valid_sampler = torch.utils.data.distributed.DistributedSampler( - valid_set, - num_replicas=world_size, - rank=rank, - shuffle=True, - drop_last=True, - seed=args.seed, - ) + valid_samplers = {} + for head, valid_set in valid_sets.items(): + valid_sampler = torch.utils.data.distributed.DistributedSampler( + valid_set, + num_replicas=world_size, + rank=rank, + shuffle=True, + drop_last=True, + seed=args.seed, + ) + valid_samplers[head] = valid_sampler train_loader = torch_geometric.dataloader.DataLoader( dataset=train_set, batch_size=args.batch_size, @@ -292,273 +479,26 @@ def run(args: argparse.Namespace) -> None: num_workers=args.num_workers, generator=torch.Generator().manual_seed(args.seed), ) - valid_loader = torch_geometric.dataloader.DataLoader( - dataset=valid_set, - batch_size=args.valid_batch_size, - sampler=valid_sampler, - shuffle=False, - drop_last=False, - pin_memory=args.pin_memory, - num_workers=args.num_workers, - generator=torch.Generator().manual_seed(args.seed), - ) - logging.info("") - logging.info("===========MODEL DETAILS===========") - if args.loss == "weighted": - loss_fn = modules.WeightedEnergyForcesLoss( - energy_weight=args.energy_weight, forces_weight=args.forces_weight - ) - elif args.loss == "forces_only": - loss_fn = modules.WeightedForcesLoss(forces_weight=args.forces_weight) - elif args.loss == "virials": - loss_fn = modules.WeightedEnergyForcesVirialsLoss( - energy_weight=args.energy_weight, - forces_weight=args.forces_weight, - virials_weight=args.virials_weight, - ) - elif args.loss == "stress": - loss_fn = modules.WeightedEnergyForcesStressLoss( - energy_weight=args.energy_weight, - forces_weight=args.forces_weight, - stress_weight=args.stress_weight, - ) - elif args.loss == "huber": - loss_fn = modules.WeightedHuberEnergyForcesStressLoss( - energy_weight=args.energy_weight, - forces_weight=args.forces_weight, - stress_weight=args.stress_weight, - huber_delta=args.huber_delta, - ) - elif args.loss == "universal": - loss_fn = modules.UniversalLoss( - energy_weight=args.energy_weight, - forces_weight=args.forces_weight, - stress_weight=args.stress_weight, - huber_delta=args.huber_delta, - ) - elif args.loss == "dipole": - assert ( - dipole_only is True - ), "dipole loss can only be used with AtomicDipolesMACE model" - loss_fn = modules.DipoleSingleLoss( - dipole_weight=args.dipole_weight, - ) - elif args.loss == "energy_forces_dipole": - assert dipole_only is False and compute_dipole is True - loss_fn = modules.WeightedEnergyForcesDipoleLoss( - energy_weight=args.energy_weight, - forces_weight=args.forces_weight, - dipole_weight=args.dipole_weight, - ) - else: - # Unweighted Energy and Forces loss by default - loss_fn = modules.WeightedEnergyForcesLoss(energy_weight=1.0, forces_weight=1.0) - - if args.compute_avg_num_neighbors: - avg_num_neighbors = modules.compute_avg_num_neighbors(train_loader) - if args.distributed: - num_graphs = torch.tensor(len(train_loader.dataset)).to(device) - num_neighbors = num_graphs * torch.tensor(avg_num_neighbors).to(device) - torch.distributed.all_reduce(num_graphs, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce( - num_neighbors, op=torch.distributed.ReduceOp.SUM - ) - args.avg_num_neighbors = (num_neighbors / num_graphs).item() - else: - args.avg_num_neighbors = avg_num_neighbors - if args.avg_num_neighbors < 2 or args.avg_num_neighbors > 100: - logging.warning( - f"Unusual average number of neighbors: {args.avg_num_neighbors:.1f}" - ) - else: - logging.info(f"Average number of neighbors: {args.avg_num_neighbors:.1f}") - - # Selecting outputs - compute_virials = False - if args.loss in ("stress", "virials", "huber", "universal"): - compute_virials = True - args.compute_stress = True - if "MAE" in args.error_table: - args.error_table = "PerAtomMAEstressvirials" - else: - args.error_table = "PerAtomRMSEstressvirials" - - output_args = { - "energy": compute_energy, - "forces": args.compute_forces, - "virials": compute_virials, - "stress": args.compute_stress, - "dipoles": compute_dipole, - } - - logging.info( - f"During training the following quantities will be reported: {', '.join([f'{report}' for report, value in output_args.items() if value])}" - ) - - if args.scaling == "no_scaling": - args.std = 1.0 - logging.info("No scaling selected") - elif (args.mean is None or args.std is None) and args.model != "AtomicDipolesMACE": - args.mean, args.std = modules.scaling_classes[args.scaling]( - train_loader, atomic_energies - ) - # Build model - if args.foundation_model is not None and args.model in ["MACE", "ScaleShiftMACE"]: - logging.info("Loading FOUNDATION model") - model_config_foundation = extract_config_mace_model(model_foundation) - model_config_foundation["atomic_numbers"] = z_table.zs - model_config_foundation["num_elements"] = len(z_table) - args.max_L = model_config_foundation["hidden_irreps"].lmax - args.num_channels = list( - {irrep.mul for irrep in o3.Irreps(model_config_foundation["hidden_irreps"])} - )[0] - model_config_foundation["atomic_inter_shift"] = ( - model_foundation.scale_shift.shift.item() - ) - model_config_foundation["atomic_inter_scale"] = ( - model_foundation.scale_shift.scale.item() - ) - model_config_foundation["atomic_energies"] = atomic_energies - args.model = "FoundationMACE" - model_config = model_config_foundation # pylint - logging.info( - f"Message passing with {args.num_channels} channels and max_L={args.max_L} ({model_config_foundation['hidden_irreps']})" - ) - logging.info( - f"{model_config_foundation['num_interactions']} layers, each with correlation order: {model_config_foundation['correlation']} (body order: {model_config_foundation['correlation']+1}) and spherical harmonics up to: l={model_config_foundation['max_ell']}" - ) - logging.info( - f"Radial cutoff: {model_config_foundation['r_max']} Å (total receptive field for each atom: {model_config_foundation['r_max'] * model_config_foundation['num_interactions']} Å)" - ) - logging.info( - f"Distance transform for radial basis functions: {model_config_foundation['distance_transform']}" - ) - else: - logging.info("Building model") - logging.info( - f"Message passing with {args.num_channels} channels and max_L={args.max_L} ({args.hidden_irreps})" - ) - logging.info( - f"{args.num_interactions} layers, each with correlation order: {args.correlation} (body order: {args.correlation+1}) and spherical harmonics up to: l={args.max_ell}" - ) - logging.info( - f"{args.num_radial_basis} radial and {args.num_cutoff_basis} basis functions" - ) - logging.info( - f"Radial cutoff: {args.r_max} Å (total receptive field for each atom: {args.r_max * args.num_interactions} Å)" - ) - logging.info( - f"Distance transform for radial basis functions: {args.distance_transform}" - ) - model_config = dict( - r_max=args.r_max, - num_bessel=args.num_radial_basis, - num_polynomial_cutoff=args.num_cutoff_basis, - max_ell=args.max_ell, - interaction_cls=modules.interaction_classes[args.interaction], - num_interactions=args.num_interactions, - num_elements=len(z_table), - hidden_irreps=o3.Irreps(args.hidden_irreps), - atomic_energies=atomic_energies, - avg_num_neighbors=args.avg_num_neighbors, - atomic_numbers=z_table.zs, + valid_loaders = {heads[i]: None for i in range(len(heads))} + if not isinstance(valid_sets, dict): + valid_sets = {"Default": valid_sets} + for head, valid_set in valid_sets.items(): + valid_loaders[head] = torch_geometric.dataloader.DataLoader( + dataset=valid_set, + batch_size=args.valid_batch_size, + sampler=valid_samplers[head] if args.distributed else None, + shuffle=False, + drop_last=False, + pin_memory=args.pin_memory, + num_workers=args.num_workers, + generator=torch.Generator().manual_seed(args.seed), ) - model: torch.nn.Module - - if args.model == "MACE": - model = modules.ScaleShiftMACE( - **model_config, - pair_repulsion=args.pair_repulsion, - distance_transform=args.distance_transform, - correlation=args.correlation, - gate=modules.gate_dict[args.gate], - interaction_cls_first=modules.interaction_classes[ - "RealAgnosticInteractionBlock" - ], - MLP_irreps=o3.Irreps(args.MLP_irreps), - atomic_inter_scale=args.std, - atomic_inter_shift=0.0, - radial_MLP=ast.literal_eval(args.radial_MLP), - radial_type=args.radial_type, - ) - elif args.model == "ScaleShiftMACE": - model = modules.ScaleShiftMACE( - **model_config, - pair_repulsion=args.pair_repulsion, - distance_transform=args.distance_transform, - correlation=args.correlation, - gate=modules.gate_dict[args.gate], - interaction_cls_first=modules.interaction_classes[args.interaction_first], - MLP_irreps=o3.Irreps(args.MLP_irreps), - atomic_inter_scale=args.std, - atomic_inter_shift=args.mean, - radial_MLP=ast.literal_eval(args.radial_MLP), - radial_type=args.radial_type, - ) - elif args.model == "FoundationMACE": - model = modules.ScaleShiftMACE(**model_config_foundation) - elif args.model == "ScaleShiftBOTNet": - model = modules.ScaleShiftBOTNet( - **model_config, - gate=modules.gate_dict[args.gate], - interaction_cls_first=modules.interaction_classes[args.interaction_first], - MLP_irreps=o3.Irreps(args.MLP_irreps), - atomic_inter_scale=args.std, - atomic_inter_shift=args.mean, - ) - elif args.model == "BOTNet": - model = modules.BOTNet( - **model_config, - gate=modules.gate_dict[args.gate], - interaction_cls_first=modules.interaction_classes[args.interaction_first], - MLP_irreps=o3.Irreps(args.MLP_irreps), - ) - elif args.model == "AtomicDipolesMACE": - # std_df = modules.scaling_classes["rms_dipoles_scaling"](train_loader) - assert args.loss == "dipole", "Use dipole loss with AtomicDipolesMACE model" - assert ( - args.error_table == "DipoleRMSE" - ), "Use error_table DipoleRMSE with AtomicDipolesMACE model" - model = modules.AtomicDipolesMACE( - **model_config, - correlation=args.correlation, - gate=modules.gate_dict[args.gate], - interaction_cls_first=modules.interaction_classes[ - "RealAgnosticInteractionBlock" - ], - MLP_irreps=o3.Irreps(args.MLP_irreps), - # dipole_scale=1, - # dipole_shift=0, - ) - elif args.model == "EnergyDipolesMACE": - # std_df = modules.scaling_classes["rms_dipoles_scaling"](train_loader) - assert ( - args.loss == "energy_forces_dipole" - ), "Use energy_forces_dipole loss with EnergyDipolesMACE model" - assert ( - args.error_table == "EnergyDipoleRMSE" - ), "Use error_table EnergyDipoleRMSE with AtomicDipolesMACE model" - model = modules.EnergyDipolesMACE( - **model_config, - correlation=args.correlation, - gate=modules.gate_dict[args.gate], - interaction_cls_first=modules.interaction_classes[ - "RealAgnosticInteractionBlock" - ], - MLP_irreps=o3.Irreps(args.MLP_irreps), - ) - else: - raise RuntimeError(f"Unknown model: '{args.model}'") + loss_fn = get_loss_fn(args, dipole_only, args.compute_dipole) + args.avg_num_neighbors = get_avg_num_neighbors(head_configs, args, train_loader, device) - if args.foundation_model is not None: - model = load_foundations( - model, - model_foundation, - z_table, - load_readout=True, - max_L=args.max_L, - ) + # Model + model, output_args = configure_model(args, train_loader, atomic_energies, model_foundation, heads, z_table) model.to(device) logging.debug(model) @@ -570,68 +510,18 @@ def run(args: argparse.Namespace) -> None: if args.ema: logging.info(f"Using Exponential Moving Average with decay: {args.ema_decay}") logging.info( - f"Number of gradient updates: {int(args.max_num_epochs*len(collections.train)/args.batch_size)}" + f"Number of gradient updates: {int(args.max_num_epochs*len(train_set)/args.batch_size)}" ) logging.info(f"Learning rate: {args.lr}, weight decay: {args.weight_decay}") logging.info(loss_fn) # Optimizer - decay_interactions = {} - no_decay_interactions = {} - for name, param in model.interactions.named_parameters(): - if "linear.weight" in name or "skip_tp_full.weight" in name: - decay_interactions[name] = param - else: - no_decay_interactions[name] = param - - param_options = dict( - params=[ - { - "name": "embedding", - "params": model.node_embedding.parameters(), - "weight_decay": 0.0, - }, - { - "name": "interactions_decay", - "params": list(decay_interactions.values()), - "weight_decay": args.weight_decay, - }, - { - "name": "interactions_no_decay", - "params": list(no_decay_interactions.values()), - "weight_decay": 0.0, - }, - { - "name": "products", - "params": model.products.parameters(), - "weight_decay": args.weight_decay, - }, - { - "name": "readouts", - "params": model.readouts.parameters(), - "weight_decay": 0.0, - }, - ], - lr=args.lr, - amsgrad=args.amsgrad, - betas=(args.beta, 0.999), - ) - + param_options = get_params_options(args, model) optimizer: torch.optim.Optimizer - if args.optimizer == "adamw": - optimizer = torch.optim.AdamW(**param_options) - elif args.optimizer == "schedulefree": - try: - from schedulefree import adamw_schedulefree - except ImportError as exc: - raise ImportError( - "`schedulefree` is not installed. Please install it via `pip install schedulefree` or `pip install mace-torch[schedulefree]`" - ) from exc - _param_options = {k: v for k, v in param_options.items() if k != "amsgrad"} - optimizer = adamw_schedulefree.AdamWScheduleFree(**_param_options) - else: - optimizer = torch.optim.Adam(**param_options) - + optimizer = get_optimizer(args, param_options) + if args.device == "xpu": + logging.info("Optimzing model and optimzier for XPU") + model, optimizer = ipex.optimize(model, optimizer=optimizer) logger = tools.MetricsLogger( directory=args.results_dir, tag=tag + "_train" ) # pylint: disable=E1123 @@ -641,50 +531,7 @@ def run(args: argparse.Namespace) -> None: swa: Optional[tools.SWAContainer] = None swas = [False] if args.swa: - assert dipole_only is False, "Stage Two for dipole fitting not implemented" - swas.append(True) - if args.start_swa is None: - args.start_swa = max(1, args.max_num_epochs // 4 * 3) - logging.info( - f"Stage Two will start after {args.start_swa} epochs with loss function:" - ) - if args.loss == "forces_only": - raise ValueError("Can not select Stage Two with forces only loss.") - if args.loss == "virials": - loss_fn_energy = modules.WeightedEnergyForcesVirialsLoss( - energy_weight=args.swa_energy_weight, - forces_weight=args.swa_forces_weight, - virials_weight=args.swa_virials_weight, - ) - elif args.loss == "stress": - loss_fn_energy = modules.WeightedEnergyForcesStressLoss( - energy_weight=args.swa_energy_weight, - forces_weight=args.swa_forces_weight, - stress_weight=args.swa_stress_weight, - ) - elif args.loss == "energy_forces_dipole": - loss_fn_energy = modules.WeightedEnergyForcesDipoleLoss( - args.swa_energy_weight, - forces_weight=args.swa_forces_weight, - dipole_weight=args.swa_dipole_weight, - ) - else: - loss_fn_energy = modules.WeightedEnergyForcesLoss( - energy_weight=args.swa_energy_weight, - forces_weight=args.swa_forces_weight, - ) - logging.info(loss_fn_energy) - swa = tools.SWAContainer( - model=AveragedModel(model), - scheduler=SWALR( - optimizer=optimizer, - swa_lr=args.swa_lr, - anneal_epochs=1, - anneal_strategy="linear", - ), - start=args.start_swa, - loss_fn=loss_fn_energy, - ) + swa, swas = get_swa(args, model, optimizer, swas, dipole_only) checkpoint_handler = tools.CheckpointHandler( directory=args.checkpoints_dir, @@ -718,22 +565,7 @@ def run(args: argparse.Namespace) -> None: group["lr"] = args.lr if args.wandb: - logging.info("Using Weights and Biases for logging") - import wandb - - wandb_config = {} - args_dict = vars(args) - args_dict_json = json.dumps(args_dict) - for key in args.wandb_log_hypers: - wandb_config[key] = args_dict[key] - tools.init_wandb( - project=args.wandb_project, - entity=args.wandb_entity, - name=args.wandb_name, - config=wandb_config, - directory=args.wandb_dir, - ) - wandb.run.summary["params"] = args_dict_json + setup_wandb(args) if args.distributed: distributed_model = DDP(model, device_ids=[local_rank]) @@ -744,7 +576,7 @@ def run(args: argparse.Namespace) -> None: model=model, loss_fn=loss_fn, train_loader=train_loader, - valid_loader=valid_loader, + valid_loaders=valid_loaders, optimizer=optimizer, lr_scheduler=lr_scheduler, checkpoint_handler=checkpoint_handler, @@ -766,68 +598,85 @@ def run(args: argparse.Namespace) -> None: train_sampler=train_sampler, rank=rank, ) + logging.info("") logging.info("===========RESULTS===========") logging.info("Computing metrics for training, validation, and test sets") - all_data_loaders = { - "train": train_loader, - "valid": valid_loader, - } + train_valid_data_loader = {} + for head_config in head_configs: + data_loader_name = "train_" + head_config.head_name + train_valid_data_loader[data_loader_name] = head_config.train_loader + for head, valid_loader in valid_loaders.items(): + data_load_name = "valid_" + head + train_valid_data_loader[data_load_name] = valid_loader test_sets = {} - if args.train_file.endswith(".xyz"): - for name, subset in collections.tests: - test_sets[name] = [ - data.AtomicData.from_config(config, z_table=z_table, cutoff=args.r_max) - for config in subset - ] - elif not args.multi_processed_test: - test_files = get_files_with_suffix(args.test_dir, "_test.h5") - for test_file in test_files: - name = os.path.splitext(os.path.basename(test_file))[0] - test_sets[name] = data.HDF5Dataset( - test_file, r_max=args.r_max, z_table=z_table - ) - else: - test_folders = glob(args.test_dir + "/*") - for folder in test_folders: - name = os.path.splitext(os.path.basename(test_file))[0] - test_sets[name] = data.dataset_from_sharded_hdf5( - folder, r_max=args.r_max, z_table=z_table - ) - - for test_name, test_set in test_sets.items(): - test_sampler = None - if args.distributed: - test_sampler = torch.utils.data.distributed.DistributedSampler( + stop_first_test = False + test_data_loader = {} + if all( + head_config.test_file == head_configs[0].test_file + for head_config in head_configs + ) and head_configs[0].test_file is not None: + stop_first_test = True + if all( + head_config.test_dir == head_configs[0].test_dir + for head_config in head_configs + ) and head_configs[0].test_dir is not None: + stop_first_test = True + for head_config in head_configs: + if head_config.train_file.endswith(".xyz"): + print(head_config.test_file) + for name, subset in head_config.collections.tests: + print(name) + test_sets[name] = [ + data.AtomicData.from_config( + config, z_table=z_table, cutoff=args.r_max, heads=heads + ) + for config in subset + ] + if head_config.test_dir is not None: + if not args.multi_processed_test: + test_files = get_files_with_suffix(head_config.test_dir, "_test.h5") + for test_file in test_files: + name = os.path.splitext(os.path.basename(test_file))[0] + test_sets[name] = data.HDF5Dataset( + test_file, r_max=args.r_max, z_table=z_table, heads=heads, head=head_config.head_name + ) + else: + test_folders = glob(head_config.test_dir + "/*") + for folder in test_folders: + name = os.path.splitext(os.path.basename(test_file))[0] + test_sets[name] = data.dataset_from_sharded_hdf5( + folder, r_max=args.r_max, z_table=z_table, heads=heads, head=head_config.head_name + ) + for test_name, test_set in test_sets.items(): + print(test_name) + test_sampler = None + if args.distributed: + test_sampler = torch.utils.data.distributed.DistributedSampler( + test_set, + num_replicas=world_size, + rank=rank, + shuffle=True, + drop_last=True, + seed=args.seed, + ) + try: + drop_last = test_set.drop_last + except AttributeError as e: # pylint: disable=W0612 + drop_last = False + test_loader = torch_geometric.dataloader.DataLoader( test_set, - num_replicas=world_size, - rank=rank, - shuffle=True, - drop_last=True, - seed=args.seed, + batch_size=args.valid_batch_size, + shuffle=(test_sampler is None), + drop_last=drop_last, + num_workers=args.num_workers, + pin_memory=args.pin_memory, ) - try: - drop_last = test_set.drop_last - except AttributeError as e: # pylint: disable=W0612 - drop_last = False - test_loader = torch_geometric.dataloader.DataLoader( - test_set, - batch_size=args.valid_batch_size, - shuffle=(test_sampler is None), - drop_last=drop_last, - num_workers=args.num_workers, - pin_memory=args.pin_memory, - ) - all_data_loaders[test_name] = test_loader - - train_valid_data_loader = { - k: v for k, v in all_data_loaders.items() if k in ["train", "valid"] - } - test_data_loader = { - k: v for k, v in all_data_loaders.items() if k not in ["train", "valid"] - } + test_data_loader[test_name] = test_loader + if stop_first_test: + break for swa_eval in swas: epoch = checkpoint_handler.load_latest( @@ -846,8 +695,7 @@ def run(args: argparse.Namespace) -> None: for param in model.parameters(): param.requires_grad = False - - table_train = create_error_table( + table_train_valid = create_error_table( table_type=args.error_table, all_data_loaders=train_valid_data_loader, model=model_to_evaluate, @@ -857,18 +705,20 @@ def run(args: argparse.Namespace) -> None: device=device, distributed=args.distributed, ) - table_test = create_error_table( - table_type=args.error_table, - all_data_loaders=test_data_loader, - model=model_to_evaluate, - loss_fn=loss_fn, - output_args=output_args, - log_wandb=args.wandb, - device=device, - distributed=args.distributed, - ) - logging.info("Error-table on TRAIN and VALID:\n" + str(table_train)) - logging.info("Error-table on TEST:\n" + str(table_test)) + logging.info("Error-table on TRAIN and VALID:\n" + str(table_train_valid)) + + if test_data_loader: + table_test = create_error_table( + table_type=args.error_table, + all_data_loaders=test_data_loader, + model=model_to_evaluate, + loss_fn=loss_fn, + output_args=output_args, + log_wandb=args.wandb, + device=device, + distributed=args.distributed, + ) + logging.info("Error-table on TEST:\n" + str(table_test)) if rank == 0: # Save entire model diff --git a/mace/data/atomic_data.py b/mace/data/atomic_data.py index edb91b14..cb4edd94 100644 --- a/mace/data/atomic_data.py +++ b/mace/data/atomic_data.py @@ -52,6 +52,7 @@ def __init__( unit_shifts: torch.Tensor, # [n_edges, 3] cell: Optional[torch.Tensor], # [3,3] weight: Optional[torch.Tensor], # [,] + head: Optional[torch.Tensor], # [,] energy_weight: Optional[torch.Tensor], # [,] forces_weight: Optional[torch.Tensor], # [,] stress_weight: Optional[torch.Tensor], # [,] @@ -72,6 +73,7 @@ def __init__( assert unit_shifts.shape[1] == 3 assert len(node_attrs.shape) == 2 assert weight is None or len(weight.shape) == 0 + assert head is None or len(head.shape) == 0 assert energy_weight is None or len(energy_weight.shape) == 0 assert forces_weight is None or len(forces_weight.shape) == 0 assert stress_weight is None or len(stress_weight.shape) == 0 @@ -93,6 +95,7 @@ def __init__( "cell": cell, "node_attrs": node_attrs, "weight": weight, + "head": head, "energy_weight": energy_weight, "forces_weight": forces_weight, "stress_weight": stress_weight, @@ -108,9 +111,15 @@ def __init__( @classmethod def from_config( - cls, config: Configuration, z_table: AtomicNumberTable, cutoff: float + cls, + config: Configuration, + z_table: AtomicNumberTable, + cutoff: float, + heads: Optional[list] = None, ) -> "AtomicData": - edge_index, shifts, unit_shifts = get_neighborhood( + if heads is None: + heads = ["default"] + edge_index, shifts, unit_shifts, cell = get_neighborhood( positions=config.positions, cutoff=cutoff, pbc=config.pbc, cell=config.cell ) indices = atomic_numbers_to_indices(config.atomic_numbers, z_table=z_table) @@ -118,10 +127,14 @@ def from_config( torch.tensor(indices, dtype=torch.long).unsqueeze(-1), num_classes=len(z_table), ) + try: + head = torch.tensor(heads.index(config.head), dtype=torch.long) + except ValueError: + head = torch.tensor(len(heads) - 1, dtype=torch.long) cell = ( - torch.tensor(config.cell, dtype=torch.get_default_dtype()) - if config.cell is not None + torch.tensor(cell, dtype=torch.get_default_dtype()) + if cell is not None else torch.tensor( 3 * [0.0, 0.0, 0.0], dtype=torch.get_default_dtype() ).view(3, 3) @@ -200,6 +213,7 @@ def from_config( cell=cell, node_attrs=one_hot, weight=weight, + head=head, energy_weight=energy_weight, forces_weight=forces_weight, stress_weight=stress_weight, diff --git a/mace/data/hdf5_dataset.py b/mace/data/hdf5_dataset.py index 5057fd7f..477ccd3f 100644 --- a/mace/data/hdf5_dataset.py +++ b/mace/data/hdf5_dataset.py @@ -66,17 +66,24 @@ def __getitem__(self, index): pbc=unpack_value(subgrp["pbc"][()]), cell=unpack_value(subgrp["cell"][()]), ) + if config.head is None: + config.head = self.kwargs.get("head") atomic_data = AtomicData.from_config( - config, z_table=self.z_table, cutoff=self.r_max + config, + z_table=self.z_table, + cutoff=self.r_max, + heads=self.kwargs.get("heads", ["Default"]), ) return atomic_data -def dataset_from_sharded_hdf5(files: List, z_table: AtomicNumberTable, r_max: float): +def dataset_from_sharded_hdf5( + files: List, z_table: AtomicNumberTable, r_max: float, **kwargs +): files = glob(files + "/*") datasets = [] for file in files: - datasets.append(HDF5Dataset(file, z_table=z_table, r_max=r_max)) + datasets.append(HDF5Dataset(file, z_table=z_table, r_max=r_max, **kwargs)) full_dataset = ConcatDataset(datasets) return full_dataset diff --git a/mace/data/neighborhood.py b/mace/data/neighborhood.py index 293576af..21296fa6 100644 --- a/mace/data/neighborhood.py +++ b/mace/data/neighborhood.py @@ -27,18 +27,18 @@ def get_neighborhood( max_positions = np.max(np.absolute(positions)) + 1 # Extend cell in non-periodic directions # For models with more than 5 layers, the multiplicative constant needs to be increased. - temp_cell = np.copy(cell) + # temp_cell = np.copy(cell) if not pbc_x: - temp_cell[0, :] = max_positions * 5 * cutoff * identity[0, :] + cell[0, :] = max_positions * 5 * cutoff * identity[0, :] if not pbc_y: - temp_cell[1, :] = max_positions * 5 * cutoff * identity[1, :] + cell[1, :] = max_positions * 5 * cutoff * identity[1, :] if not pbc_z: - temp_cell[2, :] = max_positions * 5 * cutoff * identity[2, :] + cell[2, :] = max_positions * 5 * cutoff * identity[2, :] sender, receiver, unit_shifts = neighbour_list( quantities="ijS", pbc=pbc, - cell=temp_cell, + cell=cell, positions=positions, cutoff=cutoff, # self_interaction=True, # we want edges from atom to itself in different periodic images @@ -63,4 +63,4 @@ def get_neighborhood( # D = positions[j]-positions[i]+S.dot(cell) shifts = np.dot(unit_shifts, cell) # [n_edges, 3] - return edge_index, shifts, unit_shifts + return edge_index, shifts, unit_shifts, cell diff --git a/mace/data/utils.py b/mace/data/utils.py index 78e3e76f..bb8e5448 100644 --- a/mace/data/utils.py +++ b/mace/data/utils.py @@ -47,6 +47,7 @@ class Configuration: stress_weight: float = 1.0 # weight of config stress in loss virials_weight: float = 1.0 # weight of config virial in loss config_type: Optional[str] = DEFAULT_CONFIG_TYPE # config_type of config + head: Optional[str] = "Default" # head used to compute the config Configurations = List[Configuration] @@ -91,7 +92,8 @@ def config_from_atoms_list( virials_key="REF_virials", dipole_key="REF_dipole", charges_key="REF_charges", - config_type_weights: Dict[str, float] = None, + head_key="head", + config_type_weights: Optional[Dict[str, float]] = None, ) -> Configurations: """Convert list of ase.Atoms into Configurations""" if config_type_weights is None: @@ -108,6 +110,7 @@ def config_from_atoms_list( virials_key=virials_key, dipole_key=dipole_key, charges_key=charges_key, + head_key=head_key, config_type_weights=config_type_weights, ) ) @@ -122,7 +125,8 @@ def config_from_atoms( virials_key="REF_virials", dipole_key="REF_dipole", charges_key="REF_charges", - config_type_weights: Dict[str, float] = None, + head_key="head", + config_type_weights: Optional[Dict[str, float]] = None, ) -> Configuration: """Convert ase.Atoms to Configuration""" if config_type_weights is None: @@ -149,6 +153,8 @@ def config_from_atoms( stress_weight = atoms.info.get("config_stress_weight", 1.0) virials_weight = atoms.info.get("config_virials_weight", 1.0) + head = atoms.info.get(head_key, "Default") + # fill in missing quantities but set their weight to 0.0 if energy is None: energy = 0.0 @@ -176,6 +182,7 @@ def config_from_atoms( dipole=dipole, charges=charges, weight=weight, + head=head, energy_weight=energy_weight, forces_weight=forces_weight, stress_weight=stress_weight, @@ -193,11 +200,12 @@ def test_config_types( test_by_ct = [] all_cts = [] for conf in test_configs: - if conf.config_type not in all_cts: - all_cts.append(conf.config_type) - test_by_ct.append((conf.config_type, [conf])) + config_type_name = conf.config_type + "_" + conf.head + if config_type_name not in all_cts: + all_cts.append(config_type_name) + test_by_ct.append((config_type_name, [conf])) else: - ind = all_cts.index(conf.config_type) + ind = all_cts.index(config_type_name) test_by_ct[ind][1].append(conf) return test_by_ct @@ -211,6 +219,8 @@ def load_from_xyz( virials_key: str = "REF_virials", dipole_key: str = "REF_dipole", charges_key: str = "REF_charges", + head_key: str = "head", + head_name: str = "Default", extract_atomic_energies: bool = False, keep_isolated_atoms: bool = False, ) -> Tuple[Dict[int, float], Configurations]: @@ -255,6 +265,7 @@ def load_from_xyz( atoms_without_iso_atoms = [] for idx, atoms in enumerate(atoms_list): + atoms.info[head_key] = head_name isolated_atom_config = ( len(atoms) == 1 and atoms.info.get("config_type") == "IsolatedAtom" ) @@ -286,6 +297,7 @@ def load_from_xyz( virials_key=virials_key, dipole_key=dipole_key, charges_key=charges_key, + head_key=head_key, ) return atomic_energies_dict, configs @@ -342,6 +354,7 @@ def save_dataset_as_HDF5(dataset: List, out_name: str) -> None: grp["virials"] = data.virials grp["dipole"] = data.dipole grp["charges"] = data.charges + grp["head"] = data.head def save_AtomicData_to_HDF5(data, i, h5_file) -> None: @@ -364,6 +377,7 @@ def save_AtomicData_to_HDF5(data, i, h5_file) -> None: grp["virials"] = data.virials grp["dipole"] = data.dipole grp["charges"] = data.charges + grp["head"] = data.head def save_configurations_as_HDF5(configurations: Configurations, _, h5_file) -> None: @@ -377,6 +391,7 @@ def save_configurations_as_HDF5(configurations: Configurations, _, h5_file) -> N subgroup["forces"] = write_value(config.forces) subgroup["stress"] = write_value(config.stress) subgroup["virials"] = write_value(config.virials) + subgroup["head"] = write_value(config.head) subgroup["dipole"] = write_value(config.dipole) subgroup["charges"] = write_value(config.charges) subgroup["cell"] = write_value(config.cell) diff --git a/mace/modules/blocks.py b/mace/modules/blocks.py index e8645a8e..34539b0b 100644 --- a/mace/modules/blocks.py +++ b/mace/modules/blocks.py @@ -17,6 +17,7 @@ from .irreps_tools import ( linear_out_irreps, + mask_head, reshape_irreps, tp_out_irreps_with_instructions, ) @@ -46,11 +47,15 @@ def forward( @compile_mode("script") class LinearReadoutBlock(torch.nn.Module): - def __init__(self, irreps_in: o3.Irreps): + def __init__(self, irreps_in: o3.Irreps, irrep_out: o3.Irreps = o3.Irreps("0e")): super().__init__() - self.linear = o3.Linear(irreps_in=irreps_in, irreps_out=o3.Irreps("0e")) + self.linear = o3.Linear(irreps_in=irreps_in, irreps_out=irrep_out) - def forward(self, x: torch.Tensor) -> torch.Tensor: # [n_nodes, irreps] # [..., ] + def forward( + self, + x: torch.Tensor, + heads: Optional[torch.Tensor] = None, # pylint: disable=unused-argument + ) -> torch.Tensor: # [n_nodes, irreps] # [..., ] return self.linear(x) # [n_nodes, 1] @@ -58,19 +63,28 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # [n_nodes, irreps] # [... @compile_mode("script") class NonLinearReadoutBlock(torch.nn.Module): def __init__( - self, irreps_in: o3.Irreps, MLP_irreps: o3.Irreps, gate: Optional[Callable] + self, + irreps_in: o3.Irreps, + MLP_irreps: o3.Irreps, + gate: Optional[Callable], + irrep_out: o3.Irreps = o3.Irreps("0e"), + num_heads: int = 1, ): super().__init__() self.hidden_irreps = MLP_irreps + self.num_heads = num_heads self.linear_1 = o3.Linear(irreps_in=irreps_in, irreps_out=self.hidden_irreps) self.non_linearity = nn.Activation(irreps_in=self.hidden_irreps, acts=[gate]) - self.linear_2 = o3.Linear( - irreps_in=self.hidden_irreps, irreps_out=o3.Irreps("0e") - ) + self.linear_2 = o3.Linear(irreps_in=self.hidden_irreps, irreps_out=irrep_out) - def forward(self, x: torch.Tensor) -> torch.Tensor: # [n_nodes, irreps] # [..., ] + def forward( + self, x: torch.Tensor, heads: Optional[torch.Tensor] = None + ) -> torch.Tensor: # [n_nodes, irreps] # [..., ] x = self.non_linearity(self.linear_1(x)) - return self.linear_2(x) # [n_nodes, 1] + if hasattr(self, "num_heads"): + if self.num_heads > 1 and heads is not None: + x = mask_head(x, heads, self.num_heads) + return self.linear_2(x) # [n_nodes, len(heads)] @compile_mode("script") @@ -133,20 +147,25 @@ class AtomicEnergiesBlock(torch.nn.Module): def __init__(self, atomic_energies: Union[np.ndarray, torch.Tensor]): super().__init__() - assert len(atomic_energies.shape) == 1 + # assert len(atomic_energies.shape) == 1 self.register_buffer( "atomic_energies", torch.tensor(atomic_energies, dtype=torch.get_default_dtype()), - ) # [n_elements, ] + ) # [n_elements, n_heads] def forward( self, x: torch.Tensor # one-hot of elements [..., n_elements] ) -> torch.Tensor: # [..., ] - return torch.matmul(x, self.atomic_energies) + return torch.matmul(x, torch.atleast_2d(self.atomic_energies).T) def __repr__(self): - formatted_energies = ", ".join([f"{x:.4f}" for x in self.atomic_energies]) + formatted_energies = ", ".join( + [ + "[" + ", ".join([f"{x:.4f}" for x in group]) + "]" + for group in torch.atleast_2d(self.atomic_energies) + ] + ) return f"{self.__class__.__name__}(energies=[{formatted_energies}])" @@ -602,7 +621,7 @@ def _setup(self) -> None: input_dim = self.edge_feats_irreps.num_irreps self.conv_tp_weights = nn.FullyConnectedNet( [input_dim] + self.radial_MLP + [self.conv_tp.weight_numel], - torch.nn.functional.silu, + torch.nn.functional.silu, # gate ) # Linear @@ -743,16 +762,28 @@ class ScaleShiftBlock(torch.nn.Module): def __init__(self, scale: float, shift: float): super().__init__() self.register_buffer( - "scale", torch.tensor(scale, dtype=torch.get_default_dtype()) + "scale", + torch.tensor(scale, dtype=torch.get_default_dtype()), ) self.register_buffer( - "shift", torch.tensor(shift, dtype=torch.get_default_dtype()) + "shift", + torch.tensor(shift, dtype=torch.get_default_dtype()), ) - def forward(self, x: torch.Tensor) -> torch.Tensor: - return self.scale * x + self.shift + def forward(self, x: torch.Tensor, head: torch.Tensor) -> torch.Tensor: + return ( + torch.atleast_1d(self.scale)[head] * x + torch.atleast_1d(self.shift)[head] + ) def __repr__(self): - return ( - f"{self.__class__.__name__}(scale={self.scale:.6f}, shift={self.shift:.6f})" + formatted_scale = ( + ", ".join([f"{x:.4f}" for x in self.scale]) + if self.scale.numel() > 1 + else f"{self.scale.item():.4f}" + ) + formatted_shift = ( + ", ".join([f"{x:.4f}" for x in self.shift]) + if self.shift.numel() > 1 + else f"{self.shift.item():.4f}" ) + return f"{self.__class__.__name__}(scale={formatted_scale}, shift={formatted_shift})" diff --git a/mace/modules/irreps_tools.py b/mace/modules/irreps_tools.py index 642f3fa8..b0960193 100644 --- a/mace/modules/irreps_tools.py +++ b/mace/modules/irreps_tools.py @@ -84,3 +84,11 @@ def forward(self, tensor: torch.Tensor) -> torch.Tensor: field = field.reshape(batch, mul, d) out.append(field) return torch.cat(out, dim=-1) + + +def mask_head(x: torch.Tensor, head: torch.Tensor, num_heads: int) -> torch.Tensor: + mask = torch.zeros(x.shape[0], x.shape[1] // num_heads, num_heads, device=x.device) + idx = torch.arange(mask.shape[0], device=x.device) + mask[idx, :, head] = 1 + mask = mask.permute(0, 2, 1).reshape(x.shape) + return x * mask diff --git a/mace/modules/loss.py b/mace/modules/loss.py index b3421ef5..a7e28c55 100644 --- a/mace/modules/loss.py +++ b/mace/modules/loss.py @@ -114,34 +114,34 @@ def conditional_mse_forces(ref: Batch, pred: TensorDict) -> torch.Tensor: def conditional_huber_forces( - ref: Batch, pred: TensorDict, huber_delta: float + ref_forces: Batch, pred_forces: TensorDict, huber_delta: float ) -> torch.Tensor: # Define the multiplication factors for each condition factors = huber_delta * torch.tensor([1.0, 0.7, 0.4, 0.1]) # Apply multiplication factors based on conditions - c1 = torch.norm(ref["forces"], dim=-1) < 100 - c2 = (torch.norm(ref["forces"], dim=-1) >= 100) & ( - torch.norm(ref["forces"], dim=-1) < 200 + c1 = torch.norm(ref_forces, dim=-1) < 100 + c2 = (torch.norm(ref_forces, dim=-1) >= 100) & ( + torch.norm(ref_forces, dim=-1) < 200 ) - c3 = (torch.norm(ref["forces"], dim=-1) >= 200) & ( - torch.norm(ref["forces"], dim=-1) < 300 + c3 = (torch.norm(ref_forces, dim=-1) >= 200) & ( + torch.norm(ref_forces, dim=-1) < 300 ) c4 = ~(c1 | c2 | c3) - se = torch.zeros_like(pred["forces"]) + se = torch.zeros_like(pred_forces) se[c1] = torch.nn.functional.huber_loss( - ref["forces"][c1], pred["forces"][c1], reduction="none", delta=factors[0] + ref_forces[c1], pred_forces[c1], reduction="none", delta=factors[0] ) se[c2] = torch.nn.functional.huber_loss( - ref["forces"][c2], pred["forces"][c2], reduction="none", delta=factors[1] + ref_forces[c2], pred_forces[c2], reduction="none", delta=factors[1] ) se[c3] = torch.nn.functional.huber_loss( - ref["forces"][c3], pred["forces"][c3], reduction="none", delta=factors[2] + ref_forces[c3], pred_forces[c3], reduction="none", delta=factors[2] ) se[c4] = torch.nn.functional.huber_loss( - ref["forces"][c4], pred["forces"][c4], reduction="none", delta=factors[3] + ref_forces[c4], pred_forces[c4], reduction="none", delta=factors[3] ) return torch.mean(se) @@ -273,12 +273,28 @@ def __init__( def forward(self, ref: Batch, pred: TensorDict) -> torch.Tensor: num_atoms = ref.ptr[1:] - ref.ptr[:-1] + configs_stress_weight = ref.stress_weight.view(-1, 1, 1) # [n_graphs, ] + configs_energy_weight = ref.energy_weight # [n_graphs, ] + configs_forces_weight = torch.repeat_interleave( + ref.forces_weight, ref.ptr[1:] - ref.ptr[:-1] + ).unsqueeze(-1) return ( self.energy_weight - * self.huber_loss(ref["energy"] / num_atoms, pred["energy"] / num_atoms) + * self.huber_loss( + configs_energy_weight * ref["energy"] / num_atoms, + configs_energy_weight * pred["energy"] / num_atoms, + ) + self.forces_weight - * conditional_huber_forces(ref, pred, huber_delta=self.huber_delta) - + self.stress_weight * self.huber_loss(ref["stress"], pred["stress"]) + * conditional_huber_forces( + configs_forces_weight * ref["forces"], + configs_forces_weight * pred["forces"], + huber_delta=self.huber_delta, + ) + + self.stress_weight + * self.huber_loss( + configs_stress_weight * ref["stress"], + configs_stress_weight * pred["stress"], + ) ) def __repr__(self): diff --git a/mace/modules/models.py b/mace/modules/models.py index 3e5cb662..c0d8ab43 100644 --- a/mace/modules/models.py +++ b/mace/modules/models.py @@ -61,6 +61,7 @@ def __init__( distance_transform: str = "None", radial_MLP: Optional[List[int]] = None, radial_type: Optional[str] = "bessel", + heads: Optional[List[str]] = None, ): super().__init__() self.register_buffer( @@ -72,6 +73,9 @@ def __init__( self.register_buffer( "num_interactions", torch.tensor(num_interactions, dtype=torch.int64) ) + if heads is None: + heads = ["default"] + self.heads = heads if isinstance(correlation, int): correlation = [correlation] * num_interactions # Embedding @@ -131,7 +135,9 @@ def __init__( self.products = torch.nn.ModuleList([prod]) self.readouts = torch.nn.ModuleList() - self.readouts.append(LinearReadoutBlock(hidden_irreps)) + self.readouts.append( + LinearReadoutBlock(hidden_irreps, o3.Irreps(f"{len(heads)}x0e")) + ) for i in range(num_interactions - 1): if i == num_interactions - 2: @@ -161,10 +167,18 @@ def __init__( self.products.append(prod) if i == num_interactions - 2: self.readouts.append( - NonLinearReadoutBlock(hidden_irreps_out, MLP_irreps, gate) + NonLinearReadoutBlock( + hidden_irreps_out, + (len(heads) * MLP_irreps).simplify(), + gate, + o3.Irreps(f"{len(heads)}x0e"), + len(heads), + ) ) else: - self.readouts.append(LinearReadoutBlock(hidden_irreps)) + self.readouts.append( + LinearReadoutBlock(hidden_irreps, o3.Irreps(f"{len(heads)}x0e")) + ) def forward( self, @@ -179,7 +193,13 @@ def forward( # Setup data["node_attrs"].requires_grad_(True) data["positions"].requires_grad_(True) + num_atoms_arange = torch.arange(data["positions"].shape[0]) num_graphs = data["ptr"].numel() - 1 + node_heads = ( + data["head"][data["batch"]] + if "head" in data + else torch.zeros_like(data["batch"]) + ) displacement = torch.zeros( (num_graphs, 3, 3), dtype=data["positions"].dtype, @@ -200,10 +220,12 @@ def forward( ) # Atomic energies - node_e0 = self.atomic_energies_fn(data["node_attrs"]) + node_e0 = self.atomic_energies_fn(data["node_attrs"])[ + num_atoms_arange, node_heads + ] e0 = scatter_sum( - src=node_e0, index=data["batch"], dim=-1, dim_size=num_graphs - ) # [n_graphs,] + src=node_e0, index=data["batch"], dim=0, dim_size=num_graphs + ) # [n_graphs, n_heads] # Embeddings node_feats = self.node_embedding(data["node_attrs"]) vectors, lengths = get_edge_vectors_and_lengths( @@ -246,13 +268,17 @@ def forward( node_attrs=data["node_attrs"], ) node_feats_list.append(node_feats) - node_energies = readout(node_feats).squeeze(-1) # [n_nodes, ] + node_energies = readout(node_feats, node_heads)[ + num_atoms_arange, node_heads + ] # [n_nodes, len(heads)] energy = scatter_sum( - src=node_energies, index=data["batch"], dim=-1, dim_size=num_graphs + src=node_energies, + index=data["batch"], + dim=0, + dim_size=num_graphs, ) # [n_graphs,] energies.append(energy) node_energies_list.append(node_energies) - # Concatenate node features node_feats_out = torch.cat(node_feats_list, dim=-1) @@ -315,6 +341,12 @@ def forward( data["positions"].requires_grad_(True) data["node_attrs"].requires_grad_(True) num_graphs = data["ptr"].numel() - 1 + num_atoms_arange = torch.arange(data["positions"].shape[0]) + node_heads = ( + data["head"][data["batch"]] + if "head" in data + else torch.zeros_like(data["batch"]) + ) displacement = torch.zeros( (num_graphs, 3, 3), dtype=data["positions"].dtype, @@ -335,10 +367,12 @@ def forward( ) # Atomic energies - node_e0 = self.atomic_energies_fn(data["node_attrs"]) + node_e0 = self.atomic_energies_fn(data["node_attrs"])[ + num_atoms_arange, node_heads + ] e0 = scatter_sum( - src=node_e0, index=data["batch"], dim=-1, dim_size=num_graphs - ) # [n_graphs,] + src=node_e0, index=data["batch"], dim=0, dim_size=num_graphs + ) # [n_graphs, num_heads] # Embeddings node_feats = self.node_embedding(data["node_attrs"]) @@ -374,15 +408,17 @@ def forward( node_feats=node_feats, sc=sc, node_attrs=data["node_attrs"] ) node_feats_list.append(node_feats) - node_es_list.append(readout(node_feats).squeeze(-1)) # {[n_nodes, ], } + node_es_list.append( + readout(node_feats, node_heads)[num_atoms_arange, node_heads] + ) # {[n_nodes, ], } + # Concatenate node features node_feats_out = torch.cat(node_feats_list, dim=-1) - # print("node_es_list", node_es_list) # Sum over interactions node_inter_es = torch.sum( torch.stack(node_es_list, dim=0), dim=0 ) # [n_nodes, ] - node_inter_es = self.scale_shift(node_inter_es) + node_inter_es = self.scale_shift(node_inter_es, node_heads) # Sum over nodes in graph inter_e = scatter_sum( @@ -494,12 +530,15 @@ def __init__( def forward(self, data: AtomicData, training=False) -> Dict[str, Any]: # Setup data.positions.requires_grad = True + num_atoms_arange = torch.arange(data.positions.shape[0]) # Atomic energies - node_e0 = self.atomic_energies_fn(data.node_attrs) + node_e0 = self.atomic_energies_fn(data["node_attrs"])[ + num_atoms_arange, data["head"][data["batch"]] + ] e0 = scatter_sum( src=node_e0, index=data.batch, dim=-1, dim_size=data.num_graphs - ) # [n_graphs,] + ) # [n_graphs, n_heads] # Embeddings node_feats = self.node_embedding(data.node_attrs) @@ -557,9 +596,11 @@ def __init__( def forward(self, data: AtomicData, training=False) -> Dict[str, Any]: # Setup data.positions.requires_grad = True - + num_atoms_arange = torch.arange(data.positions.shape[0]) # Atomic energies - node_e0 = self.atomic_energies_fn(data.node_attrs) + node_e0 = self.atomic_energies_fn(data["node_attrs"])[ + num_atoms_arange, data["head"][data["batch"]] + ] e0 = scatter_sum( src=node_e0, index=data.batch, dim=-1, dim_size=data.num_graphs ) # [n_graphs,] @@ -591,7 +632,7 @@ def forward(self, data: AtomicData, training=False) -> Dict[str, Any]: node_inter_es = torch.sum( torch.stack(node_es_list, dim=0), dim=0 ) # [n_nodes, ] - node_inter_es = self.scale_shift(node_inter_es) + node_inter_es = self.scale_shift(node_inter_es, data["head"][data["batch"]]) # Sum over nodes in graph inter_e = scatter_sum( @@ -951,6 +992,7 @@ def forward( data["node_attrs"].requires_grad_(True) data["positions"].requires_grad_(True) num_graphs = data["ptr"].numel() - 1 + num_atoms_arange = torch.arange(data["positions"].shape[0]) displacement = torch.zeros( (num_graphs, 3, 3), dtype=data["positions"].dtype, @@ -971,7 +1013,9 @@ def forward( ) # Atomic energies - node_e0 = self.atomic_energies_fn(data["node_attrs"]) + node_e0 = self.atomic_energies_fn(data["node_attrs"])[ + num_atoms_arange, data["head"][data["batch"]] + ] e0 = scatter_sum( src=node_e0, index=data["batch"], dim=-1, dim_size=num_graphs ) # [n_graphs,] diff --git a/mace/modules/utils.py b/mace/modules/utils.py index 37fef1bb..5f08c819 100644 --- a/mace/modules/utils.py +++ b/mace/modules/utils.py @@ -14,7 +14,7 @@ from scipy.constants import c, e from mace.tools import to_numpy -from mace.tools.scatter import scatter_sum +from mace.tools.scatter import scatter_mean, scatter_std, scatter_sum from mace.tools.torch_geometric.batch import Batch from .blocks import AtomicEnergiesBlock @@ -144,7 +144,6 @@ def compute_hessians_loop( forces: torch.Tensor, positions: torch.Tensor, ) -> torch.Tensor: - hessian = [] for grad_elem in forces.view(-1): hess_row = torch.autograd.grad( @@ -181,7 +180,6 @@ def get_outputs( Optional[torch.Tensor], ]: if (compute_virials or compute_stress) and displacement is not None: - # forces come for free forces, virials, stress = compute_forces_virials( energy=energy, positions=positions, @@ -229,11 +227,11 @@ def get_edge_vectors_and_lengths( def _check_non_zero(std): - if std == 0.0: + if np.any(std == 0): logging.warning( "Standard deviation of the scaling is zero, Changing to no scaling" ) - std = 1.0 + std[std == 0] = 1 return std @@ -260,20 +258,25 @@ def compute_mean_std_atomic_inter_energy( atomic_energies_fn = AtomicEnergiesBlock(atomic_energies=atomic_energies) avg_atom_inter_es_list = [] + head_list = [] for batch in data_loader: node_e0 = atomic_energies_fn(batch.node_attrs) graph_e0s = scatter_sum( - src=node_e0, index=batch.batch, dim=-1, dim_size=batch.num_graphs - ) + src=node_e0, index=batch.batch, dim=0, dim_size=batch.num_graphs + )[torch.arange(batch.num_graphs), batch.head] graph_sizes = batch.ptr[1:] - batch.ptr[:-1] avg_atom_inter_es_list.append( (batch.energy - graph_e0s) / graph_sizes ) # {[n_graphs], } + head_list.append(batch.head) avg_atom_inter_es = torch.cat(avg_atom_inter_es_list) # [total_n_graphs] - mean = to_numpy(torch.mean(avg_atom_inter_es)).item() - std = to_numpy(torch.std(avg_atom_inter_es)).item() + head = torch.cat(head_list, dim=0) # [total_n_graphs] + # mean = to_numpy(torch.mean(avg_atom_inter_es)).item() + # std = to_numpy(torch.std(avg_atom_inter_es)).item() + mean = to_numpy(scatter_mean(src=avg_atom_inter_es, index=head, dim=0).squeeze(-1)) + std = to_numpy(scatter_std(src=avg_atom_inter_es, index=head, dim=0).squeeze(-1)) std = _check_non_zero(std) return mean, std @@ -283,10 +286,11 @@ def _compute_mean_std_atomic_inter_energy( batch: Batch, atomic_energies_fn: AtomicEnergiesBlock, ) -> Tuple[torch.Tensor, torch.Tensor]: + head = batch.head node_e0 = atomic_energies_fn(batch.node_attrs) graph_e0s = scatter_sum( - src=node_e0, index=batch.batch, dim=-1, dim_size=batch.num_graphs - ) + src=node_e0, index=batch.batch, dim=0, dim_size=batch.num_graphs + )[torch.arange(batch.num_graphs), head] graph_sizes = batch.ptr[1:] - batch.ptr[:-1] atom_energies = (batch.energy - graph_e0s) / graph_sizes return atom_energies @@ -300,23 +304,36 @@ def compute_mean_rms_energy_forces( atom_energy_list = [] forces_list = [] + head_list = [] + head_batch = [] for batch in data_loader: + head = batch.head node_e0 = atomic_energies_fn(batch.node_attrs) graph_e0s = scatter_sum( - src=node_e0, index=batch.batch, dim=-1, dim_size=batch.num_graphs - ) + src=node_e0, index=batch.batch, dim=0, dim_size=batch.num_graphs + )[torch.arange(batch.num_graphs), head] graph_sizes = batch.ptr[1:] - batch.ptr[:-1] atom_energy_list.append( (batch.energy - graph_e0s) / graph_sizes ) # {[n_graphs], } forces_list.append(batch.forces) # {[n_graphs*n_atoms,3], } + head_list.append(head) + head_batch.append(head[batch.batch]) atom_energies = torch.cat(atom_energy_list, dim=0) # [total_n_graphs] forces = torch.cat(forces_list, dim=0) # {[total_n_graphs*n_atoms,3], } - - mean = to_numpy(torch.mean(atom_energies)).item() - rms = to_numpy(torch.sqrt(torch.mean(torch.square(forces)))).item() + head = torch.cat(head_list, dim=0) # [total_n_graphs] + head_batch = torch.cat(head_batch, dim=0) # [total_n_graphs] + + # mean = to_numpy(torch.mean(atom_energies)).item() + # rms = to_numpy(torch.sqrt(torch.mean(torch.square(forces)))).item() + mean = to_numpy(scatter_mean(src=atom_energies, index=head, dim=0).squeeze(-1)) + rms = to_numpy( + torch.sqrt( + scatter_mean(src=torch.square(forces), index=head_batch, dim=0).mean(-1) + ) + ) rms = _check_non_zero(rms) return mean, rms @@ -326,10 +343,11 @@ def _compute_mean_rms_energy_forces( batch: Batch, atomic_energies_fn: AtomicEnergiesBlock, ) -> Tuple[torch.Tensor, torch.Tensor]: + head = batch.head node_e0 = atomic_energies_fn(batch.node_attrs) graph_e0s = scatter_sum( - src=node_e0, index=batch.batch, dim=-1, dim_size=batch.num_graphs - ) + src=node_e0, index=batch.batch, dim=0, dim_size=batch.num_graphs + )[torch.arange(batch.num_graphs), head] graph_sizes = batch.ptr[1:] - batch.ptr[:-1] atom_energies = (batch.energy - graph_e0s) / graph_sizes # {[n_graphs], } forces = batch.forces # {[n_graphs*n_atoms,3], } @@ -339,7 +357,6 @@ def _compute_mean_rms_energy_forces( def compute_avg_num_neighbors(data_loader: torch.utils.data.DataLoader) -> float: num_neighbors = [] - for batch in data_loader: _, receivers = batch.edge_index _, counts = torch.unique(receivers, return_counts=True) @@ -360,17 +377,20 @@ def compute_statistics( atom_energy_list = [] forces_list = [] num_neighbors = [] + head_list = [] for batch in data_loader: + head = batch.head node_e0 = atomic_energies_fn(batch.node_attrs) graph_e0s = scatter_sum( - src=node_e0, index=batch.batch, dim=-1, dim_size=batch.num_graphs - ) + src=node_e0, index=batch.batch, dim=0, dim_size=batch.num_graphs + )[torch.arange(batch.num_graphs), head] graph_sizes = batch.ptr[1:] - batch.ptr[:-1] atom_energy_list.append( (batch.energy - graph_e0s) / graph_sizes ) # {[n_graphs], } forces_list.append(batch.forces) # {[n_graphs*n_atoms,3], } + head_list.append(head) # {[n_graphs], } _, receivers = batch.edge_index _, counts = torch.unique(receivers, return_counts=True) @@ -378,9 +398,15 @@ def compute_statistics( atom_energies = torch.cat(atom_energy_list, dim=0) # [total_n_graphs] forces = torch.cat(forces_list, dim=0) # {[total_n_graphs*n_atoms,3], } - - mean = to_numpy(torch.mean(atom_energies)).item() - rms = to_numpy(torch.sqrt(torch.mean(torch.square(forces)))).item() + head = torch.cat(head_list, dim=0) # [total_n_graphs] + + # mean = to_numpy(torch.mean(atom_energies)).item() + mean = to_numpy(scatter_mean(src=atom_energies, index=head, dim=0).squeeze(-1)) + # do the mean for each head + # rms = to_numpy(torch.sqrt(torch.mean(torch.square(forces)))).item() + rms = to_numpy( + torch.sqrt(scatter_mean(src=torch.square(forces), index=head, dim=0)) + ) avg_num_neighbors = torch.mean( torch.cat(num_neighbors, dim=0).type(torch.get_default_dtype()) diff --git a/mace/tools/__init__.py b/mace/tools/__init__.py index 54c59455..8ad80243 100644 --- a/mace/tools/__init__.py +++ b/mace/tools/__init__.py @@ -2,7 +2,7 @@ from .arg_parser_tools import check_args from .cg import U_matrix_real from .checkpoint import CheckpointHandler, CheckpointIO, CheckpointState -from .finetuning_utils import load_foundations +from .finetuning_utils import load_foundations, load_foundations_elements from .torch_tools import ( TensorDict, cartesian_to_spherical, @@ -28,7 +28,6 @@ compute_rel_rmse, compute_rmse, get_atomic_number_table_from_zs, - get_optimizer, get_tag, setup_logger, ) @@ -46,7 +45,6 @@ "setup_logger", "get_tag", "count_parameters", - "get_optimizer", "MetricsLogger", "get_atomic_number_table_from_zs", "train", @@ -68,5 +66,6 @@ "voigt_to_matrix", "init_wandb", "load_foundations", + "load_foundations_elements", "build_preprocess_arg_parser", ] diff --git a/mace/tools/arg_parser.py b/mace/tools/arg_parser.py index 2b0e2b56..046f04d6 100644 --- a/mace/tools/arg_parser.py +++ b/mace/tools/arg_parser.py @@ -60,7 +60,7 @@ def build_default_arg_parser() -> argparse.ArgumentParser: "--device", help="select device", type=str, - choices=["cpu", "cuda", "mps"], + choices=["cpu", "cuda", "mps", "xpu"], default="cpu", ) parser.add_argument( @@ -228,19 +228,19 @@ def build_default_arg_parser() -> argparse.ArgumentParser: parser.add_argument( "--compute_avg_num_neighbors", help="normalization factor for the message", - type=bool, + type=str2bool, default=True, ) parser.add_argument( "--compute_stress", help="Select True to compute stress", - type=bool, + type=str2bool, default=False, ) parser.add_argument( "--compute_forces", help="Select True to compute forces", - type=bool, + type=str2bool, default=True, ) @@ -249,7 +249,7 @@ def build_default_arg_parser() -> argparse.ArgumentParser: "--train_file", help="Training set file, format is .xyz or .h5", type=str, - required=True, + required=False, ) parser.add_argument( "--valid_file", @@ -280,7 +280,7 @@ def build_default_arg_parser() -> argparse.ArgumentParser: parser.add_argument( "--multi_processed_test", help="Boolean value for whether the test data was multiprocessed", - type=bool, + type=str2bool, default=False, required=False, ) @@ -294,7 +294,7 @@ def build_default_arg_parser() -> argparse.ArgumentParser: "--pin_memory", help="Pin memory for data loading", default=True, - type=bool, + type=str2bool, ) parser.add_argument( "--atomic_numbers", @@ -331,12 +331,66 @@ def build_default_arg_parser() -> argparse.ArgumentParser: default=None, required=False, ) + + # Fine-tuning + parser.add_argument( + "--foundation_filter_elements", + help="Filter element during fine-tuning", + type=str2bool, + default=True, + required=False, + ) + parser.add_argument( + "--heads", + help="Dict of heads: containing individual files and E0s", + type=str, + default=None, + required=False, + ) + parser.add_argument( + "--multiheads_finetuning", + help="Boolean value for whether the model is multiheaded", + type=str2bool, + default=True, + ) + parser.add_argument( + "--weight_pt_head", + help="Weight of the pretrained head in the loss function", + type=float, + default=1.0, + ) + parser.add_argument( + "--num_samples_pt", + help="Number of samples in the pretrained head", + type=int, + default=1000, + ) + parser.add_argument( + "--subselect_pt", + help="Method to subselect the configurations of the pretraining set", + choices=["fps", "random"], + default="random", + ) + parser.add_argument( + "--pt_train_file", + help="Training set file for the pretrained head", + type=str, + default=None, + ) + parser.add_argument( + "--pt_valid_file", + help="Validation set file for the pretrained head", + type=str, + default=None, + ) parser.add_argument( "--keep_isolated_atoms", help="Keep isolated atoms in the dataset, useful for transfer learning", - type=bool, + type=str2bool, default=False, ) + + # Keys parser.add_argument( "--energy_key", help="Key of reference energies in training xyz", @@ -769,7 +823,7 @@ def build_preprocess_arg_parser() -> argparse.ArgumentParser: parser.add_argument( "--shuffle", help="Shuffle the training dataset", - type=bool, + type=str2bool, default=True, ) parser.add_argument( @@ -790,3 +844,13 @@ def check_float_or_none(value: str) -> Optional[float]: f"{value} is an invalid value (float or None)" ) from None return None + + +def str2bool(value): + if isinstance(value, bool): + return value + if value.lower() in ("yes", "true", "t", "y", "1"): + return True + if value.lower() in ("no", "false", "f", "n", "0"): + return False + raise argparse.ArgumentTypeError("Boolean value expected.") diff --git a/mace/tools/finetuning_utils.py b/mace/tools/finetuning_utils.py index 0aad091b..0d4e2f52 100644 --- a/mace/tools/finetuning_utils.py +++ b/mace/tools/finetuning_utils.py @@ -3,12 +3,12 @@ from mace.tools.utils import AtomicNumberTable -def load_foundations( +def load_foundations_elements( model: torch.nn.Module, model_foundations: torch.nn.Module, table: AtomicNumberTable, load_readout=False, - use_shift=False, + use_shift=True, use_scale=True, max_L=2, ): @@ -17,6 +17,7 @@ def load_foundations( """ assert model_foundations.r_max == model.r_max z_table = AtomicNumberTable([int(z) for z in model_foundations.atomic_numbers]) + model_heads = model.heads new_z_table = table num_species_foundations = len(z_table.zs) num_channels_foundation = ( @@ -39,7 +40,6 @@ def load_foundations( model.radial_embedding.bessel_fn.bessel_weights = torch.nn.Parameter( model_foundations.radial_embedding.bessel_fn.bessel_weights.clone() ) - for i in range(int(model.num_interactions)): model.interactions[i].linear_up.weight = torch.nn.Parameter( model_foundations.interactions[i].linear_up.weight.clone() @@ -101,6 +101,7 @@ def load_foundations( .clone() / (num_species_foundations / num_species) ** 0.5 ) + # Transferring products for i in range(2): # Assuming 2 products modules max_range = max_L + 1 if i == 0 else 1 @@ -130,20 +131,62 @@ def load_foundations( if load_readout: # Transferring readouts + model_readouts_zero_linear_weight = model.readouts[0].linear.weight.clone() + model_readouts_zero_linear_weight = ( + model_foundations.readouts[0] + .linear.weight.view(num_channels_foundation, -1) + .repeat(1, len(model_heads)) + .flatten() + .clone() + ) model.readouts[0].linear.weight = torch.nn.Parameter( - model_foundations.readouts[0].linear.weight.clone() + model_readouts_zero_linear_weight ) + shape_input_1 = ( + model_foundations.readouts[1].linear_1.__dict__["irreps_out"].num_irreps + ) + shape_output_1 = model.readouts[1].linear_1.__dict__["irreps_out"].num_irreps + model_readouts_one_linear_1_weight = model.readouts[1].linear_1.weight.clone() + model_readouts_one_linear_1_weight = ( + model_foundations.readouts[1] + .linear_1.weight.view(num_channels_foundation, -1) + .repeat(1, len(model_heads)) + .flatten() + .clone() + ) model.readouts[1].linear_1.weight = torch.nn.Parameter( - model_foundations.readouts[1].linear_1.weight.clone() + model_readouts_one_linear_1_weight + ) + model_readouts_one_linear_2_weight = model.readouts[1].linear_2.weight.clone() + model_readouts_one_linear_2_weight = model_foundations.readouts[ + 1 + ].linear_2.weight.view(shape_input_1, -1).repeat( + len(model_heads), len(model_heads) + ).flatten().clone() / ( + ((shape_input_1) / (shape_output_1)) ** 0.5 ) - model.readouts[1].linear_2.weight = torch.nn.Parameter( - model_foundations.readouts[1].linear_2.weight.clone() + model_readouts_one_linear_2_weight ) if model_foundations.scale_shift is not None: if use_scale: - model.scale_shift.scale = model_foundations.scale_shift.scale.clone() + model.scale_shift.scale = model_foundations.scale_shift.scale.repeat( + len(model_heads) + ).clone() if use_shift: - model.scale_shift.shift = model_foundations.scale_shift.shift.clone() + model.scale_shift.shift = model_foundations.scale_shift.shift.repeat( + len(model_heads) + ).clone() + return model + + +def load_foundations( + model, + model_foundations, +): + for name, param in model_foundations.named_parameters(): + if name in model.state_dict().keys(): + if "readouts" not in name: + model.state_dict()[name].copy_(param) return model diff --git a/mace/tools/model_script_utils.py b/mace/tools/model_script_utils.py new file mode 100644 index 00000000..8e8c2877 --- /dev/null +++ b/mace/tools/model_script_utils.py @@ -0,0 +1,229 @@ +import ast +import logging + +import numpy as np +from e3nn import o3 + +from mace import modules +from mace.tools.finetuning_utils import load_foundations_elements +from mace.tools.scripts_utils import extract_config_mace_model + + +def configure_model( + args, train_loader, atomic_energies, model_foundation=None, heads=None, z_table=None +): + # Selecting outputs + compute_virials = args.loss in ("stress", "virials", "huber", "universal") + if compute_virials: + args.compute_stress = True + args.error_table = "PerAtomRMSEstressvirials" + + output_args = { + "energy": args.compute_energy, + "forces": args.compute_forces, + "virials": compute_virials, + "stress": args.compute_stress, + "dipoles": args.compute_dipole, + } + logging.info( + f"During training the following quantities will be reported: {', '.join([f'{report}' for report, value in output_args.items() if value])}" + ) + logging.info("===========MODEL DETAILS===========") + + if args.scaling == "no_scaling": + args.std = 1.0 + logging.info("No scaling selected") + elif (args.mean is None or args.std is None) and args.model != "AtomicDipolesMACE": + args.mean, args.std = modules.scaling_classes[args.scaling]( + train_loader, atomic_energies + ) + + # Build model + if model_foundation is not None and args.model in ["MACE", "ScaleShiftMACE"]: + logging.info("Loading FOUNDATION model") + model_config_foundation = extract_config_mace_model(model_foundation) + model_config_foundation["atomic_energies"] = atomic_energies + model_config_foundation["atomic_numbers"] = z_table.zs + model_config_foundation["num_elements"] = len(z_table) + args.max_L = model_config_foundation["hidden_irreps"].lmax + + if args.model == "MACE" and model_foundation.__class__.__name__ == "MACE": + model_config_foundation["atomic_inter_shift"] = [0.0] * len(heads) + else: + model_config_foundation["atomic_inter_shift"] = ( + _determine_atomic_inter_shift(args.mean, heads) + ) + + model_config_foundation["atomic_inter_scale"] = [1.0] * len(heads) + args.avg_num_neighbors = model_config_foundation["avg_num_neighbors"] + args.model = "FoundationMACE" + model_config_foundation["heads"] = heads + model_config = model_config_foundation + + logging.info("Model configuration extracted from foundation model") + logging.info("Using universal loss function for fine-tuning") + logging.info( + f"Message passing with hidden irreps {model_config_foundation['hidden_irreps']})" + ) + logging.info( + f"{model_config_foundation['num_interactions']} layers, each with correlation order: {model_config_foundation['correlation']} (body order: {model_config_foundation['correlation']+1}) and spherical harmonics up to: l={model_config_foundation['max_ell']}" + ) + logging.info( + f"Radial cutoff: {model_config_foundation['r_max']} A (total receptive field for each atom: {model_config_foundation['r_max'] * model_config_foundation['num_interactions']} A)" + ) + logging.info( + f"Distance transform for radial basis functions: {model_config_foundation['distance_transform']}" + ) + else: + logging.info("Building model") + logging.info( + f"Message passing with {args.num_channels} channels and max_L={args.max_L} ({args.hidden_irreps})" + ) + logging.info( + f"{args.num_interactions} layers, each with correlation order: {args.correlation} (body order: {args.correlation+1}) and spherical harmonics up to: l={args.max_ell}" + ) + logging.info( + f"{args.num_radial_basis} radial and {args.num_cutoff_basis} basis functions" + ) + logging.info( + f"Radial cutoff: {args.r_max} A (total receptive field for each atom: {args.r_max * args.num_interactions} A)" + ) + logging.info( + f"Distance transform for radial basis functions: {args.distance_transform}" + ) + + assert ( + len({irrep.mul for irrep in o3.Irreps(args.hidden_irreps)}) == 1 + ), "All channels must have the same dimension, use the num_channels and max_L keywords to specify the number of channels and the maximum L" + + logging.info(f"Hidden irreps: {args.hidden_irreps}") + + model_config = dict( + r_max=args.r_max, + num_bessel=args.num_radial_basis, + num_polynomial_cutoff=args.num_cutoff_basis, + max_ell=args.max_ell, + interaction_cls=modules.interaction_classes[args.interaction], + num_interactions=args.num_interactions, + num_elements=len(z_table), + hidden_irreps=o3.Irreps(args.hidden_irreps), + atomic_energies=atomic_energies, + avg_num_neighbors=args.avg_num_neighbors, + atomic_numbers=z_table.zs, + ) + model_config_foundation = None + + model = _build_model(args, model_config, model_config_foundation, heads) + + if model_foundation is not None: + model = load_foundations_elements( + model, + model_foundation, + z_table, + load_readout=args.foundation_filter_elements, + max_L=args.max_L, + ) + + return model, output_args + + +def _determine_atomic_inter_shift(mean, heads): + if isinstance(mean, np.ndarray): + if mean.size == 1: + return mean.item() + if mean.size == len(heads): + return mean.tolist() + logging.info("Mean not in correct format, using default value of 0.0") + return [0.0] * len(heads) + if isinstance(mean, list) and len(mean) == len(heads): + return mean + if isinstance(mean, float): + return [mean] * len(heads) + logging.info("Mean not in correct format, using default value of 0.0") + return [0.0] * len(heads) + + +def _build_model( + args, model_config, model_config_foundation, heads +): # pylint: disable=too-many-return-statements + if args.model == "MACE": + return modules.ScaleShiftMACE( + **model_config, + pair_repulsion=args.pair_repulsion, + distance_transform=args.distance_transform, + correlation=args.correlation, + gate=modules.gate_dict[args.gate], + interaction_cls_first=modules.interaction_classes[ + "RealAgnosticInteractionBlock" + ], + MLP_irreps=o3.Irreps(args.MLP_irreps), + atomic_inter_scale=args.std, + atomic_inter_shift=[0.0] * len(heads), + radial_MLP=ast.literal_eval(args.radial_MLP), + radial_type=args.radial_type, + heads=heads, + ) + if args.model == "ScaleShiftMACE": + return modules.ScaleShiftMACE( + **model_config, + pair_repulsion=args.pair_repulsion, + distance_transform=args.distance_transform, + correlation=args.correlation, + gate=modules.gate_dict[args.gate], + interaction_cls_first=modules.interaction_classes[args.interaction_first], + MLP_irreps=o3.Irreps(args.MLP_irreps), + atomic_inter_scale=args.std, + atomic_inter_shift=args.mean, + radial_MLP=ast.literal_eval(args.radial_MLP), + radial_type=args.radial_type, + heads=heads, + ) + if args.model == "FoundationMACE": + return modules.ScaleShiftMACE(**model_config_foundation) + if args.model == "ScaleShiftBOTNet": + return modules.ScaleShiftBOTNet( + **model_config, + gate=modules.gate_dict[args.gate], + interaction_cls_first=modules.interaction_classes[args.interaction_first], + MLP_irreps=o3.Irreps(args.MLP_irreps), + atomic_inter_scale=args.std, + atomic_inter_shift=args.mean, + ) + if args.model == "BOTNet": + return modules.BOTNet( + **model_config, + gate=modules.gate_dict[args.gate], + interaction_cls_first=modules.interaction_classes[args.interaction_first], + MLP_irreps=o3.Irreps(args.MLP_irreps), + ) + if args.model == "AtomicDipolesMACE": + assert args.loss == "dipole", "Use dipole loss with AtomicDipolesMACE model" + assert ( + args.error_table == "DipoleRMSE" + ), "Use error_table DipoleRMSE with AtomicDipolesMACE model" + return modules.AtomicDipolesMACE( + **model_config, + correlation=args.correlation, + gate=modules.gate_dict[args.gate], + interaction_cls_first=modules.interaction_classes[ + "RealAgnosticInteractionBlock" + ], + MLP_irreps=o3.Irreps(args.MLP_irreps), + ) + if args.model == "EnergyDipolesMACE": + assert ( + args.loss == "energy_forces_dipole" + ), "Use energy_forces_dipole loss with EnergyDipolesMACE model" + assert ( + args.error_table == "EnergyDipoleRMSE" + ), "Use error_table EnergyDipoleRMSE with AtomicDipolesMACE model" + return modules.EnergyDipolesMACE( + **model_config, + correlation=args.correlation, + gate=modules.gate_dict[args.gate], + interaction_cls_first=modules.interaction_classes[ + "RealAgnosticInteractionBlock" + ], + MLP_irreps=o3.Irreps(args.MLP_irreps), + ) + raise RuntimeError(f"Unknown model: '{args.model}'") diff --git a/mace/tools/multihead_tools.py b/mace/tools/multihead_tools.py new file mode 100644 index 00000000..ffde107f --- /dev/null +++ b/mace/tools/multihead_tools.py @@ -0,0 +1,185 @@ +import argparse +import dataclasses +import logging +import os +import urllib.request +from typing import Any, Dict, List, Optional, Union + +import torch + +from mace.cli.fine_tuning_select import select_samples +from mace.tools.scripts_utils import ( + SubsetCollection, + dict_to_namespace, + get_dataset_from_xyz, +) + + +@dataclasses.dataclass +class HeadConfig: + head_name: str + train_file: Optional[str] = None + valid_file: Optional[str] = None + test_file: Optional[str] = None + test_dir: Optional[str] = None + E0s: Optional[Any] = None + statistics_file: Optional[str] = None + valid_fraction: Optional[float] = None + config_type_weights: Optional[Dict[str, float]] = None + energy_key: Optional[str] = None + forces_key: Optional[str] = None + stress_key: Optional[str] = None + virials_key: Optional[str] = None + dipole_key: Optional[str] = None + charges_key: Optional[str] = None + keep_isolated_atoms: Optional[bool] = None + atomic_numbers: Optional[Union[List[int], List[str]]] = None + mean: Optional[float] = None + std: Optional[float] = None + avg_num_neighbors: Optional[float] = None + compute_avg_num_neighbors: Optional[bool] = None + collections: Optional[SubsetCollection] = None + train_loader: torch.utils.data.DataLoader = None + z_table: Optional[Any] = None + atomic_energies_dict: Optional[Dict[str, float]] = None + + +def dict_head_to_dataclass( + head: Dict[str, Any], head_name: str, args: argparse.Namespace +) -> HeadConfig: + + return HeadConfig( + head_name=head_name, + train_file=head.get("train_file", args.train_file), + valid_file=head.get("valid_file", args.valid_file), + test_file=head.get("test_file", None), + test_dir=head.get("test_dir", None), + E0s=head.get("E0s", args.E0s), + statistics_file=head.get("statistics_file", args.statistics_file), + valid_fraction=head.get("valid_fraction", args.valid_fraction), + config_type_weights=head.get("config_type_weights", args.config_type_weights), + compute_avg_num_neighbors=head.get( + "compute_avg_num_neighbors", args.compute_avg_num_neighbors + ), + atomic_numbers=head.get("atomic_numbers", args.atomic_numbers), + mean=head.get("mean", args.mean), + std=head.get("std", args.std), + avg_num_neighbors=head.get("avg_num_neighbors", args.avg_num_neighbors), + energy_key=head.get("energy_key", args.energy_key), + forces_key=head.get("forces_key", args.forces_key), + stress_key=head.get("stress_key", args.stress_key), + virials_key=head.get("virials_key", args.virials_key), + dipole_key=head.get("dipole_key", args.dipole_key), + charges_key=head.get("charges_key", args.charges_key), + keep_isolated_atoms=head.get("keep_isolated_atoms", args.keep_isolated_atoms), + ) + + +def prepare_default_head(args: argparse.Namespace) -> Dict[str, Any]: + return { + "default": { + "train_file": args.train_file, + "valid_file": args.valid_file, + "test_file": args.test_file, + "test_dir": args.test_dir, + "E0s": args.E0s, + "statistics_file": args.statistics_file, + "valid_fraction": args.valid_fraction, + "config_type_weights": args.config_type_weights, + "energy_key": args.energy_key, + "forces_key": args.forces_key, + "stress_key": args.stress_key, + "virials_key": args.virials_key, + "dipole_key": args.dipole_key, + "charges_key": args.charges_key, + "keep_isolated_atoms": args.keep_isolated_atoms, + } + } + + +def assemble_mp_data( + args: argparse.Namespace, tag: str, head_configs: List[HeadConfig] +) -> Dict[str, Any]: + try: + checkpoint_url = "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0b/mp_traj_combined.xyz" + descriptors_url = "https://github.com/ACEsuit/mace-mp/releases/download/mace_mp_0b/descriptors.npy" + cache_dir = os.path.expanduser("~/.cache/mace") + checkpoint_url_name = "".join( + c for c in os.path.basename(checkpoint_url) if c.isalnum() or c in "_" + ) + cached_dataset_path = f"{cache_dir}/{checkpoint_url_name}" + descriptors_url_name = "".join( + c for c in os.path.basename(descriptors_url) if c.isalnum() or c in "_" + ) + cached_descriptors_path = f"{cache_dir}/{descriptors_url_name}" + if not os.path.isfile(cached_dataset_path): + os.makedirs(cache_dir, exist_ok=True) + # download and save to disk + logging.info("Downloading MP structures for finetuning") + _, http_msg = urllib.request.urlretrieve( + checkpoint_url, cached_dataset_path + ) + if "Content-Type: text/html" in http_msg: + raise RuntimeError( + f"Dataset download failed, please check the URL {checkpoint_url}" + ) + logging.info(f"Materials Project dataset to {cached_dataset_path}") + if not os.path.isfile(cached_descriptors_path): + os.makedirs(cache_dir, exist_ok=True) + # download and save to disk + logging.info("Downloading MP descriptors for finetuning") + _, http_msg = urllib.request.urlretrieve( + descriptors_url, cached_descriptors_path + ) + if "Content-Type: text/html" in http_msg: + raise RuntimeError( + f"Descriptors download failed, please check the URL {descriptors_url}" + ) + logging.info(f"Materials Project descriptors to {cached_descriptors_path}") + dataset_mp = cached_dataset_path + descriptors_mp = cached_descriptors_path + msg = f"Using Materials Project dataset with {dataset_mp}" + logging.info(msg) + msg = f"Using Materials Project descriptors with {descriptors_mp}" + logging.info(msg) + config_pt_paths = [head.train_file for head in head_configs] + args_samples = { + "configs_pt": dataset_mp, + "configs_ft": config_pt_paths, + "num_samples": args.num_samples_pt, + "seed": args.seed, + "model": args.foundation_model, + "head_pt": "pbe_mp", + "head_ft": "Default", + "weight_pt": args.weight_pt_head, + "weight_ft": 1.0, + "filtering_type": "combination", + "output": f"mp_finetuning-{tag}.xyz", + "descriptors": descriptors_mp, + "subselect": args.subselect_pt, + "device": args.device, + "default_dtype": args.default_dtype, + } + select_samples(dict_to_namespace(args_samples)) + collections_mp, _ = get_dataset_from_xyz( + work_dir=args.work_dir, + train_path=f"mp_finetuning-{tag}.xyz", + valid_path=None, + valid_fraction=args.valid_fraction, + config_type_weights=None, + test_path=None, + seed=args.seed, + energy_key="energy", + forces_key="forces", + stress_key="stress", + head_name="pt_head", + virials_key=args.virials_key, + dipole_key=args.dipole_key, + charges_key=args.charges_key, + keep_isolated_atoms=args.keep_isolated_atoms, + ) + return collections_mp + except Exception as exc: + raise RuntimeError( + "Model or descriptors download failed and no local model found" + ) from exc diff --git a/mace/tools/scripts_utils.py b/mace/tools/scripts_utils.py index a353b447..ce68c742 100644 --- a/mace/tools/scripts_utils.py +++ b/mace/tools/scripts_utils.py @@ -4,6 +4,7 @@ # This program is distributed under the MIT License (see MIT.md) ########################################################################################### +import argparse import ast import dataclasses import json @@ -16,9 +17,11 @@ import torch.distributed from e3nn import o3 from prettytable import PrettyTable +from torch.optim.swa_utils import SWALR, AveragedModel -from mace import data, modules +from mace import data, modules, tools from mace.tools import evaluate +from mace.tools.train import SWAContainer @dataclasses.dataclass @@ -31,18 +34,20 @@ class SubsetCollection: def get_dataset_from_xyz( work_dir: str, train_path: str, - valid_path: str, + valid_path: Optional[str], valid_fraction: float, config_type_weights: Dict, test_path: str = None, seed: int = 1234, keep_isolated_atoms: bool = False, + head_name: str = "Default", energy_key: str = "REF_energy", forces_key: str = "REF_forces", stress_key: str = "REF_stress", virials_key: str = "virials", dipole_key: str = "dipoles", charges_key: str = "charges", + head_key: str = "head", ) -> Tuple[SubsetCollection, Optional[Dict[int, float]]]: """Load training and test dataset from xyz file""" atomic_energies_dict, all_train_configs = data.load_from_xyz( @@ -54,8 +59,10 @@ def get_dataset_from_xyz( virials_key=virials_key, dipole_key=dipole_key, charges_key=charges_key, + head_key=head_key, extract_atomic_energies=True, keep_isolated_atoms=keep_isolated_atoms, + head_name=head_name, ) logging.info( f"Training set [{len(all_train_configs)} configs, {np.sum([1 if config.energy else 0 for config in all_train_configs])} energy, {np.sum([config.forces.size for config in all_train_configs])} forces] loaded from '{train_path}'" @@ -70,7 +77,9 @@ def get_dataset_from_xyz( virials_key=virials_key, dipole_key=dipole_key, charges_key=charges_key, + head_key=head_key, extract_atomic_energies=False, + head_name=head_name, ) logging.info( f"Validation set [{len(valid_configs)} configs, {np.sum([1 if config.energy else 0 for config in valid_configs])} energy, {np.sum([config.forces.size for config in valid_configs])} forces] loaded from '{valid_path}'" @@ -95,7 +104,9 @@ def get_dataset_from_xyz( stress_key=stress_key, virials_key=virials_key, charges_key=charges_key, + head_key=head_key, extract_atomic_energies=False, + head_name=head_name, ) # create list of tuples (config_type, list(Atoms)) test_configs = data.test_config_types(all_test_configs) @@ -163,6 +174,8 @@ def radial_to_transform(radial): return "Soft" return radial.distance_transform.__class__.__name__ + scale = model.scale_shift.scale + shift = model.scale_shift.shift config = { "r_max": model.r_max.item(), "num_bessel": len(model.radial_embedding.bessel_fn.bessel_weights), @@ -198,8 +211,8 @@ def radial_to_transform(radial): "radial_MLP": model.interactions[0].conv_tp_weights.hs[1:-1], "pair_repulsion": hasattr(model, "pair_repulsion_fn"), "distance_transform": radial_to_transform(model.radial_embedding), - "atomic_inter_scale": model.scale_shift.scale.item(), - "atomic_inter_shift": model.scale_shift.shift.item(), + "atomic_inter_scale": scale.cpu().numpy(), + "atomic_inter_shift": shift.cpu().numpy(), } return config @@ -313,7 +326,14 @@ def get_atomic_energies(E0s, train_collection, z_table) -> dict: atomic_energies_dict = json.load(f) else: try: - atomic_energies_dict = ast.literal_eval(E0s) + atomic_energies_eval = ast.literal_eval(E0s) + if not all( + isinstance(value, dict) + for value in atomic_energies_eval.values() + ): + atomic_energies_dict = atomic_energies_eval + else: + atomic_energies_dict = atomic_energies_eval assert isinstance(atomic_energies_dict, dict) except Exception as e: raise RuntimeError( @@ -326,55 +346,260 @@ def get_atomic_energies(E0s, train_collection, z_table) -> dict: return atomic_energies_dict +def get_avg_num_neighbors(head_configs, args, train_loader, device): + if all(head_config.compute_avg_num_neighbors for head_config in head_configs): + logging.info("Computing average number of neighbors") + avg_num_neighbors = modules.compute_avg_num_neighbors(train_loader) + if args.distributed: + num_graphs = torch.tensor(len(train_loader.dataset)).to(device) + num_neighbors = num_graphs * torch.tensor(avg_num_neighbors).to(device) + torch.distributed.all_reduce(num_graphs, op=torch.distributed.ReduceOp.SUM) + torch.distributed.all_reduce( + num_neighbors, op=torch.distributed.ReduceOp.SUM + ) + avg_num_neighbors_out = (num_neighbors / num_graphs).item() + else: + avg_num_neighbors_out = avg_num_neighbors + else: + assert any( + head_config.avg_num_neighbors is not None for head_config in head_configs + ), "Average number of neighbors must be provided in the configuration" + avg_num_neighbors_out = max( + head_config.avg_num_neighbors + for head_config in head_configs + if head_config.avg_num_neighbors is not None + ) + if avg_num_neighbors_out < 2 or avg_num_neighbors_out > 100: + logging.warning( + f"Unusual average number of neighbors: {avg_num_neighbors_out:.1f}" + ) + else: + logging.info(f"Average number of neighbors: {avg_num_neighbors_out}") + return avg_num_neighbors_out + + def get_loss_fn( - loss: str, - energy_weight: float, - forces_weight: float, - stress_weight: float, - virials_weight: float, - dipole_weight: float, + args: argparse.Namespace, dipole_only: bool, compute_dipole: bool, ) -> torch.nn.Module: - if loss == "weighted": + if args.loss == "weighted": loss_fn = modules.WeightedEnergyForcesLoss( - energy_weight=energy_weight, forces_weight=forces_weight + energy_weight=args.energy_weight, forces_weight=args.forces_weight ) - elif loss == "forces_only": - loss_fn = modules.WeightedForcesLoss(forces_weight=forces_weight) - elif loss == "virials": + elif args.loss == "forces_only": + loss_fn = modules.WeightedForcesLoss(forces_weight=args.forces_weight) + elif args.loss == "virials": loss_fn = modules.WeightedEnergyForcesVirialsLoss( - energy_weight=energy_weight, - forces_weight=forces_weight, - virials_weight=virials_weight, + energy_weight=args.energy_weight, + forces_weight=args.forces_weight, + virials_weight=args.virials_weight, ) - elif loss == "stress": + elif args.loss == "stress": loss_fn = modules.WeightedEnergyForcesStressLoss( - energy_weight=energy_weight, - forces_weight=forces_weight, - stress_weight=stress_weight, + energy_weight=args.energy_weight, + forces_weight=args.forces_weight, + stress_weight=args.stress_weight, ) - elif loss == "dipole": + elif args.loss == "huber": + loss_fn = modules.WeightedHuberEnergyForcesStressLoss( + energy_weight=args.energy_weight, + forces_weight=args.forces_weight, + stress_weight=args.stress_weight, + huber_delta=args.huber_delta, + ) + elif args.loss == "universal": + loss_fn = modules.UniversalLoss( + energy_weight=args.energy_weight, + forces_weight=args.forces_weight, + stress_weight=args.stress_weight, + huber_delta=args.huber_delta, + ) + elif args.loss == "dipole": assert ( dipole_only is True ), "dipole loss can only be used with AtomicDipolesMACE model" loss_fn = modules.DipoleSingleLoss( - dipole_weight=dipole_weight, + dipole_weight=args.dipole_weight, ) - elif loss == "energy_forces_dipole": + elif args.loss == "energy_forces_dipole": assert dipole_only is False and compute_dipole is True loss_fn = modules.WeightedEnergyForcesDipoleLoss( - energy_weight=energy_weight, - forces_weight=forces_weight, - dipole_weight=dipole_weight, + energy_weight=args.energy_weight, + forces_weight=args.forces_weight, + dipole_weight=args.dipole_weight, ) else: - loss_fn = modules.EnergyForcesLoss( - energy_weight=energy_weight, forces_weight=forces_weight - ) + loss_fn = modules.WeightedEnergyForcesLoss(energy_weight=1.0, forces_weight=1.0) return loss_fn +def get_swa( + args: argparse.Namespace, + model: torch.nn.Module, + optimizer: torch.optim.Optimizer, + swas: List[bool], + dipole_only: bool = False, +): + assert dipole_only is False, "Stage Two for dipole fitting not implemented" + swas.append(True) + if args.start_swa is None: + args.start_swa = max(1, args.max_num_epochs // 4 * 3) + else: + if args.start_swa > args.max_num_epochs: + logging.warning( + f"Start Stage Two must be less than max_num_epochs, got {args.start_swa} > {args.max_num_epochs}" + ) + if args.loss == "forces_only": + raise ValueError("Can not select Stage Two with forces only loss.") + if args.loss == "virials": + loss_fn_energy = modules.WeightedEnergyForcesVirialsLoss( + energy_weight=args.swa_energy_weight, + forces_weight=args.swa_forces_weight, + virials_weight=args.swa_virials_weight, + ) + logging.info( + f"Stage Two (after {args.start_swa} epochs) with loss function: {loss_fn_energy}, energy weight : {args.swa_energy_weight}, forces weight : {args.swa_forces_weight}, virials_weight: {args.swa_virials_weight} and learning rate : {args.swa_lr}" + ) + elif args.loss == "stress": + loss_fn_energy = modules.WeightedEnergyForcesStressLoss( + energy_weight=args.swa_energy_weight, + forces_weight=args.swa_forces_weight, + stress_weight=args.swa_stress_weight, + ) + logging.info( + f"Stage Two (after {args.start_swa} epochs) with loss function: {loss_fn_energy}, energy weight : {args.swa_energy_weight}, forces weight : {args.swa_forces_weight}, stress weight : {args.stress_weight} and learning rate : {args.swa_lr}" + ) + elif args.loss == "energy_forces_dipole": + loss_fn_energy = modules.WeightedEnergyForcesDipoleLoss( + args.swa_energy_weight, + forces_weight=args.swa_forces_weight, + dipole_weight=args.swa_dipole_weight, + ) + logging.info( + f"Stage Two (after {args.start_swa} epochs) with loss function: {loss_fn_energy}, with energy weight : {args.swa_energy_weight}, forces weight : {args.swa_forces_weight}, dipole weight : {args.swa_dipole_weight} and learning rate : {args.swa_lr}" + ) + elif args.loss == "universal": + loss_fn_energy = modules.UniversalLoss( + energy_weight=args.swa_energy_weight, + forces_weight=args.swa_forces_weight, + stress_weight=args.swa_stress_weight, + huber_delta=args.huber_delta, + ) + logging.info( + f"Stage Two (after {args.start_swa} epochs) with loss function: {loss_fn_energy}, with energy weight : {args.swa_energy_weight}, forces weight : {args.swa_forces_weight}, stress weight : {args.swa_stress_weight} and learning rate : {args.swa_lr}" + ) + else: + loss_fn_energy = modules.WeightedEnergyForcesLoss( + energy_weight=args.swa_energy_weight, + forces_weight=args.swa_forces_weight, + ) + logging.info( + f"Stage Two (after {args.start_swa} epochs) with loss function: {loss_fn_energy}, with energy weight : {args.swa_energy_weight}, forces weight : {args.swa_forces_weight} and learning rate : {args.swa_lr}" + ) + swa = SWAContainer( + model=AveragedModel(model), + scheduler=SWALR( + optimizer=optimizer, + swa_lr=args.swa_lr, + anneal_epochs=1, + anneal_strategy="linear", + ), + start=args.start_swa, + loss_fn=loss_fn_energy, + ) + return swa, swas + + +def get_params_options( + args: argparse.Namespace, model: torch.nn.Module +) -> Dict[str, Any]: + decay_interactions = {} + no_decay_interactions = {} + for name, param in model.interactions.named_parameters(): + if "linear.weight" in name or "skip_tp_full.weight" in name: + decay_interactions[name] = param + else: + no_decay_interactions[name] = param + + param_options = dict( + params=[ + { + "name": "embedding", + "params": model.node_embedding.parameters(), + "weight_decay": 0.0, + }, + { + "name": "interactions_decay", + "params": list(decay_interactions.values()), + "weight_decay": args.weight_decay, + }, + { + "name": "interactions_no_decay", + "params": list(no_decay_interactions.values()), + "weight_decay": 0.0, + }, + { + "name": "products", + "params": model.products.parameters(), + "weight_decay": args.weight_decay, + }, + { + "name": "readouts", + "params": model.readouts.parameters(), + "weight_decay": 0.0, + }, + ], + lr=args.lr, + amsgrad=args.amsgrad, + betas=(args.beta, 0.999), + ) + return param_options + + +def get_optimizer( + args: argparse.Namespace, param_options: Dict[str, Any] +) -> torch.optim.Optimizer: + if args.optimizer == "adamw": + optimizer = torch.optim.AdamW(**param_options) + elif args.optimizer == "schedulefree": + try: + from schedulefree import adamw_schedulefree + except ImportError as exc: + raise ImportError( + "`schedulefree` is not installed. Please install it via `pip install schedulefree` or `pip install mace-torch[schedulefree]`" + ) from exc + _param_options = {k: v for k, v in param_options.items() if k != "amsgrad"} + optimizer = adamw_schedulefree.AdamWScheduleFree(**_param_options) + else: + optimizer = torch.optim.Adam(**param_options) + return optimizer + + +def setup_wandb(args: argparse.Namespace): + logging.info("Using Weights and Biases for logging") + import wandb + + wandb_config = {} + args_dict = vars(args) + + for key, value in args_dict.items(): + if isinstance(value, np.ndarray): + args_dict[key] = value.tolist() + + args_dict_json = json.dumps(args_dict) + for key in args.wandb_log_hypers: + wandb_config[key] = args_dict[key] + tools.init_wandb( + project=args.wandb_project, + entity=args.wandb_entity, + name=args.wandb_name, + config=wandb_config, + directory=args.wandb_dir, + ) + wandb.run.summary["params"] = args_dict_json + + def get_files_with_suffix(dir_path: str, suffix: str) -> List[str]: return [ os.path.join(dir_path, f) for f in os.listdir(dir_path) if f.endswith(suffix) @@ -394,6 +619,23 @@ def custom_key(key): return (2, key) +def dict_to_array(input_data, heads): + if not all(isinstance(value, dict) for value in input_data.values()): + return np.array(list(input_data.values())) + unique_keys = set() + for inner_dict in input_data.values(): + unique_keys.update(inner_dict.keys()) + unique_keys = list(unique_keys) + sorted_keys = sorted([int(key) for key in unique_keys]) + result_array = np.zeros((len(input_data), len(sorted_keys))) + for _, (head_name, inner_dict) in enumerate(input_data.items()): + for key, value in inner_dict.items(): + key_index = sorted_keys.index(int(key)) + head_index = heads.index(head_name) + result_array[head_index][key_index] = value + return np.squeeze(result_array) + + class LRScheduler: def __init__(self, optimizer, args) -> None: self.scheduler = args.scheduler @@ -648,3 +890,20 @@ def create_error_table( ] ) return table + + +def check_folder_subfolder(folder_path): + entries = os.listdir(folder_path) + for entry in entries: + full_path = os.path.join(folder_path, entry) + if os.path.isdir(full_path): + return True + return False + + +def dict_to_namespace(dictionary): + # Convert the dictionary into an argparse.Namespace + namespace = argparse.Namespace() + for key, value in dictionary.items(): + setattr(namespace, key, value) + return namespace diff --git a/mace/tools/torch_tools.py b/mace/tools/torch_tools.py index 1ec3ecde..e42a74f8 100644 --- a/mace/tools/torch_tools.py +++ b/mace/tools/torch_tools.py @@ -64,6 +64,9 @@ def init_device(device_str: str) -> torch.device: assert torch.backends.mps.is_available(), "No MPS backend is available!" logging.info("Using MPS GPU acceleration") return torch.device("mps") + if device_str == "xpu": + torch.xpu.is_available() + return torch.device("xpu") logging.info("Using CPU") return torch.device("cpu") diff --git a/mace/tools/train.py b/mace/tools/train.py index b38bce16..1ab86f82 100644 --- a/mace/tools/train.py +++ b/mace/tools/train.py @@ -41,7 +41,14 @@ class SWAContainer: loss_fn: torch.nn.Module -def valid_err_log(valid_loss, eval_metrics, logger, log_errors, epoch=None): +def valid_err_log( + valid_loss, + eval_metrics, + logger, + log_errors, + epoch=None, + valid_loader_name="Default", +): eval_metrics["mode"] = "eval" eval_metrics["epoch"] = epoch logger.log(eval_metrics) @@ -53,17 +60,17 @@ def valid_err_log(valid_loss, eval_metrics, logger, log_errors, epoch=None): error_e = eval_metrics["rmse_e_per_atom"] * 1e3 error_f = eval_metrics["rmse_f"] * 1e3 logging.info( - f"{inintial_phrase}: loss={valid_loss:8.4f}, RMSE_E_per_atom={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A" + f"{inintial_phrase}: head: {valid_loader_name}, loss={valid_loss:8.4f}, RMSE_E_per_atom={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A" ) elif ( log_errors == "PerAtomRMSEstressvirials" - and eval_metrics["rmse_stress_per_atom"] is not None + and eval_metrics["rmse_stress"] is not None ): error_e = eval_metrics["rmse_e_per_atom"] * 1e3 error_f = eval_metrics["rmse_f"] * 1e3 - error_stress = eval_metrics["rmse_stress_per_atom"] * 1e3 + error_stress = eval_metrics["rmse_stress"] * 1e3 logging.info( - f"{inintial_phrase}: loss={valid_loss:8.4f}, RMSE_E_per_atom={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A, RMSE_stress_per_atom={error_stress:8.1f} meV / A^3", + f"{inintial_phrase}: head: {valid_loader_name}, loss={valid_loss:8.4f}, RMSE_E_per_atom={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A, RMSE_stress={error_stress:8.1f} meV / A^3", ) elif ( log_errors == "PerAtomRMSEstressvirials" @@ -73,7 +80,7 @@ def valid_err_log(valid_loss, eval_metrics, logger, log_errors, epoch=None): error_f = eval_metrics["rmse_f"] * 1e3 error_virials = eval_metrics["rmse_virials_per_atom"] * 1e3 logging.info( - f"{inintial_phrase}: loss={valid_loss:8.4f}, RMSE_E_per_atom={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A, RMSE_virials_per_atom={error_virials:8.1f} meV", + f"{inintial_phrase}: head: {valid_loader_name}, loss={valid_loss:8.4f}, RMSE_E_per_atom={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A, RMSE_virials_per_atom={error_virials:8.1f} meV", ) elif ( log_errors == "PerAtomMAEstressvirials" @@ -99,31 +106,31 @@ def valid_err_log(valid_loss, eval_metrics, logger, log_errors, epoch=None): error_e = eval_metrics["rmse_e"] * 1e3 error_f = eval_metrics["rmse_f"] * 1e3 logging.info( - f"{inintial_phrase}: loss={valid_loss:8.4f}, RMSE_E={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A", + f"{inintial_phrase}: head: {valid_loader_name}, loss={valid_loss:8.4f}, RMSE_E={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A", ) elif log_errors == "PerAtomMAE": error_e = eval_metrics["mae_e_per_atom"] * 1e3 error_f = eval_metrics["mae_f"] * 1e3 logging.info( - f"{inintial_phrase}: loss={valid_loss:8.4f}, MAE_E_per_atom={error_e:8.1f} meV, MAE_F={error_f:8.1f} meV / A", + f"{inintial_phrase}: head: {valid_loader_name}, loss={valid_loss:8.4f}, MAE_E_per_atom={error_e:8.1f} meV, MAE_F={error_f:8.1f} meV / A", ) elif log_errors == "TotalMAE": error_e = eval_metrics["mae_e"] * 1e3 error_f = eval_metrics["mae_f"] * 1e3 logging.info( - f"{inintial_phrase}: loss={valid_loss:8.4f}, MAE_E={error_e:8.1f} meV, MAE_F={error_f:8.1f} meV / A", + f"{inintial_phrase}: head: {valid_loader_name}, loss={valid_loss:8.4f}, MAE_E={error_e:8.1f} meV, MAE_F={error_f:8.1f} meV / A", ) elif log_errors == "DipoleRMSE": error_mu = eval_metrics["rmse_mu_per_atom"] * 1e3 logging.info( - f"{inintial_phrase}: loss={valid_loss:8.4f}, RMSE_MU_per_atom={error_mu:8.2f} mDebye", + f"{inintial_phrase}: head: {valid_loader_name}, loss={valid_loss:8.4f}, RMSE_MU_per_atom={error_mu:8.2f} mDebye", ) elif log_errors == "EnergyDipoleRMSE": error_e = eval_metrics["rmse_e_per_atom"] * 1e3 error_f = eval_metrics["rmse_f"] * 1e3 error_mu = eval_metrics["rmse_mu_per_atom"] * 1e3 logging.info( - f"{inintial_phrase}: loss={valid_loss:8.4f}, RMSE_E_per_atom={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A, RMSE_Mu_per_atom={error_mu:8.2f} mDebye", + f"{inintial_phrase}: head: {valid_loader_name}, loss={valid_loss:8.4f}, RMSE_E_per_atom={error_e:8.1f} meV, RMSE_F={error_f:8.1f} meV / A, RMSE_Mu_per_atom={error_mu:8.2f} mDebye", ) @@ -131,7 +138,7 @@ def train( model: torch.nn.Module, loss_fn: torch.nn.Module, train_loader: DataLoader, - valid_loader: Dict[str, DataLoader], + valid_loaders: Dict[str, DataLoader], optimizer: torch.optim.Optimizer, lr_scheduler: torch.optim.lr_scheduler.ExponentialLR, start_epoch: int, @@ -170,17 +177,20 @@ def train( logging.info("Loss metrics on validation set") epoch = start_epoch - # # log validation loss before _any_ training - param_context = ema.average_parameters() if ema is not None else nullcontext() - with param_context: - valid_loss, eval_metrics = evaluate( + # log validation loss before _any_ training + valid_loss = 0.0 + for valid_loader_name, valid_loader in valid_loaders.items(): + valid_loss_head, eval_metrics = evaluate( model=model, loss_fn=loss_fn, data_loader=valid_loader, output_args=output_args, device=device, ) - valid_err_log(valid_loss, eval_metrics, logger, log_errors, None) + valid_err_log( + valid_loss_head, eval_metrics, logger, log_errors, None, valid_loader_name + ) + valid_loss = valid_loss_head # consider only the last head for the checkpoint while epoch < max_num_epochs: # LR scheduler and SWA update @@ -233,30 +243,40 @@ def train( if "ScheduleFree" in type(optimizer).__name__: optimizer.eval() with param_context: - valid_loss, eval_metrics = evaluate( - model=model_to_evaluate, - loss_fn=loss_fn, - data_loader=valid_loader, - output_args=output_args, - device=device, + valid_loss = 0.0 + wandb_log_dict = {} + for valid_loader_name, valid_loader in valid_loaders.items(): + valid_loss_head, eval_metrics = evaluate( + model=model_to_evaluate, + loss_fn=loss_fn, + data_loader=valid_loader, + output_args=output_args, + device=device, + ) + if rank == 0: + valid_err_log( + valid_loss_head, + eval_metrics, + logger, + log_errors, + epoch, + valid_loader_name, + ) + if log_wandb: + wandb_log_dict[valid_loader_name] = { + "epoch": epoch, + "valid_loss": valid_loss_head, + "valid_rmse_e_per_atom": eval_metrics[ + "rmse_e_per_atom" + ], + "valid_rmse_f": eval_metrics["rmse_f"], + } + valid_loss = ( + valid_loss_head # consider only the last head for the checkpoint ) + if log_wandb: + wandb.log(wandb_log_dict) if rank == 0: - valid_err_log( - valid_loss, - eval_metrics, - logger, - log_errors, - epoch, - ) - if log_wandb: - wandb_log_dict = { - "epoch": epoch, - "valid_loss": valid_loss, - "valid_rmse_e_per_atom": eval_metrics["rmse_e_per_atom"], - "valid_rmse_f": eval_metrics["rmse_f"], - } - wandb.log(wandb_log_dict) - if valid_loss >= lowest_loss: patience_counter += 1 if patience_counter >= patience and epoch < swa.start: @@ -422,7 +442,6 @@ def __init__(self, loss_fn: torch.nn.Module): "stress_computed", default=torch.tensor(0.0), dist_reduce_fx="sum" ) self.add_state("delta_stress", default=[], dist_reduce_fx="cat") - self.add_state("delta_stress_per_atom", default=[], dist_reduce_fx="cat") self.add_state( "virials_computed", default=torch.tensor(0.0), dist_reduce_fx="sum" ) @@ -451,10 +470,6 @@ def update(self, batch, output): # pylint: disable=arguments-differ if output.get("stress") is not None and batch.stress is not None: self.stress_computed += 1.0 self.delta_stress.append(batch.stress - output["stress"]) - self.delta_stress_per_atom.append( - (batch.stress - output["stress"]) - / (batch.ptr[1:] - batch.ptr[:-1]).view(-1, 1, 1) - ) if output.get("virials") is not None and batch.virials is not None: self.virials_computed += 1.0 self.delta_virials.append(batch.virials - output["virials"]) @@ -497,10 +512,8 @@ def compute(self): aux["q95_f"] = compute_q95(delta_fs) if self.stress_computed: delta_stress = self.convert(self.delta_stress) - delta_stress_per_atom = self.convert(self.delta_stress_per_atom) aux["mae_stress"] = compute_mae(delta_stress) aux["rmse_stress"] = compute_rmse(delta_stress) - aux["rmse_stress_per_atom"] = compute_rmse(delta_stress_per_atom) aux["q95_stress"] = compute_q95(delta_stress) if self.virials_computed: delta_virials = self.convert(self.delta_virials) diff --git a/mace/tools/utils.py b/mace/tools/utils.py index 762d9880..28a77efe 100644 --- a/mace/tools/utils.py +++ b/mace/tools/utils.py @@ -121,26 +121,6 @@ def atomic_numbers_to_indices( return to_index_fn(atomic_numbers) -def get_optimizer( - name: str, - amsgrad: bool, - learning_rate: float, - weight_decay: float, - parameters: Iterable[torch.Tensor], -) -> torch.optim.Optimizer: - if name == "adam": - return torch.optim.Adam( - parameters, lr=learning_rate, amsgrad=amsgrad, weight_decay=weight_decay - ) - - if name == "adamw": - return torch.optim.AdamW( - parameters, lr=learning_rate, amsgrad=amsgrad, weight_decay=weight_decay - ) - - raise RuntimeError(f"Unknown optimizer '{name}'") - - class UniversalEncoder(json.JSONEncoder): def default(self, o): if isinstance(o, np.integer): @@ -161,7 +141,6 @@ def __init__(self, directory: str, tag: str) -> None: self.path = os.path.join(self.directory, self.filename) def log(self, d: Dict[str, Any]) -> None: - logging.debug(f"Saving info: {self.path}") os.makedirs(name=self.directory, exist_ok=True) with open(self.path, mode="a", encoding="utf-8") as f: f.write(json.dumps(d, cls=UniversalEncoder)) diff --git a/pyproject.toml b/pyproject.toml index 05037dc7..489bc6e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ disable = [ "not-callable", "logging-fstring-interpolation", "logging-not-lazy", + "logging-too-many-args", "invalid-name", "too-few-public-methods", "too-many-instance-attributes", diff --git a/setup.cfg b/setup.cfg index 81e0b661..13d55161 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,6 +27,7 @@ install_requires = python-hostlist configargparse GitPython + pyYAML tqdm # for plotting: matplotlib @@ -40,9 +41,11 @@ console_scripts = mace_plot_train = mace.cli.plot_train:main mace_run_train = mace.cli.run_train:main mace_prepare_data = mace.cli.preprocess_data:main + mace_finetuning = mace.cli.fine_tuning_select:main [options.extras_require] wandb = wandb +fpsample = fpsample dev = black isort diff --git a/tests/test_calculator.py b/tests/test_calculator.py index ef048854..21941d6f 100644 --- a/tests/test_calculator.py +++ b/tests/test_calculator.py @@ -6,6 +6,7 @@ import ase.io import numpy as np import pytest +import torch from ase import build from ase.atoms import Atoms from ase.calculators.test import gradient_test @@ -381,12 +382,12 @@ def test_calculator_node_energy(fitting_configs, trained_model): trained_model.calculate(at) node_energies = trained_model.results["node_energy"] batch = trained_model._atoms_to_batch(at) # pylint: disable=protected-access + node_heads = batch["head"][batch["batch"]] + num_atoms_arange = torch.arange(batch["positions"].shape[0]) node_e0 = ( - trained_model.models[0] - .atomic_energies_fn(batch["node_attrs"]) - .detach() - .numpy() + trained_model.models[0].atomic_energies_fn(batch["node_attrs"]).detach() ) + node_e0 = node_e0[num_atoms_arange, node_heads].cpu().numpy() energy_via_nodes = np.sum(node_energies + node_e0) energy = trained_model.results["energy"] np.testing.assert_allclose(energy, energy_via_nodes, atol=1e-6) diff --git a/tests/test_data.py b/tests/test_data.py index e893f03c..9e0c49e6 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -144,7 +144,7 @@ def test_basic(self): ] ) - indices, shifts, unit_shifts = get_neighborhood(positions, cutoff=1.5) + indices, shifts, unit_shifts, _ = get_neighborhood(positions, cutoff=1.5) assert indices.shape == (2, 4) assert shifts.shape == (4, 3) assert unit_shifts.shape == (4, 3) @@ -158,7 +158,7 @@ def test_signs(self): ) cell = np.array([[2.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) - edge_index, shifts, unit_shifts = get_neighborhood( + edge_index, shifts, unit_shifts, _ = get_neighborhood( positions, cutoff=3.5, pbc=(True, False, False), cell=cell ) num_edges = 10 @@ -172,7 +172,7 @@ def test_periodic_edge(): atoms = ase.build.bulk("Cu", "fcc") dist = np.linalg.norm(atoms.cell[0]).item() config = config_from_atoms(atoms) - edge_index, shifts, _ = get_neighborhood( + edge_index, shifts, _, _ = get_neighborhood( config.positions, cutoff=1.05 * dist, pbc=(True, True, True), cell=config.cell ) sender, receiver = edge_index @@ -190,7 +190,7 @@ def test_half_periodic(): atoms = ase.build.fcc111("Al", size=(3, 3, 1), vacuum=0.0) assert all(atoms.pbc == (True, True, False)) config = config_from_atoms(atoms) # first shell dist is 2.864A - edge_index, shifts, _ = get_neighborhood( + edge_index, shifts, _, _ = get_neighborhood( config.positions, cutoff=2.9, pbc=(True, True, False), cell=config.cell ) sender, receiver = edge_index diff --git a/tests/test_foundations.py b/tests/test_foundations.py index 69963c67..b1724629 100644 --- a/tests/test_foundations.py +++ b/tests/test_foundations.py @@ -9,7 +9,7 @@ from mace import data, modules, tools from mace.calculators import mace_mp, mace_off from mace.tools import torch_geometric -from mace.tools.finetuning_utils import load_foundations +from mace.tools.finetuning_utils import load_foundations_elements from mace.tools.scripts_utils import extract_config_mace_model from mace.tools.utils import AtomicNumberTable @@ -71,7 +71,7 @@ def test_foundations(): default_dtype="float64", ) model_foundations = calc.models[0] - model_loaded = load_foundations( + model_loaded = load_foundations_elements( model, model_foundations, table=table, @@ -96,6 +96,75 @@ def test_foundations(): assert torch.allclose(forces, forces_loaded) +def test_multi_reference(): + config_multi = data.Configuration( + atomic_numbers=molecule("H2COH").numbers, + positions=molecule("H2COH").positions, + forces=molecule("H2COH").positions, + energy=-1.5, + charges=molecule("H2COH").numbers, + dipole=np.array([-1.5, 1.5, 2.0]), + head="MP2", + ) + table_multi = tools.AtomicNumberTable([1, 6, 8]) + atomic_energies_multi = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], dtype=float) + + # Create MACE model + model_config = dict( + r_max=6, + num_bessel=10, + num_polynomial_cutoff=5, + max_ell=3, + interaction_cls=modules.interaction_classes[ + "RealAgnosticResidualInteractionBlock" + ], + interaction_cls_first=modules.interaction_classes[ + "RealAgnosticResidualInteractionBlock" + ], + num_interactions=2, + num_elements=3, + hidden_irreps=o3.Irreps("128x0e + 128x1o"), + MLP_irreps=o3.Irreps("16x0e"), + gate=torch.nn.functional.silu, + atomic_energies=atomic_energies_multi, + avg_num_neighbors=61, + atomic_numbers=table.zs, + correlation=3, + radial_type="bessel", + atomic_inter_scale=[1.0, 1.0], + atomic_inter_shift=[0.0, 0.0], + heads=["MP2", "DFT"], + ) + model = modules.ScaleShiftMACE(**model_config) + calc_foundation = mace_mp(device="cpu", default_dtype="float64") + model_loaded = load_foundations_elements( + model, + calc_foundation.models[0], + table=table, + load_readout=True, + use_shift=False, + max_L=1, + ) + atomic_data = data.AtomicData.from_config( + config_multi, z_table=table_multi, cutoff=6.0, heads=["MP2", "DFT"] + ) + data_loader = torch_geometric.dataloader.DataLoader( + dataset=[atomic_data, atomic_data], + batch_size=2, + shuffle=True, + drop_last=False, + ) + batch = next(iter(data_loader)) + forces_loaded = model_loaded(batch)["forces"] + calc_foundation = mace_mp(device="cpu", default_dtype="float64") + atoms = molecule("H2COH") + atoms.calc = calc_foundation + forces = atoms.get_forces() + assert np.allclose( + forces, forces_loaded.detach().numpy()[:5, :], atol=1e-5, rtol=1e-5 + ) + + @pytest.mark.parametrize( "model", [ @@ -113,12 +182,8 @@ def test_extract_config(model): model_copy.load_state_dict(model.state_dict()) z_table = AtomicNumberTable([int(z) for z in model.atomic_numbers]) atomic_data = data.AtomicData.from_config(config, z_table=z_table, cutoff=6.0) - atomic_data2 = data.AtomicData.from_config( - config_rotated, z_table=z_table, cutoff=6.0 - ) - data_loader = torch_geometric.dataloader.DataLoader( - dataset=[atomic_data, atomic_data2], + dataset=[atomic_data, atomic_data], batch_size=2, shuffle=True, drop_last=False, diff --git a/tests/test_models.py b/tests/test_models.py index 18ef536b..8e8c60da 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -196,3 +196,56 @@ def test_energy_dipole_mace(): np.array(rot @ output["dipole"][0].detach().numpy()), output["dipole"][1].detach().numpy(), ) + + +def test_mace_multi_reference(): + atomic_energies_multi = np.array([[1.0, 3.0], [0.0, 0.0]], dtype=float) + model_config = dict( + r_max=5, + num_bessel=8, + num_polynomial_cutoff=6, + max_ell=3, + interaction_cls=modules.interaction_classes[ + "RealAgnosticResidualInteractionBlock" + ], + interaction_cls_first=modules.interaction_classes[ + "RealAgnosticResidualInteractionBlock" + ], + num_interactions=2, + num_elements=2, + hidden_irreps=o3.Irreps("96x0e + 96x1o"), + MLP_irreps=o3.Irreps("16x0e"), + gate=torch.nn.functional.silu, + atomic_energies=atomic_energies_multi, + avg_num_neighbors=8, + atomic_numbers=table.zs, + distance_transform=True, + pair_repulsion=True, + correlation=3, + heads=["Default", "dft"], + # radial_type="chebyshev", + atomic_inter_scale=[1.0, 1.0], + atomic_inter_shift=[0.0, 0.1], + ) + model = modules.ScaleShiftMACE(**model_config) + model_compiled = jit.compile(model) + config.head = "Default" + config_rotated.head = "dft" + atomic_data = data.AtomicData.from_config( + config, z_table=table, cutoff=3.0, heads=["Default", "dft"] + ) + atomic_data2 = data.AtomicData.from_config( + config_rotated, z_table=table, cutoff=3.0, heads=["Default", "dft"] + ) + + data_loader = torch_geometric.dataloader.DataLoader( + dataset=[atomic_data, atomic_data2], + batch_size=2, + shuffle=True, + drop_last=False, + ) + batch = next(iter(data_loader)) + output1 = model(batch.to_dict(), training=True) + output2 = model_compiled(batch.to_dict(), training=True) + assert torch.allclose(output1["energy"][0], output2["energy"][0]) + assert output2["energy"].shape[0] == 2 diff --git a/tests/test_modules.py b/tests/test_modules.py index a5166bd2..5539ceb1 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -106,7 +106,7 @@ def test_atomic_energies(self): ) batch = next(iter(data_loader)) - energies = energies_block(batch.node_attrs) + energies = energies_block(batch.node_attrs).squeeze(-1) out = scatter.scatter_sum(src=energies, index=batch.batch, dim=-1, reduce="sum") out = to_numpy(out) assert np.allclose(out, np.array([5.0, 5.0])) diff --git a/tests/test_run_train.py b/tests/test_run_train.py index 5f39805b..899f5749 100644 --- a/tests/test_run_train.py +++ b/tests/test_run_train.py @@ -278,6 +278,114 @@ def test_run_train_no_stress(tmp_path, fitting_configs): assert np.allclose(Es, ref_Es) +def test_run_train_multihead(tmp_path, fitting_configs): + fitting_configs_dft = [] + fitting_configs_mp2 = [] + fitting_configs_ccd = [] + for _, c in enumerate(fitting_configs): + c_dft = c.copy() + c_dft.info["head"] = "DFT" + fitting_configs_dft.append(c_dft) + + c_mp2 = c.copy() + c_mp2.info["head"] = "MP2" + fitting_configs_mp2.append(c_mp2) + + c_ccd = c.copy() + c_ccd.info["head"] = "CCD" + fitting_configs_ccd.append(c_ccd) + ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft) + ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2) + ase.io.write(tmp_path / "fit_multihead_ccd.xyz", fitting_configs_ccd) + + heads = { + "DFT": {"train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz"}, + "MP2": {"train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz"}, + "CCD": {"train_file": f"{str(tmp_path)}/fit_multihead_ccd.xyz"}, + } + yaml_str = "heads:\n" + for key, value in heads.items(): + yaml_str += f" {key}:\n" + for sub_key, sub_value in value.items(): + yaml_str += f" {sub_key}: {sub_value}\n" + filename = tmp_path / "config.yaml" + with open(filename, "w", encoding="utf-8") as file: + file.write(yaml_str) + + mace_params = _mace_params.copy() + mace_params["valid_fraction"] = 0.1 + mace_params["checkpoints_dir"] = str(tmp_path) + mace_params["model_dir"] = str(tmp_path) + mace_params["loss"] = "weighted" + mace_params["hidden_irreps"] = "128x0e" + mace_params["r_max"] = 6.0 + mace_params["default_dtype"] = "float64" + mace_params["num_radial_basis"] = 10 + mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock" + mace_params["config"] = tmp_path / "config.yaml" + mace_params["batch_size"] = 2 + mace_params["num_samples_pt"] = 50 + mace_params["subselect_pt"] = "random" + # make sure run_train.py is using the mace that is currently being tested + run_env = os.environ.copy() + sys.path.insert(0, str(Path(__file__).parent.parent)) + run_env["PYTHONPATH"] = ":".join(sys.path) + print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"]) + + cmd = ( + sys.executable + + " " + + str(run_train) + + " " + + " ".join( + [ + (f"--{k}={v}" if v is not None else f"--{k}") + for k, v in mace_params.items() + ] + ) + ) + + p = subprocess.run(cmd.split(), env=run_env, check=True) + assert p.returncode == 0 + + calc = MACECalculator( + tmp_path / "MACE.model", device="cpu", default_dtype="float64" + ) + + Es = [] + for at in fitting_configs: + at.calc = calc + Es.append(at.get_potential_energy()) + + print("Es", Es) + # from a run on 02/09/2024 on develop branch + ref_Es = [ + 0.0, + 0.0, + 0.10637113905361611, + -0.012499594026624754, + 0.08983077108171753, + 0.21071322543112597, + -0.028921849222784398, + -0.02423359575741567, + 0.022923252188079057, + -0.02048334610058991, + 0.4349711162741364, + -0.04455577015569887, + -0.09765806785570091, + 0.16013134616829822, + 0.0758442928017698, + -0.05931856557011721, + 0.33964473532953265, + 0.134338442158641, + 0.18024119757783053, + -0.18914740992058765, + -0.06503477155294624, + 0.03436649147415213, + ] + assert np.allclose(Es, ref_Es) + + def test_run_train_foundation(tmp_path, fitting_configs): ase.io.write(tmp_path / "fit.xyz", fitting_configs) @@ -289,9 +397,13 @@ def test_run_train_foundation(tmp_path, fitting_configs): mace_params["foundation_model"] = "small" mace_params["hidden_irreps"] = "128x0e" mace_params["r_max"] = 6.0 - mace_params["default_dtype"] = "float32" + mace_params["default_dtype"] = "float64" mace_params["num_radial_basis"] = 10 mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock" + mace_params["multiheads_finetuning"] = False + print("mace_params", mace_params) + # mace_params["num_samples_pt"] = 50 + # mace_params["subselect_pt"] = "random" # make sure run_train.py is using the mace that is currently being tested run_env = os.environ.copy() sys.path.insert(0, str(Path(__file__).parent.parent)) @@ -315,7 +427,7 @@ def test_run_train_foundation(tmp_path, fitting_configs): assert p.returncode == 0 calc = MACECalculator( - model_paths=tmp_path / "MACE.model", device="cpu", default_dtype="float32" + tmp_path / "MACE.model", device="cpu", default_dtype="float64" ) Es = [] @@ -350,3 +462,112 @@ def test_run_train_foundation(tmp_path, fitting_configs): 0.7301387786865234, ] assert np.allclose(Es, ref_Es) + + +def test_run_train_foundation_multihead(tmp_path, fitting_configs): + fitting_configs_dft = [] + fitting_configs_mp2 = [] + for i, c in enumerate(fitting_configs): + if i in (0, 1): + c_dft = c.copy() + c_dft.info["head"] = "DFT" + fitting_configs_dft.append(c_dft) + fitting_configs_dft.append(c) + c_mp2 = c.copy() + c_mp2.info["head"] = "MP2" + fitting_configs_mp2.append(c_mp2) + elif i % 2 == 0: + c.info["head"] = "DFT" + fitting_configs_dft.append(c) + else: + c.info["head"] = "MP2" + fitting_configs_mp2.append(c) + ase.io.write(tmp_path / "fit_multihead_dft.xyz", fitting_configs_dft) + ase.io.write(tmp_path / "fit_multihead_mp2.xyz", fitting_configs_mp2) + + heads = { + "DFT": {"train_file": f"{str(tmp_path)}/fit_multihead_dft.xyz"}, + "MP2": {"train_file": f"{str(tmp_path)}/fit_multihead_mp2.xyz"}, + } + yaml_str = "heads:\n" + for key, value in heads.items(): + yaml_str += f" {key}:\n" + for sub_key, sub_value in value.items(): + yaml_str += f" {sub_key}: {sub_value}\n" + filename = tmp_path / "config.yaml" + with open(filename, "w", encoding="utf-8") as file: + file.write(yaml_str) + mace_params = _mace_params.copy() + mace_params["valid_fraction"] = 0.1 + mace_params["checkpoints_dir"] = str(tmp_path) + mace_params["model_dir"] = str(tmp_path) + mace_params["config"] = tmp_path / "config.yaml" + mace_params["loss"] = "weighted" + mace_params["foundation_model"] = "small" + mace_params["hidden_irreps"] = "128x0e" + mace_params["r_max"] = 6.0 + mace_params["default_dtype"] = "float64" + mace_params["num_radial_basis"] = 10 + mace_params["interaction_first"] = "RealAgnosticResidualInteractionBlock" + mace_params["batch_size"] = 2 + mace_params["valid_batch_size"] = 1 + mace_params["num_samples_pt"] = 50 + mace_params["subselect_pt"] = "random" + # make sure run_train.py is using the mace that is currently being tested + run_env = os.environ.copy() + sys.path.insert(0, str(Path(__file__).parent.parent)) + run_env["PYTHONPATH"] = ":".join(sys.path) + print("DEBUG subprocess PYTHONPATH", run_env["PYTHONPATH"]) + + cmd = ( + sys.executable + + " " + + str(run_train) + + " " + + " ".join( + [ + (f"--{k}={v}" if v is not None else f"--{k}") + for k, v in mace_params.items() + ] + ) + ) + + p = subprocess.run(cmd.split(), env=run_env, check=True) + assert p.returncode == 0 + + calc = MACECalculator( + tmp_path / "MACE.model", device="cpu", default_dtype="float64" + ) + + Es = [] + for at in fitting_configs: + at.calc = calc + Es.append(at.get_potential_energy()) + + print("Es", Es) + # from a run on 20/08/2024 on commit + ref_Es = [ + 1.654685616493225, + 0.44693732261657715, + 0.8741313815116882, + 0.569085955619812, + 0.7161882519721985, + 0.8654778599739075, + 0.8722733855247498, + 0.49582308530807495, + 0.814422607421875, + 0.7027317881584167, + 0.7196993827819824, + 0.517953097820282, + 0.8631765246391296, + 0.4679797887802124, + 0.8163984417915344, + 0.4252359867095947, + 1.0861445665359497, + 0.6829671263694763, + 0.7136879563331604, + 0.5160345435142517, + 0.7002358436584473, + 0.5574042201042175, + ] + assert np.allclose(Es, ref_Es, atol=1e-1)