Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow creation of MACECalculator without needing to write a checkpoint to disk. #535

Merged
merged 2 commits on Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion mace/calculators/foundations_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,4 +227,6 @@ def mace_anicc(
print(
"Using ANI couple cluster model for MACECalculator, see https://doi.org/10.1063/5.0155322"
)
return MACECalculator(model_path, device=device, default_dtype="float64")
return MACECalculator(
model_paths=model_path, device=device, default_dtype="float64"
)
97 changes: 67 additions & 30 deletions mace/calculators/mace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
###########################################################################################


import logging
from glob import glob
from pathlib import Path
from typing import Union
Expand All @@ -18,7 +19,7 @@
from mace.modules.utils import extract_invariant
from mace.tools import torch_geometric, torch_tools, utils
from mace.tools.compile import prepare
from mace.tools.scripts_utils import extract_load
from mace.tools.scripts_utils import extract_model


def get_model_dtype(model: torch.nn.Module) -> torch.dtype:
Expand Down Expand Up @@ -49,8 +50,9 @@ class MACECalculator(Calculator):

def __init__(
self,
model_paths: Union[list, str],
device: str,
model_paths: Union[list, str] | None = None,
device: str | None = None,
models: Union[list[torch.nn.Module], torch.nn.Module] | None = None,
energy_units_to_eV: float = 1.0,
length_units_to_A: float = 1.0,
default_dtype="",
Expand All @@ -61,6 +63,24 @@ def __init__(
**kwargs,
):
Calculator.__init__(self, **kwargs)

if "model_path" in kwargs:
deprecation_message = (
"'model_path' argument is deprecated, please use 'model_paths'"
)
if model_paths is None:
logging.warning(f"{deprecation_message} in the future.")
model_paths = kwargs["model_path"]
else:
raise ValueError(
f"both 'model_path' and 'model_paths' given, {deprecation_message} only."
)

if (model_paths is None) == (models is None):
raise ValueError(
"Exactly one of 'model_paths' or 'models' must be provided"
)

self.results = {}

self.model_type = model_type
Expand Down Expand Up @@ -89,53 +109,70 @@ def __init__(
f"Give a valid model_type: [MACE, DipoleMACE, EnergyDipoleMACE], {model_type} not supported"
)

if "model_path" in kwargs:
print("model_path argument deprecated, use model_paths")
model_paths = kwargs["model_path"]

if isinstance(model_paths, str):
# Find all models that satisfy the wildcard (e.g. mace_model_*.pt)
model_paths_glob = glob(model_paths)
if len(model_paths_glob) == 0:
raise ValueError(f"Couldn't find MACE model files: {model_paths}")
model_paths = model_paths_glob
elif isinstance(model_paths, Path):
model_paths = [model_paths]
if len(model_paths) == 0:
raise ValueError("No mace file names supplied")
self.num_models = len(model_paths)
if len(model_paths) > 1:
print(f"Running committee mace with {len(model_paths)} models")
if model_paths is not None:
if isinstance(model_paths, str):
# Find all models that satisfy the wildcard (e.g. mace_model_*.pt)
model_paths_glob = glob(model_paths)

if len(model_paths_glob) == 0:
raise ValueError(f"Couldn't find MACE model files: {model_paths}")

model_paths = model_paths_glob
elif isinstance(model_paths, Path):
model_paths = [model_paths]

if len(model_paths) == 0:
raise ValueError("No mace file names supplied")
self.num_models = len(model_paths)

# Load models from files
self.models = [
torch.load(f=model_path, map_location=device)
for model_path in model_paths
]

elif models is not None:
if not isinstance(models, list):
models = [models]

if len(models) == 0:
raise ValueError("No models supplied")

self.models = models
self.num_models = len(models)

if self.num_models > 1:
print(f"Running committee mace with {self.num_models} models")

if model_type in ["MACE", "EnergyDipoleMACE"]:
self.implemented_properties.extend(
["energies", "energy_var", "forces_comm", "stress_var"]
)
elif model_type == "DipoleMACE":
self.implemented_properties.extend(["dipole_var"])

if compile_mode is not None:
print(f"Torch compile is enabled with mode: {compile_mode}")
self.models = [
torch.compile(
prepare(extract_load)(f=model_path, map_location=device),
prepare(extract_model)(model=model, map_location=device),
mode=compile_mode,
fullgraph=fullgraph,
)
for model_path in model_paths
for model in models
]
self.use_compile = True
else:
self.models = [
torch.load(f=model_path, map_location=device)
for model_path in model_paths
]
self.use_compile = False

# Ensure all models are on the same device
for model in self.models:
model.to(device) # shouldn't be necessary but seems to help with GPU
model.to(device)

r_maxs = [model.r_max.cpu() for model in self.models]
r_maxs = np.array(r_maxs)
assert np.all(
r_maxs == r_maxs[0]
), "committee r_max are not all the same {' '.join(r_maxs)}"
if not np.all(r_maxs == r_maxs[0]):
raise ValueError(f"committee r_max are not all the same {' '.join(r_maxs)}")
self.r_max = float(r_maxs[0])

self.device = torch_tools.init_device(device)
Expand Down
4 changes: 2 additions & 2 deletions mace/cli/active_learning_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ def run(args: argparse.Namespace) -> None:
atoms_index = args.config_index

mace_calc = MACECalculator(
mace_fname,
args.device,
model_paths=mace_fname,
device=args.device,
default_dtype=args.default_dtype,
)

Expand Down
7 changes: 3 additions & 4 deletions mace/tools/scripts_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,9 @@ def radial_to_transform(radial):


def extract_load(f: str, map_location: str = "cpu") -> torch.nn.Module:
model = torch.load(f=f, map_location=map_location)
model_copy = model.__class__(**extract_config_mace_model(model))
model_copy.load_state_dict(model.state_dict())
return model_copy.to(map_location)
return extract_model(
torch.load(f=f, map_location=map_location), map_location=map_location
)


def extract_model(model: torch.nn.Module, map_location: str = "cpu") -> torch.nn.Module:
Expand Down
24 changes: 19 additions & 5 deletions tests/test_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def trained_model_fixture(tmp_path_factory, fitting_configs):

assert p.returncode == 0

return MACECalculator(tmp_path / "MACE.model", device="cpu")
return MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")


@pytest.fixture(scope="module", name="trained_equivariant_model")
Expand Down Expand Up @@ -174,7 +174,7 @@ def trained_model_equivariant_fixture(tmp_path_factory, fitting_configs):

assert p.returncode == 0

return MACECalculator(tmp_path / "MACE.model", device="cpu")
return MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")


@pytest.fixture(scope="module", name="trained_dipole_model")
Expand Down Expand Up @@ -239,7 +239,7 @@ def trained_dipole_fixture(tmp_path_factory, fitting_configs):
assert p.returncode == 0

return MACECalculator(
tmp_path / "MACE.model", device="cpu", model_type="DipoleMACE"
model_paths=tmp_path / "MACE.model", device="cpu", model_type="DipoleMACE"
)


Expand Down Expand Up @@ -305,7 +305,7 @@ def trained_energy_dipole_fixture(tmp_path_factory, fitting_configs):
assert p.returncode == 0

return MACECalculator(
tmp_path / "MACE.model", device="cpu", model_type="EnergyDipoleMACE"
model_paths=tmp_path / "MACE.model", device="cpu", model_type="EnergyDipoleMACE"
)


Expand Down Expand Up @@ -374,7 +374,7 @@ def trained_committee_fixture(tmp_path_factory, fitting_configs):

_model_paths.append(tmp_path / f"MACE{seed}.model")

return MACECalculator(_model_paths, device="cpu")
return MACECalculator(model_paths=_model_paths, device="cpu")


def test_calculator_node_energy(fitting_configs, trained_model):
Expand Down Expand Up @@ -432,6 +432,20 @@ def test_calculator_committee(fitting_configs, trained_committee):
assert forces_var.shape == at.calc.results["forces"].shape


def test_calculator_from_model(fitting_configs, trained_committee):
# test single model
test_calculator_forces(
fitting_configs,
trained_model=MACECalculator(models=trained_committee.models[0], device="cpu"),
)

# test committee model
test_calculator_committee(
fitting_configs,
trained_committee=MACECalculator(models=trained_committee.models, device="cpu"),
)


def test_calculator_dipole(fitting_configs, trained_dipole_model):
at = fitting_configs[2].copy()
at.calc = trained_dipole_model
Expand Down
12 changes: 6 additions & 6 deletions tests/test_run_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_run_train(tmp_path, fitting_configs):
p = subprocess.run(cmd.split(), env=run_env, check=True)
assert p.returncode == 0

calc = MACECalculator(tmp_path / "MACE.model", device="cpu")
calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")

Es = []
for at in fitting_configs:
Expand Down Expand Up @@ -171,7 +171,7 @@ def test_run_train_missing_data(tmp_path, fitting_configs):
p = subprocess.run(cmd.split(), env=run_env, check=True)
assert p.returncode == 0

calc = MACECalculator(tmp_path / "MACE.model", device="cpu")
calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")

Es = []
for at in fitting_configs:
Expand Down Expand Up @@ -242,7 +242,7 @@ def test_run_train_no_stress(tmp_path, fitting_configs):
p = subprocess.run(cmd.split(), env=run_env, check=True)
assert p.returncode == 0

calc = MACECalculator(tmp_path / "MACE.model", device="cpu")
calc = MACECalculator(model_paths=tmp_path / "MACE.model", device="cpu")

Es = []
for at in fitting_configs:
Expand Down Expand Up @@ -349,7 +349,7 @@ def test_run_train_multihead(tmp_path, fitting_configs):
assert p.returncode == 0

calc = MACECalculator(
tmp_path / "MACE.model", device="cpu", default_dtype="float64"
model_paths=tmp_path / "MACE.model", device="cpu", default_dtype="float64"
)

Es = []
Expand Down Expand Up @@ -427,7 +427,7 @@ def test_run_train_foundation(tmp_path, fitting_configs):
assert p.returncode == 0

calc = MACECalculator(
tmp_path / "MACE.model", device="cpu", default_dtype="float64"
model_paths=tmp_path / "MACE.model", device="cpu", default_dtype="float64"
)

Es = []
Expand Down Expand Up @@ -536,7 +536,7 @@ def test_run_train_foundation_multihead(tmp_path, fitting_configs):
assert p.returncode == 0

calc = MACECalculator(
tmp_path / "MACE.model", device="cpu", default_dtype="float64"
model_paths=tmp_path / "MACE.model", device="cpu", default_dtype="float64"
)

Es = []
Expand Down
Loading