Skip to content

Commit

Permalink
Use .json.gz rather than .json to save space, and update tutorial…
Browse files Browse the repository at this point in the history
…s & docstrings
  • Loading branch information
kavanase committed Jul 1, 2024
1 parent 50839a5 commit 1a1b5cd
Show file tree
Hide file tree
Showing 641 changed files with 258 additions and 503 deletions.
8 changes: 4 additions & 4 deletions docs/Tips.rst
Original file line number Diff line number Diff line change
Expand Up @@ -465,21 +465,21 @@ various stages in the tutorials, this can be achieved using the ``dumpfn``/``loa
.. code-block:: python
# save a DefectThermodynamics object to a JSON file
defect_thermo.to_json("MgO_DefectThermodynamics.json")
defect_thermo.to_json("MgO_DefectThermodynamics.json.gz")
# then later in a different python session or notebook, we can reload the
# DefectThermodynamics object from the JSON file, containing all the associated info
from doped.thermodynamics import DefectThermodynamics
defect_thermodynamics = DefectThermodynamics.from_json("MgO_DefectThermodynamics.json")
defect_thermodynamics = DefectThermodynamics.from_json("MgO_DefectThermodynamics.json.gz")
# alternatively, we can use the monty dumpfn/loadfn functions
# directly on any doped object, e.g. with our ``DefectsSet`` object
# containing all the info on the generated VASP input files:
from monty.serialization import dumpfn, loadfn
dumpfn(obj=defects_set, fn="MgO_DefectsSet.json")
dumpfn(obj=defects_set, fn="MgO_DefectsSet.json.gz")
# and again later reload the object from the JSON file
defects_set = loadfn("MgO_DefectsSet.json")
defects_set = loadfn("MgO_DefectsSet.json.gz")
.. note::

Expand Down
4 changes: 2 additions & 2 deletions doped/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ def __init__(
calculation provenance. Can be reloaded using the ``loadfn`` function
from ``monty.serialization`` (and then input to ``DefectThermodynamics``
etc.). If ``None`` (default), set as
``{Host Chemical Formula}_defect_dict.json``.
``{Host Chemical Formula}_defect_dict.json.gz``.
If ``False``, no json file is saved.
parse_projected_eigen (bool):
Whether to parse the projected eigenvalues & orbitals from the bulk and
Expand Down Expand Up @@ -1167,7 +1167,7 @@ def _call_multiple_corrections_tolerance_warning(correction_errors, type="FNV"):
formula = next(
iter(self.defect_dict.values())
).defect.structure.composition.get_reduced_formula_and_factor(iupac_ordering=True)[0]
self.json_filename = f"{formula}_defect_dict.json"
self.json_filename = f"{formula}_defect_dict.json.gz"

dumpfn(self.defect_dict, os.path.join(self.output_path, self.json_filename)) # type: ignore

Expand Down
22 changes: 16 additions & 6 deletions doped/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,20 +181,25 @@ def to_json(self, filename: Optional[PathLike] = None):
Save the ``DefectEntry`` object to a json file, which can be reloaded
with the ``DefectEntry.from_json()`` class method.
Note that files with a ".gz" extension will be automatically compressed
(recommended to save space)!
Args:
filename (PathLike):
Filename to save json file as. If None, the filename will
be set as ``"{DefectEntry.name}.json"``.
be set as ``{DefectEntry.name}.json.gz``.
"""
if filename is None:
filename = f"{self.name}.json"
filename = f"{self.name}.json.gz"

dumpfn(self, filename)

@classmethod
def from_json(cls, filename: str):
"""
Load a ``DefectEntry`` object from a json file.
Load a ``DefectEntry`` object from a json(.gz) file.
Note that ``.json.gz`` files can be loaded directly.
Args:
filename (PathLike):
Expand Down Expand Up @@ -1947,20 +1952,25 @@ def to_json(self, filename: Optional[PathLike] = None):
Save the ``Defect`` object to a json file, which can be reloaded with
the ``Defect.from_json()`` class method.
Note that files with a ".gz" extension will be automatically compressed
(recommended to save space)!
Args:
filename (PathLike):
Filename to save json file as. If None, the filename will
be set as "{Defect.name}.json".
be set as "{Defect.name}.json.gz".
"""
if filename is None:
filename = f"{self.name}.json"
filename = f"{self.name}.json.gz"

dumpfn(self, filename)

@classmethod
def from_json(cls, filename: str):
"""
Load a ``Defect`` object from a json file.
Load a ``Defect`` object from a json(.gz) file.
Note that ``.json.gz`` files can be loaded directly.
Args:
filename (PathLike):
Expand Down
16 changes: 11 additions & 5 deletions doped/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2005,23 +2005,29 @@ def to_json(self, filename: Optional[PathLike] = None):
Save the ``DefectsGenerator`` object as a json file, which can be
reloaded with the ``DefectsGenerator.from_json()`` class method.
Note that files with a ".gz" extension will be automatically compressed
(recommended to save space)!
Args:
filename (PathLike): Filename to save json file as. If None, the filename will be
set as "{Chemical Formula}_defects_generator.json" where {Chemical Formula}
is the chemical formula of the host material.
filename (PathLike):
Filename to save json file as. If None, the filename will be
set as ``{Chemical Formula}_defects_generator.json.gz`` where
{Chemical Formula} is the chemical formula of the host material.
"""
if filename is None:
formula = self.primitive_structure.composition.get_reduced_formula_and_factor(
iupac_ordering=True
)[0]
filename = f"{formula}_defects_generator.json"
filename = f"{formula}_defects_generator.json.gz"

dumpfn(self, filename)

@classmethod
def from_json(cls, filename: PathLike):
"""
Load a ``DefectsGenerator`` object from a json file.
Load a ``DefectsGenerator`` object from a json(.gz) file.
Note that ``.json.gz`` files can be loaded directly.
Args:
filename (PathLike):
Expand Down
16 changes: 11 additions & 5 deletions doped/thermodynamics.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,23 +622,29 @@ def to_json(self, filename: Optional[PathLike] = None):
Save the ``DefectThermodynamics`` object as a json file, which can be
reloaded with the ``DefectThermodynamics.from_json()`` class method.
Note that files with a ".gz" extension will be automatically compressed
(recommended to save space)!
Args:
filename (PathLike): Filename to save json file as. If None, the filename will be
set as "{Chemical Formula}_defect_thermodynamics.json" where
filename (PathLike): Filename to save json file as. If None, the
filename will be set as
``{Chemical Formula}_defect_thermodynamics.json.gz`` where
{Chemical Formula} is the chemical formula of the host material.
"""
if filename is None:
if self.bulk_formula is not None:
filename = f"{self.bulk_formula}_defect_thermodynamics.json"
filename = f"{self.bulk_formula}_defect_thermodynamics.json.gz"
else:
filename = "defect_thermodynamics.json"
filename = "defect_thermodynamics.json.gz"

dumpfn(self, filename)

@classmethod
def from_json(cls, filename: PathLike):
"""
Load a ``DefectThermodynamics`` object from a json file.
Load a ``DefectThermodynamics`` object from a json(.gz) file.
Note that ``.json.gz`` files can be loaded directly.
Args:
filename (PathLike):
Expand Down
74 changes: 41 additions & 33 deletions doped/vasp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,7 +1272,7 @@ def _write_vasp_xxx_files(
)

if "bulk" not in defect_dir: # not a bulk supercell
self.defect_entry.to_json(f"{output_path}/{self.defect_entry.name}.json")
self.defect_entry.to_json(f"{output_path}/{self.defect_entry.name}.json.gz")

def write_gam(
self,
Expand Down Expand Up @@ -1301,11 +1301,12 @@ def write_gam(
KPAR matching your HPC setup.**
Note that any changes to the default ``INCAR``/``POTCAR`` settings should
be consistent with those used for all defect and competing phase (
chemical potential) calculations.
be consistent with those used for all defect and competing phase
(chemical potential) calculations.
The ``DefectEntry`` object is also written to a ``json`` file in
``defect_dir`` to aid calculation provenance.
The ``DefectEntry`` object is also written to a ``json.gz`` file in
``defect_dir`` to aid calculation provenance --- can be reloaded directly
with ``loadfn()`` from ``monty.serialization``, or ``DefectEntry.from_json()``.
Args:
defect_dir (PathLike):
Expand Down Expand Up @@ -1398,11 +1399,12 @@ def write_std(
KPAR matching your HPC setup.**
Note that any changes to the default ``INCAR``/``POTCAR`` settings should
be consistent with those used for all defect and competing phase (
chemical potential) calculations.
be consistent with those used for all defect and competing phase
(chemical potential) calculations.
The ``DefectEntry`` object is also written to a ``json`` file in
``defect_dir`` to aid calculation provenance.
The ``DefectEntry`` object is also written to a ``json.gz`` file in
``defect_dir`` to aid calculation provenance --- can be reloaded directly
with ``loadfn()`` from ``monty.serialization``, or ``DefectEntry.from_json()``.
Args:
defect_dir (PathLike):
Expand Down Expand Up @@ -1501,11 +1503,12 @@ def write_nkred_std(
KPAR matching your HPC setup.**
Note that any changes to the default ``INCAR``/``POTCAR`` settings should
be consistent with those used for all defect and competing phase (
chemical potential) calculations.
be consistent with those used for all defect and competing phase
(chemical potential) calculations.
The ``DefectEntry`` object is also written to a ``json`` file in
``defect_dir`` to aid calculation provenance.
The ``DefectEntry`` object is also written to a ``json.gz`` file in
``defect_dir`` to aid calculation provenance --- can be reloaded directly
with ``loadfn()`` from ``monty.serialization``, or ``DefectEntry.from_json()``.
Args:
defect_dir (PathLike):
Expand Down Expand Up @@ -1604,11 +1607,12 @@ def write_ncl(
KPAR matching your HPC setup.**
Note that any changes to the default ``INCAR``/``POTCAR`` settings should
be consistent with those used for all defect and competing phase (
chemical potential) calculations.
be consistent with those used for all defect and competing phase
(chemical potential) calculations.
The ``DefectEntry`` object is also written to a ``json`` file in
``defect_dir`` to aid calculation provenance.
The ``DefectEntry`` object is also written to a ``json.gz`` file in
``defect_dir`` to aid calculation provenance --- can be reloaded directly
with ``loadfn()`` from ``monty.serialization``, or ``DefectEntry.from_json()``.
Args:
defect_dir (PathLike):
Expand Down Expand Up @@ -1743,11 +1747,12 @@ def write_all(
KPAR matching your HPC setup.**
Note that any changes to the default ``INCAR``/``POTCAR`` settings should
be consistent with those used for all defect and competing phase (
chemical potential) calculations.
be consistent with those used for all defect and competing phase
(chemical potential) calculations.
The ``DefectEntry`` object is also written to a ``json`` file in
``defect_dir`` to aid calculation provenance.
The ``DefectEntry`` object is also written to a ``json.gz`` file in
``defect_dir`` to aid calculation provenance --- can be reloaded directly
with ``loadfn()`` from ``monty.serialization``, or ``DefectEntry.from_json()``.
Args:
defect_dir (PathLike):
Expand Down Expand Up @@ -2060,11 +2065,12 @@ def _format_defect_entries_input(
) -> tuple[dict[str, DefectEntry], str, Union[dict[str, DefectEntry], DefectsGenerator]]:
r"""
Helper function to format input ``defect_entries`` into a named
dictionary of ``DefectEntry`` objects. Also returns the name of the
JSON file and object to serialise when writing the VASP input to files.
This is the DefectsGenerator object if ``defect_entries`` is a
``DefectsGenerator`` object, otherwise the dictionary of
``DefectEntry`` objects.
dictionary of ``DefectEntry`` objects.
Also returns the name of the JSON file and object to serialise
when writing the VASP input to files. This is the DefectsGenerator
object if ``defect_entries`` is a ``DefectsGenerator`` object,
otherwise the dictionary of ``DefectEntry`` objects.
Args:
defect_entries (``DefectsGenerator``, dict/list of ``DefectEntry``\s, or ``DefectEntry``):
Expand All @@ -2078,14 +2084,14 @@ def _format_defect_entries_input(
``DefectEntry.name`` if the ``name`` attribute is set, otherwise
generated according to the ``doped`` convention (see doped.generation).
"""
json_filename = "defect_entries.json" # global statement in case, but should be skipped
json_filename = "defect_entries.json.gz" # global statement in case, but should be skipped
json_obj = defect_entries
if type(defect_entries).__name__ == "DefectsGenerator":
defect_entries = cast(DefectsGenerator, defect_entries)
formula = defect_entries.primitive_structure.composition.get_reduced_formula_and_factor(
iupac_ordering=True
)[0]
json_filename = f"{formula}_defects_generator.json"
json_filename = f"{formula}_defects_generator.json.gz"
json_obj = defect_entries
defect_entries = defect_entries.defect_entries

Expand Down Expand Up @@ -2124,7 +2130,7 @@ def _format_defect_entries_input(
formula = defect_entry_list[0].defect.structure.composition.get_reduced_formula_and_factor(
iupac_ordering=True
)[0]
json_filename = f"{formula}_defect_entries.json"
json_filename = f"{formula}_defect_entries.json.gz"
json_obj = defect_entries

# check correct format:
Expand Down Expand Up @@ -2220,9 +2226,11 @@ def write_files(
input files for all VASP calculations (gam/std/ncl) are written to the bulk
supercell folder, or if ``bulk = False``, then no bulk folder is created.
The ``DefectEntry`` objects are also written to ``json`` files in the defect
The ``DefectEntry`` objects are also written to ``json.gz`` files in the defect
folders, as well as ``self.defect_entries`` (``self.json_obj``) in the top
folder, to aid calculation provenance.
folder, to aid calculation provenance --- these can be reloaded directly
with ``loadfn()`` from ``monty.serialization``, or individually with
``DefectEntry.from_json()``.
See the ``RelaxSet.yaml`` and ``DefectSet.yaml`` files in the
``doped/VASP_sets`` folder for the default ``INCAR`` and ``KPOINT`` settings,
Expand Down Expand Up @@ -2332,8 +2340,8 @@ def __repr__(self):

# TODO: Go through and update docstrings with descriptions of all the default behaviour (INCAR,
# KPOINTS settings etc)
# TODO: Ensure json serializability, and have optional parameter to output DefectRelaxSet jsons to
# written folders as well (but off by default)
# TODO: Have optional parameter to output DefectRelaxSet jsons to written folders as well (but off by
# default)?
# TODO: Likewise, add same to/from json etc. functions for DefectRelaxSet. __Dict__ methods apply
# to `.defect_sets` etc?
# TODO: Implement renaming folders like SnB if we try to write a folder that already exists,
Expand Down
1 change: 0 additions & 1 deletion examples/CdTe/CdTe_2D_defect_carrier_concentrations.json

This file was deleted.

Binary file not shown.
1 change: 0 additions & 1 deletion examples/CdTe/CdTe_LZ_thermo_wout_meta.json

This file was deleted.

Binary file added examples/CdTe/CdTe_LZ_thermo_wout_meta.json.gz
Binary file not shown.
1 change: 0 additions & 1 deletion examples/CdTe/CdTe_example_defect_dict.json

This file was deleted.

Binary file added examples/CdTe/CdTe_example_defect_dict.json.gz
Binary file not shown.
1 change: 0 additions & 1 deletion examples/CdTe/CdTe_example_thermo.json

This file was deleted.

Binary file added examples/CdTe/CdTe_example_thermo.json.gz
Binary file not shown.
1 change: 0 additions & 1 deletion examples/CdTe/CdTe_thermo_wout_meta.json

This file was deleted.

Binary file added examples/CdTe/CdTe_thermo_wout_meta.json.gz
Binary file not shown.
1 change: 0 additions & 1 deletion examples/MgO/Defects/MgO_defect_dict.json

This file was deleted.

Binary file added examples/MgO/Defects/MgO_defect_dict.json.gz
Binary file not shown.
1 change: 0 additions & 1 deletion examples/MgO/Defects/Mg_O_+1/vasp_std/Mg_O_+1.json

This file was deleted.

Binary file not shown.
1 change: 0 additions & 1 deletion examples/MgO/Defects/Mg_O_+2/vasp_std/Mg_O_+2.json

This file was deleted.

Binary file not shown.
1 change: 0 additions & 1 deletion examples/MgO/Defects/Mg_O_+3/vasp_std/Mg_O_+3.json

This file was deleted.

Binary file not shown.
1 change: 0 additions & 1 deletion examples/MgO/Defects/Mg_O_+4/vasp_std/Mg_O_+4.json

This file was deleted.

Binary file not shown.
1 change: 0 additions & 1 deletion examples/MgO/Defects/Mg_O_0/vasp_std/Mg_O_0.json

This file was deleted.

Binary file not shown.
1 change: 0 additions & 1 deletion examples/MgO/MgO_defect_dict.json

This file was deleted.

Binary file added examples/MgO/MgO_defect_dict.json.gz
Binary file not shown.
1 change: 0 additions & 1 deletion examples/MgO/MgO_defects_generator.json

This file was deleted.

Binary file added examples/MgO/MgO_defects_generator.json.gz
Binary file not shown.
1 change: 0 additions & 1 deletion examples/MgO/MgO_thermo.json

This file was deleted.

Binary file added examples/MgO/MgO_thermo.json.gz
Binary file not shown.
1 change: 0 additions & 1 deletion examples/Sb2Si2Te6/Sb2Si2Te6_example_defect_dict.json

This file was deleted.

Binary file not shown.
1 change: 0 additions & 1 deletion examples/Sb2Si2Te6/Sb2Si2Te6_example_thermo.json

This file was deleted.

Binary file not shown.
1 change: 0 additions & 1 deletion examples/YTOS/YTOS_example_defect_dict.json

This file was deleted.

Binary file added examples/YTOS/YTOS_example_defect_dict.json.gz
Binary file not shown.
1 change: 0 additions & 1 deletion examples/YTOS/YTOS_example_thermo.json

This file was deleted.

Binary file added examples/YTOS/YTOS_example_thermo.json.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion examples/generation_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1775,7 +1775,7 @@
},
"source": [
"As well as the `INCAR`, `POTCAR` and `KPOINTS` files for each calculation in the defects workflow, we\n",
"also see that a `{defect_species}.json` file is created by default, which contains the corresponding\n",
"also see that a `{defect_species}.json.gz` file is created by default, which contains the corresponding\n",
"`DefectEntry` python object, which can later be reloaded with `DefectEntry.from_json()` (useful if we\n",
"later want to recheck some of the defect generation info).\n",
"The `DefectsGenerator` object is also saved to `JSON` by default here, which can be reloaded with\n",
Expand Down
8 changes: 4 additions & 4 deletions examples/parsing_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@
"outputs": [],
"source": [
"from monty.serialization import dumpfn, loadfn\n",
"dumpfn(dp.defect_dict, fn=\"CdTe_defect_dict.json\") # save parsed defect entries to file"
"dumpfn(dp.defect_dict, fn=\"CdTe_defect_dict.json.gz\") # save parsed defect entries to file"
]
},
{
Expand All @@ -248,7 +248,7 @@
"outputs": [],
"source": [
"# we can then reload these parsed defect entries from file at any later point with:\n",
"CdTe_defect_dict = loadfn(\"CdTe_defect_dict.json\")"
"CdTe_defect_dict = loadfn(\"CdTe_defect_dict.json.gz\")"
]
},
{
Expand Down Expand Up @@ -341,7 +341,7 @@
"outputs": [],
"source": [
"CdTe_example_thermo = dp.get_defect_thermodynamics(chempots=CdTe_chempots) # optionally input chempots now, to avoid having to input later\n",
"dumpfn(CdTe_example_thermo, fn=\"CdTe_example_thermo.json\") # save parsed DefectThermodynamics to file, so we don't need to regenerate it later"
"dumpfn(CdTe_example_thermo, fn=\"CdTe_example_thermo.json.gz\") # save parsed DefectThermodynamics to file, so we don't need to regenerate it later"
]
},
{
Expand Down Expand Up @@ -432,7 +432,7 @@
}
],
"source": [
"CdTe_defects_thermo = loadfn(\"CdTe/CdTe_thermo_wout_meta.json\") # excludes metastable states\n",
"CdTe_defects_thermo = loadfn(\"CdTe/CdTe_thermo_wout_meta.json.gz\") # excludes metastable states\n",
"def_plot = CdTe_defects_thermo.plot(limit=\"Te-rich\")"
]
},
Expand Down
Loading

0 comments on commit 1a1b5cd

Please sign in to comment.