Merge branch 'main' into openai-instructor

FAIRChemistry · Mar 20, 2024 · eed7657 · eed7657
2 parents c90d18c + a50a66c
commit eed7657
Show file tree

Hide file tree

Showing 26 changed files with 776 additions and 417 deletions.
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 <h1 align="center">
   Software-Driven RDM</br>
   <a href="https://badge.fury.io/py/sdRDM"><img src="https://badge.fury.io/py/sdRDM.svg" alt="PyPI version" height="18"></a>
-  <img src="https://img.shields.io/badge/python-3.9|3.10|3.11-blue.svg" alt="Build Badge">
+  <img src="https://img.shields.io/badge/python-3.9 | 3.10 | 3.11-blue.svg" alt="Build Badge">
   <img src="https://github.com/JR-1991/software-driven-rdm/actions/workflows/tests.yml/badge.svg" alt="Build Badge">
 </h1>
 <p align="center">

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sdRDM"
-version = "0.2.1"
+version = "0.2.2"
 description = "Software-driven RDM converts markdown data models into powerful objects that can be serialized to JSON, XML, YAML and HDF5"
 authors = ["Jan Range <[email protected]>"]
 license = "MIT License"
@@ -9,8 +9,8 @@ packages = [{include = "sdRDM"}]
 
 [tool.poetry.dependencies]
 python = "^3.9"
-pydantic = "^2.5.3"
-pydantic-xml = "^2.7.0"
+pydantic = "^2.6.0"
+pydantic-xml = "^2.9.0"
 numpy = "^1.26.3"
 pandas = "^2.1.4"
 jinja2 = "^3.1.3"
@@ -31,6 +31,7 @@ markdown-it-py = "^3.0.0"
 rich = "^13.7.0"
 lxml = "^5.1.0"
 instructor = "^0.4.6"
+python-frontmatter = "^1.1.0"
 
 [tool.poetry.group.dev.dependencies]
 coverage = "^7.4.0"

diff --git a/sdRDM/base/__init__.py b/sdRDM/base/__init__.py
@@ -1,2 +1,2 @@
 from .datamodel import DataModel
-from .linker import Linker
+from ..tools.linker import Linker
diff --git a/sdRDM/base/datamodel.py b/sdRDM/base/datamodel.py
@@ -19,6 +19,7 @@
 from anytree import Node, LevelOrderIter
 from bigtree import print_tree, levelorder_iter, yield_tree
 from functools import lru_cache
+from lxml.etree import _Element
 from pydantic import ConfigDict, PrivateAttr, field_validator
 from typing import (
     Any,
@@ -45,7 +46,6 @@
 from sdRDM.generator.utils import extract_modules
 from sdRDM.tools.utils import YAMLDumper
 from sdRDM.tools.gitutils import (
-    ObjectNode,
     build_library_from_git_specs,
     _import_library,
 )
@@ -179,7 +179,7 @@ def _get_by_meta_path(
                 query,
             )
 
-            if reference:
+            if reference is not None:
                 references.append(reference)
 
         return references
@@ -265,7 +265,7 @@ def paths(self, leaves: bool = False):
         """Returns all possible paths of an instantiated data model. Can also be reduced to just leaves."""
 
         # Get JSON representation
-        model = Nob(self.to_dict(warn=False))
+        model = Nob(self.to_dict(warn=False, mode="python"))
 
         if leaves:
             return model.leaves
@@ -329,6 +329,7 @@ def to_dict(
         mode="json",
         **kwargs,
     ):
+
         data = super().model_dump(
             exclude_none=exclude_none,
             by_alias=True,
@@ -621,23 +622,21 @@ def _is_hdf5(path: str):
 
     @classmethod
     def from_markdown(cls, path: str) -> ImportedModules:
-        """Fetches a Markdown specification from a git repository and builds the library accordingly.
-
-        This function will clone the repository into a temporary directory and
-        builds the correpsonding API and loads it into the memory. After that
-        the cloned repository is deleted and the root object(s) detected.
+        """Converts a markdown file into an in-memory Python API.
 
         Args:
-            url (str): Link to the git repository. Use the URL ending with ".git".
-            commit (Optional[str], optional): Hash of the commit to fetch from. Defaults to None.
+            path (str): Path to the markdown file.
         """
 
         with tempfile.TemporaryDirectory() as tmpdirname:
             # Generate API to parse the file
             lib_name = f"sdRDM-Library-{str(random.randint(0,30))}"
             api_loc = os.path.join(tmpdirname, lib_name)
             generate_python_api(
-                path=path, dirpath=tmpdirname, libname=lib_name, use_formatter=False
+                path=path,
+                dirpath=tmpdirname,
+                libname=lib_name,
+                use_formatter=False,
             )
 
             lib = _import_library(api_loc, lib_name)
@@ -663,6 +662,7 @@ def from_git(
             url (str): Link to the git repository. Use the URL ending with ".git".
             commit (Optional[str], optional): Hash of the commit to fetch from. Defaults to None.
             tag (Optional[str], optional): Tag of the release or branch to fetch from. Defaults to None.
+            only_classes (bool, optional): If True, only the classes will be returned. Defaults to False.
         """
 
         if not validators.url(url):

diff --git a/sdRDM/base/datatypes/unit.py b/sdRDM/base/datatypes/unit.py
@@ -1,9 +1,10 @@
 from uuid import uuid4
 from pydantic_xml import attr, element, wrapped
+from pydantic import model_validator
 import sdRDM
 
 from typing import List, Union
-from astropy.units import UnitBase, Unit as AstroUnit
+from astropy.units import UnitBase, Unit as AstroUnit, dimensionless_unscaled
 from pydantic import field_serializer, PrivateAttr
 
 
@@ -25,7 +26,6 @@ def _serialize_kind(self, v):
 
 class Unit(
     sdRDM.DataModel,
-    nsmap={"": "https://www.github.com/software-driven-rdm"},
     tag="Unit",
 ):
     """
@@ -46,9 +46,22 @@ class Unit(
 
     id: str = attr(name="id", default_factory=lambda: str(uuid4()))
     name: str = attr(name="name")
-    bases: List[BaseUnit] = element()
-    _unit: UnitBase = PrivateAttr()
-    _hash: int = PrivateAttr()
+    bases: List[BaseUnit] = wrapped(
+        "listOfUnits",
+        element(tag="unit"),
+    )
+
+    _unit: UnitBase = PrivateAttr(default=None)
+    _hash: int = PrivateAttr(default=None)
+
+    @model_validator(mode="after")
+    def create_astropy_unit(self):
+        if self._unit is None and self.name != "dimensionless":
+            self._unit = AstroUnit(self.name)
+        elif self.name == "dimensionless":
+            self._unit = AstroUnit(dimensionless_unscaled)
+
+        return self
 
     @classmethod
     def from_string(cls, unit_string: str):
@@ -64,6 +77,13 @@ def from_string(cls, unit_string: str):
         Raises:
             AssertionError: If the unit is not a UnitBase or Unit.
         """
+
+        if unit_string.lower() == "dimensionless":
+            return cls(
+                name="dimensionless",
+                bases=[],
+            )
+
         unit = AstroUnit(unit_string)
 
         assert isinstance(

diff --git a/sdRDM/base/ioutils/hdf5.py b/sdRDM/base/ioutils/hdf5.py
@@ -57,7 +57,6 @@ def write_hdf5(dataset, file: Union[H5File, str]):
         if is_array and not is_multiple_numeric:
             _write_array(attribute, data, group)
         elif not is_array and is_multiple_numeric:
-            print(np.array(data), np.array(data).shape)
             _write_array(attribute, np.array(data), group)
         else:
             _write_attr(attribute, data, group)  # type: ignore

diff --git a/sdRDM/cli.py b/sdRDM/cli.py
@@ -44,6 +44,10 @@ def generate(
         None,
         help="Commit hash from which this API was generated",
     ),
+    json_schemes: bool = typer.Option(
+        default=False,
+        help="Generate JSON schemes for the API",
+    ),
 ):
     """Generates a Python API based on the Markdown fiels found in the path.
 
@@ -60,7 +64,14 @@ def generate(
         # Convert into valid URL
         url = url.replace("git://", "https://", 1)
 
-    generate_python_api(path=path, dirpath=out, libname=name, commit=commit, url=url)
+    generate_python_api(
+        path=path,
+        dirpath=out,
+        libname=name,
+        commit=commit,
+        url=url,
+        json_schemes=json_schemes,
+    )
 
 
 @app.command()

diff --git a/sdRDM/generator/classrender.py b/sdRDM/generator/classrender.py
@@ -14,9 +14,11 @@ def render_object(
     objects: List[Dict],
     enums: List[Dict],
     inherits: List[Dict],
+    namespaces: Dict,
     repo: Optional[str] = None,
     commit: Optional[str] = None,
     small_types: Dict = {},
+    add_id_field: bool = True,
 ) -> str:
     """Renders a class of type object coming from a parsed Markdown model"""
 
@@ -31,6 +33,8 @@ def render_object(
                     objects=all_objects,
                     repo=repo,
                     commit=commit,
+                    namespaces=namespaces,
+                    add_id_field=add_id_field,
                 )
                 for subtype in small_types.values()
                 if subtype["origin"] == object["name"]
@@ -46,6 +50,8 @@ def render_object(
         objects=all_objects,
         repo=repo,
         commit=commit,
+        namespaces=namespaces,
+        add_id_field=add_id_field,
     )
 
     methods_part = render_add_methods(
@@ -84,6 +90,8 @@ def render_class(
     object: Dict,
     inherits: List[Dict],
     objects: List[Dict],
+    namespaces: Dict,
+    add_id_field: bool,
     repo: Optional[str] = None,
     commit: Optional[str] = None,
 ) -> str:
@@ -115,6 +123,8 @@ def render_class(
         ],
         repo=repo,
         commit=commit,
+        namespaces=namespaces,
+        add_id_field=add_id_field,
     )
 
 
@@ -147,15 +157,17 @@ def render_attribute(
         attribute["default_factory"] = "ListPlus"
     elif not is_multiple and is_all_optional:
         attribute["default_factory"] = f"{attribute['type'][0]}"
-        del attribute["default"]
+
+        if "default" in attribute:
+            del attribute["default"]
 
     if has_reference:
         reference_types = get_reference_type(attribute["reference"], objects)
         attribute["type"] += reference_types
 
-    if is_multiple and tag != "None":
-        xml_alias = tag
-        tag = attribute["type"][0]
+    if tag and len(tag.split("/")) > 1:
+        xml_alias = "/".join(tag.split("/")[:-1])
+        tag = _transform(attribute["type"][0], tag.split("/")[-1])
         wrapped = True
     else:
         xml_alias = None
@@ -453,6 +465,8 @@ def assemble_signature(
     except StopIteration:
         if type in small_types:
             sub_object = small_types[type]
+        elif type in DataTypes.__members__:
+            return []
         else:
             raise ValueError(f"Sub object '{type}' has no attributes.")
 
@@ -553,6 +567,10 @@ def render_imports(
             continue
 
         parent_type = inherit["parent"]
+
+        if parent_type in DataTypes.__members__:
+            continue
+
         all_types += gather_all_types(
             get_object(parent_type, objects)["attributes"],
             objects,
@@ -652,12 +670,25 @@ def process_subtypes(
     subtypes = gather_all_types(attributes, objects, small_types, object["name"])
 
     if object.get("parent"):
-        parent_obj = get_object(object["parent"], objects)
-        subtypes += gather_all_types(
-            parent_obj["attributes"], objects, small_types, parent_obj["name"]
-        )
+
+        if object["parent"] in DataTypes.__members__:
+            pass
+        else:
+            parent_obj = get_object(object["parent"], objects)
+            subtypes += gather_all_types(
+                parent_obj["attributes"], objects, small_types, parent_obj["name"]
+            )
 
     for subtype in subtypes:
         types.append(subtype)
 
     return types
+
+
+def _transform(dtype: str, tag: str) -> str:
+    """Transforms a dtype into a tag for special cases"""
+
+    if dtype == "MathML":
+        return "math"
+
+    return tag