Skip to content
This repository has been archived by the owner on Dec 8, 2024. It is now read-only.

Commit

Permalink
Merge branch 'main' into openai-instructor
Browse files Browse the repository at this point in the history
  • Loading branch information
JR-1991 committed Mar 20, 2024
2 parents c90d18c + a50a66c commit eed7657
Show file tree
Hide file tree
Showing 26 changed files with 776 additions and 417 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<h1 align="center">
Software-Driven RDM</br>
<a href="https://badge.fury.io/py/sdRDM"><img src="https://badge.fury.io/py/sdRDM.svg" alt="PyPI version" height="18"></a>
<img src="https://img.shields.io/badge/python-3.9|3.10|3.11-blue.svg" alt="Build Badge">
<img src="https://img.shields.io/badge/python-3.9 | 3.10 | 3.11-blue.svg" alt="Build Badge">
<img src="https://github.com/JR-1991/software-driven-rdm/actions/workflows/tests.yml/badge.svg" alt="Build Badge">
</h1>
<p align="center">
Expand Down
706 changes: 352 additions & 354 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "sdRDM"
version = "0.2.1"
version = "0.2.2"
description = "Software-driven RDM converts markdown data models into powerful objects that can be serialized to JSON, XML, YAML and HDF5"
authors = ["Jan Range <[email protected]>"]
license = "MIT License"
Expand All @@ -9,8 +9,8 @@ packages = [{include = "sdRDM"}]

[tool.poetry.dependencies]
python = "^3.9"
pydantic = "^2.5.3"
pydantic-xml = "^2.7.0"
pydantic = "^2.6.0"
pydantic-xml = "^2.9.0"
numpy = "^1.26.3"
pandas = "^2.1.4"
jinja2 = "^3.1.3"
Expand All @@ -31,6 +31,7 @@ markdown-it-py = "^3.0.0"
rich = "^13.7.0"
lxml = "^5.1.0"
instructor = "^0.4.6"
python-frontmatter = "^1.1.0"

[tool.poetry.group.dev.dependencies]
coverage = "^7.4.0"
Expand Down
2 changes: 1 addition & 1 deletion sdRDM/base/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .datamodel import DataModel
from .linker import Linker
from ..tools.linker import Linker
22 changes: 11 additions & 11 deletions sdRDM/base/datamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from anytree import Node, LevelOrderIter
from bigtree import print_tree, levelorder_iter, yield_tree
from functools import lru_cache
from lxml.etree import _Element
from pydantic import ConfigDict, PrivateAttr, field_validator
from typing import (
Any,
Expand All @@ -45,7 +46,6 @@
from sdRDM.generator.utils import extract_modules
from sdRDM.tools.utils import YAMLDumper
from sdRDM.tools.gitutils import (
ObjectNode,
build_library_from_git_specs,
_import_library,
)
Expand Down Expand Up @@ -179,7 +179,7 @@ def _get_by_meta_path(
query,
)

if reference:
if reference is not None:
references.append(reference)

return references
Expand Down Expand Up @@ -265,7 +265,7 @@ def paths(self, leaves: bool = False):
"""Returns all possible paths of an instantiated data model. Can also be reduced to just leaves."""

# Get JSON representation
model = Nob(self.to_dict(warn=False))
model = Nob(self.to_dict(warn=False, mode="python"))

if leaves:
return model.leaves
Expand Down Expand Up @@ -329,6 +329,7 @@ def to_dict(
mode="json",
**kwargs,
):

data = super().model_dump(
exclude_none=exclude_none,
by_alias=True,
Expand Down Expand Up @@ -621,23 +622,21 @@ def _is_hdf5(path: str):

@classmethod
def from_markdown(cls, path: str) -> ImportedModules:
"""Fetches a Markdown specification from a git repository and builds the library accordingly.
This function will clone the repository into a temporary directory and
builds the correpsonding API and loads it into the memory. After that
the cloned repository is deleted and the root object(s) detected.
"""Converts a markdown file into an in-memory Python API.
Args:
url (str): Link to the git repository. Use the URL ending with ".git".
commit (Optional[str], optional): Hash of the commit to fetch from. Defaults to None.
path (str): Path to the markdown file.
"""

with tempfile.TemporaryDirectory() as tmpdirname:
# Generate API to parse the file
lib_name = f"sdRDM-Library-{str(random.randint(0,30))}"
api_loc = os.path.join(tmpdirname, lib_name)
generate_python_api(
path=path, dirpath=tmpdirname, libname=lib_name, use_formatter=False
path=path,
dirpath=tmpdirname,
libname=lib_name,
use_formatter=False,
)

lib = _import_library(api_loc, lib_name)
Expand All @@ -663,6 +662,7 @@ def from_git(
url (str): Link to the git repository. Use the URL ending with ".git".
commit (Optional[str], optional): Hash of the commit to fetch from. Defaults to None.
tag (Optional[str], optional): Tag of the release or branch to fetch from. Defaults to None.
only_classes (bool, optional): If True, only the classes will be returned. Defaults to False.
"""

if not validators.url(url):
Expand Down
30 changes: 25 additions & 5 deletions sdRDM/base/datatypes/unit.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from uuid import uuid4
from pydantic_xml import attr, element, wrapped
from pydantic import model_validator
import sdRDM

from typing import List, Union
from astropy.units import UnitBase, Unit as AstroUnit
from astropy.units import UnitBase, Unit as AstroUnit, dimensionless_unscaled
from pydantic import field_serializer, PrivateAttr


Expand All @@ -25,7 +26,6 @@ def _serialize_kind(self, v):

class Unit(
sdRDM.DataModel,
nsmap={"": "https://www.github.com/software-driven-rdm"},
tag="Unit",
):
"""
Expand All @@ -46,9 +46,22 @@ class Unit(

id: str = attr(name="id", default_factory=lambda: str(uuid4()))
name: str = attr(name="name")
bases: List[BaseUnit] = element()
_unit: UnitBase = PrivateAttr()
_hash: int = PrivateAttr()
bases: List[BaseUnit] = wrapped(
"listOfUnits",
element(tag="unit"),
)

_unit: UnitBase = PrivateAttr(default=None)
_hash: int = PrivateAttr(default=None)

@model_validator(mode="after")
def create_astropy_unit(self):
    """Populate the private ``_unit`` attribute from ``name``.

    A unit named ``"dimensionless"`` is always mapped onto astropy's
    ``dimensionless_unscaled``. Any other name is handed to ``AstroUnit``,
    but only when ``_unit`` has not been set yet.

    Returns:
        The instance itself.
    """

    if self.name == "dimensionless":
        # "dimensionless" is not passed to AstroUnit as a string;
        # the dedicated astropy constant is used instead.
        self._unit = AstroUnit(dimensionless_unscaled)
    elif self._unit is None:
        self._unit = AstroUnit(self.name)

    return self

@classmethod
def from_string(cls, unit_string: str):
Expand All @@ -64,6 +77,13 @@ def from_string(cls, unit_string: str):
Raises:
AssertionError: If the unit is not a UnitBase or Unit.
"""

if unit_string.lower() == "dimensionless":
return cls(
name="dimensionless",
bases=[],
)

unit = AstroUnit(unit_string)

assert isinstance(
Expand Down
1 change: 0 additions & 1 deletion sdRDM/base/ioutils/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ def write_hdf5(dataset, file: Union[H5File, str]):
if is_array and not is_multiple_numeric:
_write_array(attribute, data, group)
elif not is_array and is_multiple_numeric:
print(np.array(data), np.array(data).shape)
_write_array(attribute, np.array(data), group)
else:
_write_attr(attribute, data, group) # type: ignore
Expand Down
13 changes: 12 additions & 1 deletion sdRDM/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ def generate(
None,
help="Commit hash from which this API was generated",
),
json_schemes: bool = typer.Option(
default=False,
help="Generate JSON schemes for the API",
),
):
"""Generates a Python API based on the Markdown files found in the path.
Expand All @@ -60,7 +64,14 @@ def generate(
# Convert into valid URL
url = url.replace("git://", "https://", 1)

generate_python_api(path=path, dirpath=out, libname=name, commit=commit, url=url)
generate_python_api(
path=path,
dirpath=out,
libname=name,
commit=commit,
url=url,
json_schemes=json_schemes,
)


@app.command()
Expand Down
47 changes: 39 additions & 8 deletions sdRDM/generator/classrender.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ def render_object(
objects: List[Dict],
enums: List[Dict],
inherits: List[Dict],
namespaces: Dict,
repo: Optional[str] = None,
commit: Optional[str] = None,
small_types: Dict = {},
add_id_field: bool = True,
) -> str:
"""Renders a class of type object coming from a parsed Markdown model"""

Expand All @@ -31,6 +33,8 @@ def render_object(
objects=all_objects,
repo=repo,
commit=commit,
namespaces=namespaces,
add_id_field=add_id_field,
)
for subtype in small_types.values()
if subtype["origin"] == object["name"]
Expand All @@ -46,6 +50,8 @@ def render_object(
objects=all_objects,
repo=repo,
commit=commit,
namespaces=namespaces,
add_id_field=add_id_field,
)

methods_part = render_add_methods(
Expand Down Expand Up @@ -84,6 +90,8 @@ def render_class(
object: Dict,
inherits: List[Dict],
objects: List[Dict],
namespaces: Dict,
add_id_field: bool,
repo: Optional[str] = None,
commit: Optional[str] = None,
) -> str:
Expand Down Expand Up @@ -115,6 +123,8 @@ def render_class(
],
repo=repo,
commit=commit,
namespaces=namespaces,
add_id_field=add_id_field,
)


Expand Down Expand Up @@ -147,15 +157,17 @@ def render_attribute(
attribute["default_factory"] = "ListPlus"
elif not is_multiple and is_all_optional:
attribute["default_factory"] = f"{attribute['type'][0]}"
del attribute["default"]

if "default" in attribute:
del attribute["default"]

if has_reference:
reference_types = get_reference_type(attribute["reference"], objects)
attribute["type"] += reference_types

if is_multiple and tag != "None":
xml_alias = tag
tag = attribute["type"][0]
if tag and len(tag.split("/")) > 1:
xml_alias = "/".join(tag.split("/")[:-1])
tag = _transform(attribute["type"][0], tag.split("/")[-1])
wrapped = True
else:
xml_alias = None
Expand Down Expand Up @@ -453,6 +465,8 @@ def assemble_signature(
except StopIteration:
if type in small_types:
sub_object = small_types[type]
elif type in DataTypes.__members__:
return []
else:
raise ValueError(f"Sub object '{type}' has no attributes.")

Expand Down Expand Up @@ -553,6 +567,10 @@ def render_imports(
continue

parent_type = inherit["parent"]

if parent_type in DataTypes.__members__:
continue

all_types += gather_all_types(
get_object(parent_type, objects)["attributes"],
objects,
Expand Down Expand Up @@ -652,12 +670,25 @@ def process_subtypes(
subtypes = gather_all_types(attributes, objects, small_types, object["name"])

if object.get("parent"):
parent_obj = get_object(object["parent"], objects)
subtypes += gather_all_types(
parent_obj["attributes"], objects, small_types, parent_obj["name"]
)

if object["parent"] in DataTypes.__members__:
pass
else:
parent_obj = get_object(object["parent"], objects)
subtypes += gather_all_types(
parent_obj["attributes"], objects, small_types, parent_obj["name"]
)

for subtype in subtypes:
types.append(subtype)

return types


def _transform(dtype: str, tag: str) -> str:
"""Transforms a dtype into a tag for special cases"""

if dtype == "MathML":
return "math"

return tag
Loading

0 comments on commit eed7657

Please sign in to comment.