Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Implement ObjectDataProvider as a Provider #663

Merged
merged 2 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 2 additions & 21 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,6 @@ def generate_meta_tracklog() -> list[meta.TracklogEvent]:
]


def _get_objectdata_provider(
obj: types.Inferrable, dataio: ExportData, meta_existing: dict | None = None
) -> ObjectDataProvider:
"""Derive metadata for the object. Reuse metadata if existing"""
objdata = objectdata_provider_factory(obj, dataio, meta_existing)
objdata.derive_metadata()
return objdata


def _get_meta_filedata(
dataio: ExportData,
obj: types.Inferrable,
Expand All @@ -88,16 +79,6 @@ def _get_meta_filedata(
).get_metadata()


def _get_meta_objectdata(
objdata: ObjectDataProvider,
) -> meta.content.AnyContent | internal.UnsetAnyContent:
return (
internal.UnsetAnyContent.model_validate(objdata.metadata)
if objdata.metadata["content"] == "unset"
else meta.content.AnyContent.model_validate(objdata.metadata)
)


def _get_meta_fmu(fmudata: FmuProvider) -> internal.FMUClassMetaData | None:
try:
return fmudata.get_metadata()
Expand Down Expand Up @@ -173,7 +154,7 @@ def generate_export_metadata(
logger.info("Partially reuse existing metadata from %s", obj)
meta_existing = read_metadata_from_file(obj)

objdata = _get_objectdata_provider(obj, dataio, meta_existing)
objdata = objectdata_provider_factory(obj, dataio, meta_existing)
masterdata = dataio.config.get("masterdata")

metadata = internal.DataClassMeta(
Expand All @@ -184,7 +165,7 @@ def generate_export_metadata(
fmu=_get_meta_fmu(fmudata) if fmudata else None,
masterdata=_get_meta_masterdata(masterdata) if masterdata else None,
access=_get_meta_access(dataio),
data=_get_meta_objectdata(objdata),
data=objdata.get_metadata(),
file=_get_meta_filedata(dataio, obj, objdata, fmudata, compute_md5),
tracklog=generate_meta_tracklog(),
display=_get_meta_display(dataio, objdata),
Expand Down
220 changes: 115 additions & 105 deletions src/fmu/dataio/providers/objectdata/_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from abc import abstractmethod
from copy import deepcopy
from dataclasses import dataclass, field
from datetime import datetime
Expand All @@ -11,8 +11,14 @@
from fmu.dataio._definitions import ConfigurationError
from fmu.dataio._logging import null_logger
from fmu.dataio._utils import generate_description
from fmu.dataio.datastructure._internal.internal import AllowedContent
from fmu.dataio.datastructure.meta import content, enums
from fmu.dataio.datastructure._internal.internal import AllowedContent, UnsetAnyContent
from fmu.dataio.datastructure.meta.content import (
AnyContent,
FMUTimeObject,
Time,
)
from fmu.dataio.datastructure.meta.enums import ContentEnum
from fmu.dataio.providers._base import Provider

if TYPE_CHECKING:
from fmu.dataio.dataio import ExportData
Expand Down Expand Up @@ -41,14 +47,14 @@ class DerivedObjectDescriptor:
@dataclass
class DerivedNamedStratigraphy:
name: str
alias: list[str]
alias: list[str] = field(default_factory=list)

stratigraphic: bool
stratigraphic_alias: list[str]
stratigraphic: bool = field(default=False)
stratigraphic_alias: list[str] = field(default_factory=list)

offset: int
base: str | None
top: str | None
offset: float = field(default=0.0)
base: str | None = field(default=None)
top: str | None = field(default=None)


def derive_name(
Expand Down Expand Up @@ -111,14 +117,14 @@ def get_timedata_from_existing(meta_timedata: dict) -> tuple[datetime, datetime
)


def get_fmu_time_object(timedata_item: list[str]) -> content.FMUTimeObject:
def get_fmu_time_object(timedata_item: list[str]) -> FMUTimeObject:
"""
Returns a FMUTimeObject from a timedata item on list
format: ["20200101", "monitor"] where the first item is a date and
the last item is an optional label
"""
value, *label = timedata_item
return content.FMUTimeObject(
return FMUTimeObject(
value=datetime.strptime(str(value), "%Y%m%d"),
label=label[0] if label else None,
)
Expand All @@ -133,7 +139,7 @@ def get_validated_content(content: str | dict | None) -> AllowedContent:
return AllowedContent(content="unset")

if isinstance(content, str):
return AllowedContent(content=enums.ContentEnum(content))
return AllowedContent(content=ContentEnum(content))

if len(content) > 1:
raise ValueError(
Expand All @@ -146,12 +152,12 @@ def get_validated_content(content: str | dict | None) -> AllowedContent:
logger.debug("content_specific is %s", content_specific)

return AllowedContent.model_validate(
{"content": enums.ContentEnum(usecontent), "content_incl_specific": content}
{"content": ContentEnum(usecontent), "content_incl_specific": content}
)


@dataclass
class ObjectDataProvider(ABC):
class ObjectDataProvider(Provider):
"""Base class for providing metadata for data objects in fmu-dataio, e.g. a surface.

The metadata for the 'data' are constructed by:
Expand All @@ -178,18 +184,74 @@ class ObjectDataProvider(ABC):
time0: datetime | None = field(default=None)
time1: datetime | None = field(default=None)

@staticmethod
def _validate_get_ext(fmt: str, subtype: str, validator: dict[str, V]) -> V:
"""Validate that fmt (file format) matches data and return legal extension."""
try:
return validator[fmt]
except KeyError:
raise ConfigurationError(
f"The file format {fmt} is not supported. ",
f"Valid {subtype} formats are: {list(validator.keys())}",
def __post_init__(self) -> None:
"""Main function here, will populate the metadata block for 'data'."""

# Don't re-initialize data if it's coming from pre-existing metadata.
if self.metadata:
return
mferrera marked this conversation as resolved.
Show resolved Hide resolved

namedstratigraphy = self._derive_named_stratigraphy()
objres = self.get_objectdata()
content_model = get_validated_content(self.dataio.content)

if self.dataio.forcefolder:
if self.dataio.forcefolder.startswith("/"):
raise ValueError("Can't use absolute path as 'forcefolder'")
msg = (
f"The standard folder name is overrided from {objres.efolder} to "
f"{self.dataio.forcefolder}"
)
objres.efolder = self.dataio.forcefolder
logger.info(msg)
warn(msg, UserWarning)

self.metadata["name"] = namedstratigraphy.name
self.metadata["stratigraphic"] = namedstratigraphy.stratigraphic
self.metadata["offset"] = namedstratigraphy.offset
self.metadata["alias"] = namedstratigraphy.alias
self.metadata["top"] = namedstratigraphy.top
self.metadata["base"] = namedstratigraphy.base

self.metadata["content"] = (usecontent := content_model.content)
if content_model.content_incl_specific:
self.metadata[usecontent] = getattr(
content_model.content_incl_specific, usecontent, None
)

def _derive_name_stratigraphy(self) -> DerivedNamedStratigraphy:
self.metadata["tagname"] = self.dataio.tagname
self.metadata["format"] = objres.fmt
self.metadata["layout"] = objres.layout
self.metadata["unit"] = self.dataio.unit
self.metadata["vertical_domain"] = list(self.dataio.vertical_domain.keys())[0]
self.metadata["depth_reference"] = list(self.dataio.vertical_domain.values())[0]
self.metadata["spec"] = objres.spec
self.metadata["bbox"] = objres.bbox
self.metadata["table_index"] = objres.table_index
self.metadata["undef_is_zero"] = self.dataio.undef_is_zero

# timedata:
self.metadata["time"] = self._derive_timedata()
self.metadata["is_prediction"] = self.dataio.is_prediction
self.metadata["is_observation"] = self.dataio.is_observation
self.metadata["description"] = generate_description(self.dataio.description)

# the next is to give addition state variables identical values, and for
# consistency these are derived after all eventual validation and directly from
# the self.metadata fields:

self.name = self.metadata["name"]

# then there are a few settings that are not in the ``data`` metadata, but
# needed as data/variables in other classes:

self.efolder = objres.efolder
self.classname = objres.classname
self.extension = objres.extension
self.fmt = objres.fmt
logger.info("Derive all metadata for data object... DONE")

def _derive_named_stratigraphy(self) -> DerivedNamedStratigraphy:
"""Derive the name and stratigraphy for the object; may have several sources.

If not in input settings it is tried to be inferred from the xtgeo/pandas/...
Expand All @@ -201,24 +263,22 @@ def _derive_name_stratigraphy(self) -> DerivedNamedStratigraphy:
name = derive_name(self.dataio, self.obj)

# next check if usename has a "truename" and/or aliases from the config
strat = self.dataio.config.get("stratigraphy", {})
no_start_or_missing_name = strat is None or name not in strat
stratigraphy = self.dataio.config.get("stratigraphy", {})

if name not in stratigraphy:
return DerivedNamedStratigraphy(name=name)

named_stratigraphy = stratigraphy.get(name)
rv = DerivedNamedStratigraphy(
name=name if no_start_or_missing_name else strat[name].get("name", name),
alias=[] if no_start_or_missing_name else strat[name].get("alias", []),
stratigraphic=False
if no_start_or_missing_name
else strat[name].get("stratigraphic", False),
stratigraphic_alias=[]
if no_start_or_missing_name
else strat[name].get("stratigraphic_alias"),
offset=0.0 if no_start_or_missing_name else strat[name].get("offset", 0.0),
top=None if no_start_or_missing_name else strat[name].get("top"),
base=None if no_start_or_missing_name else strat[name].get("base"),
name=named_stratigraphy.get("name", name),
alias=named_stratigraphy.get("alias", []),
stratigraphic=named_stratigraphy.get("stratigraphic", False),
stratigraphic_alias=named_stratigraphy.get("stratigraphic_alias", []),
offset=named_stratigraphy.get("offset", 0.0),
top=named_stratigraphy.get("top"),
base=named_stratigraphy.get("base"),
)

if not no_start_or_missing_name and rv.name != "name":
if rv.name != "name":
rv.alias.append(name)

return rv
Expand Down Expand Up @@ -254,9 +314,7 @@ def _derive_timedata(self) -> dict[str, str] | None:

self.time0, self.time1 = start.value, stop.value if stop else None

return content.Time(t0=start, t1=stop).model_dump(
mode="json", exclude_none=True
)
return Time(t0=start, t1=stop).model_dump(mode="json", exclude_none=True)

@abstractmethod
def get_spec(self) -> AnySpecification | None:
Expand All @@ -270,72 +328,24 @@ def get_bbox(self) -> BoundingBox2D | BoundingBox3D | None:
def get_objectdata(self) -> DerivedObjectDescriptor:
raise NotImplementedError

def derive_metadata(self) -> None:
"""Main function here, will populate the metadata block for 'data'."""
logger.info("Derive all metadata for data object...")

namedstratigraphy = self._derive_name_stratigraphy()
objres = self.get_objectdata()
content_model = get_validated_content(self.dataio.content)

if self.dataio.forcefolder:
if self.dataio.forcefolder.startswith("/"):
raise ValueError("Can't use absolute path as 'forcefolder'")
msg = (
f"The standard folder name is overrided from {objres.efolder} to "
f"{self.dataio.forcefolder}"
)
objres.efolder = self.dataio.forcefolder
logger.info(msg)
warn(msg, UserWarning)

meta = self.metadata # shortform

meta["name"] = namedstratigraphy.name
meta["stratigraphic"] = namedstratigraphy.stratigraphic
meta["offset"] = namedstratigraphy.offset
meta["alias"] = namedstratigraphy.alias
meta["top"] = namedstratigraphy.top
meta["base"] = namedstratigraphy.base
def get_metadata(self) -> AnyContent | UnsetAnyContent:
return (
UnsetAnyContent.model_validate(self.metadata)
if self.metadata["content"] == "unset"
else AnyContent.model_validate(self.metadata)
)
mferrera marked this conversation as resolved.
Show resolved Hide resolved

meta["content"] = (usecontent := content_model.content)
if content_model.content_incl_specific:
meta[usecontent] = getattr(
content_model.content_incl_specific, usecontent, None
@staticmethod
def _validate_get_ext(fmt: str, subtype: str, validator: dict[str, V]) -> V:
mferrera marked this conversation as resolved.
Show resolved Hide resolved
"""Validate that fmt (file format) matches data and return legal extension."""
try:
return validator[fmt]
except KeyError:
raise ConfigurationError(
f"The file format {fmt} is not supported. ",
f"Valid {subtype} formats are: {list(validator.keys())}",
)

meta["tagname"] = self.dataio.tagname
meta["format"] = objres.fmt
meta["layout"] = objres.layout
meta["unit"] = self.dataio.unit
meta["vertical_domain"] = list(self.dataio.vertical_domain.keys())[0]
meta["depth_reference"] = list(self.dataio.vertical_domain.values())[0]
meta["spec"] = objres.spec
meta["bbox"] = objres.bbox
meta["table_index"] = objres.table_index
meta["undef_is_zero"] = self.dataio.undef_is_zero

# timedata:
meta["time"] = self._derive_timedata()
meta["is_prediction"] = self.dataio.is_prediction
meta["is_observation"] = self.dataio.is_observation
meta["description"] = generate_description(self.dataio.description)

# the next is to give addition state variables identical values, and for
# consistency these are derived after all eventual validation and directly from
# the self.metadata fields:

self.name = meta["name"]

# then there are a few settings that are not in the ``data`` metadata, but
# needed as data/variables in other classes:

self.efolder = objres.efolder
self.classname = objres.classname
self.extension = objres.extension
self.fmt = objres.fmt
logger.info("Derive all metadata for data object... DONE")

@classmethod
def from_metadata_dict(
cls, obj: Inferrable, dataio: ExportData, meta_existing: dict
Expand Down
Loading