Feature/add vfp (#10)
* Move SUBMODULES and SUBMOD_DICT

* Add assignment of contents
daniel-sol authored Oct 31, 2023
1 parent 4e6335a commit 47986f5
Showing 4 changed files with 146 additions and 93 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -159,3 +159,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

*version.py
118 changes: 118 additions & 0 deletions src/fmu/sumo/sim2sumo/_special_treatments.py
@@ -1,5 +1,120 @@
"""Special treatment of some options used in ecl2df"""
from inspect import signature
import importlib
import logging
from pathlib import Path
import ecl2df
from ecl2df.common import convert_lyrlist_to_zonemap, parse_lyrfile
import pandas as pd
import pyarrow as pa


def convert_to_arrow(frame):
"""Convert pd.DataFrame to arrow
Args:
frame (pd.DataFrame): the frame to convert
Returns:
pa.Table: the converted dataframe
"""
logger = logging.getLogger(__file__ + ".convert_to_arrow")
logger.debug("!!!!Using convert to arrow!!!")
standard = {"DATE": pa.timestamp("ms")}
if "DATE" in frame.columns:
frame["DATE"] = pd.to_datetime(frame["DATE"], infer_datetime_format=True)
scheme = []
for column_name in frame.columns:
if pd.api.types.is_string_dtype(frame[column_name]):
scheme.append((column_name, pa.string()))
else:
scheme.append((column_name, standard.get(column_name, pa.float32())))
logger.debug(scheme)
table = pa.Table.from_pandas(frame, schema=pa.schema(scheme))
return table
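
A minimal usage sketch of the fallback convertor (an editorial aside, not part of this diff; the frame below is hypothetical): string columns map to pa.string(), a DATE column to a millisecond timestamp, and everything else falls back to pa.float32().

# Sketch only, assuming a toy frame; mirrors the schema rules above.
frame = pd.DataFrame(
    {
        "WELL": ["OP_1", "OP_2"],              # string -> pa.string()
        "DATE": ["2020-01-01", "2020-02-01"],  # -> pa.timestamp("ms")
        "FOPT": [1.0, 2.5],                    # numeric -> pa.float32()
    }
)
table = convert_to_arrow(frame)
assert table.schema.field("DATE").type == pa.timestamp("ms")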


def find_arrow_convertor(path):
"""Find function for converting pandas dataframe to arrow
Args:
path (str): path to where to look for function
Returns:
function: function for converting to arrow
"""
logger = logging.getLogger(__file__ + ".find_arrow_convertor")
try:
func = importlib.import_module(path)._df2pyarrow
except AttributeError:
logger.info(
"No premade function for converting to arrow in %s",
path,
)
func = convert_to_arrow

return func
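
As an aside, the lookup above is an import-or-fallback pattern; an equivalent getattr formulation (a sketch, not the committed code) makes the fallback explicit:

# Sketch: same behaviour as find_arrow_convertor, written with getattr.
# The commit catches AttributeError instead, to the same effect.
def resolve_convertor(path, fallback=convert_to_arrow):
    module = importlib.import_module(path)  # e.g. "ecl2df.summary"
    return getattr(module, "_df2pyarrow", fallback)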


def find_functions_and_docstring(submod):
"""Find functions for extracting and converting from eclipse native
Args:
submod (str): path to where to look for function
Returns:
dictionary: includes functions and doc string
"""
logger = logging.getLogger(__file__ + ".find_func_and_info")

import_path = "ecl2df." + submod
func = importlib.import_module(import_path).df
logger.debug("Assigning %s to %s", func.__name__, submod)
returns = {
"extract": func,
"options": tuple(
name
for name in signature(func).parameters.keys()
if name not in {"deck", "eclfiles"}
),
"arrow_convertor": find_arrow_convertor(import_path),
"doc": func.__doc__,
}

return returns
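
For orientation (an aside; the option names are illustrative, not verified for every submodule), the dictionary returned for a submodule such as summary has this shape:

# Shape sketch only, option names are examples:
# {
#     "extract": ecl2df.summary.df,
#     "options": ("time_index", "column_keys", ...),  # df() parameters
#                                                     # minus deck/eclfiles
#     "arrow_convertor": _df2pyarrow,  # or the convert_to_arrow fallback
#     "doc": ecl2df.summary.df.__doc__,
# }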


def _define_submodules():
"""Fetch all submodules
Returns:
list: list of submodules
"""

    logger = logging.getLogger(__file__ + ".define_submodules")
package_path = Path(ecl2df.__file__).parent

submodules = {}
submod_paths = list(package_path.glob("*.py"))
    # vfp breaks the pattern
submod_paths.append("_vfp.py")
for submod_path in submod_paths:
try:
submod_string = str(submod_path.name.replace(".py", ""))
submod = submod_string
except AttributeError:
submod_string = "vfp._vfp"
submod = "vfp"
try:
submodules[submod] = find_functions_and_docstring(submod_string)
logger.debug("Assigning %s to %s", submodules[submod], submod)
except AttributeError:
logger.debug("No df function in %s", submod_path)

logger.debug("Returning the submodule names as a list: %s ", submodules.keys())
logger.debug("Returning the submodules extra args as a dictionary: %s ", submodules)

return tuple(submodules.keys()), submodules


def convert_options(options):
@@ -16,3 +131,6 @@ def convert_options(options):
parse_lyrfile(options["zonemap"])
)
return options


SUBMODULES, SUBMOD_DICT = _define_submodules()
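
These module-level constants are what sim2sumo.py now imports instead of defining them itself (next file); a sketch of how they are consumed, mirroring the get_results code further down:

# Sketch of downstream use; "summary" stands in for any SUBMODULES name.
from fmu.sumo.sim2sumo._special_treatments import SUBMODULES, SUBMOD_DICT

submod = "summary"
extract = SUBMOD_DICT[submod]["extract"]           # the ecl2df df() function
to_arrow = SUBMOD_DICT[submod]["arrow_convertor"]  # per-submodule convertor
options = SUBMOD_DICT[submod]["options"]           # accepted keyword options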
104 changes: 17 additions & 87 deletions src/fmu/sumo/sim2sumo/sim2sumo.py
@@ -4,17 +4,14 @@
from typing import Union
from pathlib import Path
import logging
import importlib
import argparse
from inspect import signature
import pandas as pd
import ecl2df as sim2df
import ecl2df
import pyarrow as pa
import yaml
from fmu.dataio import ExportData
from fmu.sumo.uploader.scripts.sumo_upload import sumo_upload_main
from ._special_treatments import convert_options
from ._special_treatments import SUBMODULES, SUBMOD_DICT, convert_options


def yaml_load(file_name):
@@ -36,52 +33,6 @@ def yaml_load(file_name):
return config


def _define_submodules():
"""Fetch all submodules
Returns:
list: list of submodules
"""

logger = logging.getLogger(__file__ + "define_submodules")
package_path = Path(ecl2df.__file__).parent

submodules = {}
for submod_path in package_path.glob("*.py"):
submod = str(submod_path.name.replace(".py", ""))
try:
func = importlib.import_module("ecl2df." + submod).df
except AttributeError:
logger.debug("No df function in %s", submod_path)
continue
submodules[submod] = {"extract": func}
submodules[submod]["options"] = tuple(
name
for name in signature(func).parameters.keys()
if name not in {"deck", "eclfiles"}
)
submodules[submod]["doc"] = func.__doc__
try:
submodules[submod]["arrow_convertor"] = importlib.import_module(
"ecl2df." + submod
)._df2pyarrow
except AttributeError:
logger.info(
"No premade function for converting to arrow in %s",
submod_path,
)

logger.debug("Assigning %s to %s", submodules[submod], submod)

logger.debug("Returning the submodule names as a list: %s ", submodules.keys())
logger.debug("Returning the submodules extra args as a dictionary: %s ", submodules)

return tuple(submodules.keys()), submodules


SUBMODULES, SUBMOD_DICT = _define_submodules()


def give_name(datafile_path: str) -> str:
"""Return name to assign in metadata
@@ -98,31 +49,6 @@ def give_name(datafile_path: str) -> str:
return base_name


def convert_to_arrow(frame):
"""Convert pd.DataFrame to arrow
Args:
frame (pd.DataFrame): the frame to convert
Returns:
pa.Table: the converted dataframe
"""
logger = logging.getLogger(__file__ + ".convert_to_arrow")
logger.debug("!!!!Using convert to arrow!!!")
standard = {"DATE": pa.timestamp("ms")}
if "DATE" in frame.columns:
frame["DATE"] = pd.to_datetime(frame["DATE"], infer_datetime_format=True)
scheme = []
for column_name in frame.columns:
if pd.api.types.is_string_dtype(frame[column_name]):
scheme.append((column_name, pa.string()))
else:
scheme.append((column_name, standard.get(column_name, pa.float32())))
logger.debug(scheme)
table = pa.Table.from_pandas(frame, schema=pa.schema(scheme))
return table


def get_results(
datafile_path: str, submod: str, print_help=False, **kwargs
) -> Union[pa.Table, pd.DataFrame]:
@@ -159,16 +85,13 @@ def get_results(
if arrow:
try:
output = SUBMOD_DICT[submod]["arrow_convertor"](output)
except KeyError:
logger.debug("No arrow convertor defined for %s", submod)
try:
output = convert_to_arrow(output)
except pa.lib.ArrowInvalid:
logger.warning(
"Arrow invalid, cannot convert to arrow, keeping pandas format"
)
except TypeError:
logger.warning("Type error, cannot convert to arrow")

except pa.lib.ArrowInvalid:
logger.warning(
"Arrow invalid, cannot convert to arrow, keeping pandas format"
)
except TypeError:
logger.warning("Type error, cannot convert to arrow")
except RuntimeError:
print(give_help(None))
except TypeError:
@@ -221,15 +144,22 @@ def export_results(
logger = logging.getLogger(__file__ + ".export_results")
logger.debug("Export will be using these options: %s", kwargs)
frame = get_results(datafile_path, submod, **kwargs)
allowed_contents = {"summary": "timeseries"}
submod_contents = {
"summary": "timeseries",
"satfunc": "relperm",
"vfp": "lift_curves",
}
submod_contents.update(
{name: name for name in ["rft", "pvt", "transmissibilities"]}
)
if frame is not None:
logger.debug("Reading global variables from %s", config_file)
cfg = yaml_load(config_file)
exp = ExportData(
config=cfg,
name=give_name(datafile_path),
tagname=submod,
content=allowed_contents.get(submod, "property")
content=submod_contents.get(submod, "property"),
)
exp_path = exp.export(frame)
else:
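An aside on the content mapping introduced in export_results above: after the update() call the lookup table is equivalent to the literal below, with unknown submodules falling back to "property".

# Sketch: the fully expanded mapping and its fallback behaviour.
submod_contents = {
    "summary": "timeseries",
    "satfunc": "relperm",
    "vfp": "lift_curves",
    "rft": "rft",
    "pvt": "pvt",
    "transmissibilities": "transmissibilities",
}
submod_contents.get("vfp", "property")   # -> "lift_curves"
submod_contents.get("grid", "property")  # -> "property" (default)
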
15 changes: 9 additions & 6 deletions tests/test_functions.py
@@ -7,9 +7,8 @@
import pandas as pd
import pyarrow as pa
import pytest
from sumo.wrapper import SumoClient
from fmu.sumo.sim2sumo import sim2sumo
from fmu.sumo.uploader import CaseOnDisk, SumoConnection
from fmu.sumo.sim2sumo._special_treatments import _define_submodules, convert_to_arrow


REEK_ROOT = Path(__file__).parent / "data/reek"
@@ -30,7 +29,8 @@

def test_submodules_dict():
"""Test generation of submodule list"""
sublist, submods = sim2sumo._define_submodules()
sublist, submods = _define_submodules()
print(sublist, submods)
LOGGER.info(submods)
assert isinstance(sublist, tuple)
assert isinstance(submods, dict)
@@ -68,7 +68,10 @@ def test_get_results(submod):
frame, pa.Table
), f"Call for get_dataframe with arrow=True should produce pa.Table, but produces {type(frame)}"
if submod == "summary":
        assert frame.schema.field("FOPT").metadata is not None, "Metadata not carried across for summary"
        assert (
            frame.schema.field("FOPT").metadata is not None
        ), "Metadata not carried across for summary"


@pytest.mark.parametrize(
"submod",
@@ -123,7 +126,7 @@ def test_export_results_w_options(tmp_path, submod="summary"):
CHECK_DICT = {
"global_variables_w_eclpath.yml": {
"nrdatafile": 1,
"nrsubmods": 16,
"nrsubmods": 17,
"nroptions": 1,
"arrow": True,
},
@@ -255,7 +258,7 @@ def test_convert_to_arrow():
)
dframe["DATE"] = dframe["DATE"].astype("datetime64[ms]")
print(dframe.dtypes)
table = sim2sumo.convert_to_arrow(dframe)
table = convert_to_arrow(dframe)
assert isinstance(table, pa.Table), "Did not convert to table"


