diff --git a/.gitignore b/.gitignore index 3f0a56b7..28384cb6 100644 --- a/.gitignore +++ b/.gitignore @@ -159,3 +159,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +*version.py \ No newline at end of file diff --git a/src/fmu/sumo/sim2sumo/_special_treatments.py b/src/fmu/sumo/sim2sumo/_special_treatments.py index 290444ee..9d40f441 100644 --- a/src/fmu/sumo/sim2sumo/_special_treatments.py +++ b/src/fmu/sumo/sim2sumo/_special_treatments.py @@ -1,5 +1,120 @@ """Special treatment of some options used in ecl2df""" +from inspect import signature +import importlib +import logging +from pathlib import Path +import ecl2df from ecl2df.common import convert_lyrlist_to_zonemap, parse_lyrfile +import pandas as pd +import pyarrow as pa + + +def convert_to_arrow(frame): + """Convert pd.DataFrame to arrow + + Args: + frame (pd.DataFrame): the frame to convert + + Returns: + pa.Table: the converted dataframe + """ + logger = logging.getLogger(__file__ + ".convert_to_arrow") + logger.debug("!!!!Using convert to arrow!!!") + standard = {"DATE": pa.timestamp("ms")} + if "DATE" in frame.columns: + frame["DATE"] = pd.to_datetime(frame["DATE"], infer_datetime_format=True) + scheme = [] + for column_name in frame.columns: + if pd.api.types.is_string_dtype(frame[column_name]): + scheme.append((column_name, pa.string())) + else: + scheme.append((column_name, standard.get(column_name, pa.float32()))) + logger.debug(scheme) + table = pa.Table.from_pandas(frame, schema=pa.schema(scheme)) + return table + + +def find_arrow_convertor(path): + """Find function for converting pandas dataframe to arrow + + Args: + path (str): path to where to look for function + + Returns: + function: function for converting to arrow + """ + logger = logging.getLogger(__file__ + ".find_arrow_convertor") + try: + func = importlib.import_module(path)._df2pyarrow + except AttributeError: + logger.info( + "No premade function for converting to arrow in %s", + path, + ) + func = convert_to_arrow + + return func + + +def find_functions_and_docstring(submod): + """Find functions for extracting and converting from eclipse native + + Args: + submod (str): path to where to look for function + + Returns: + dictionary: includes functions and doc string + """ + logger = logging.getLogger(__file__ + ".find_func_and_info") + + import_path = "ecl2df." + submod + func = importlib.import_module(import_path).df + logger.debug("Assigning %s to %s", func.__name__, submod) + returns = { + "extract": func, + "options": tuple( + name + for name in signature(func).parameters.keys() + if name not in {"deck", "eclfiles"} + ), + "arrow_convertor": find_arrow_convertor(import_path), + "doc": func.__doc__, + } + + return returns + + +def _define_submodules(): + """Fetch all submodules + + Returns: + list: list of submodules + """ + + logger = logging.getLogger(__file__ + "define_submodules") + package_path = Path(ecl2df.__file__).parent + + submodules = {} + submod_paths = list(package_path.glob("*.py")) + # vfp breakes the pattern + submod_paths.append("_vfp.py") + for submod_path in submod_paths: + try: + submod_string = str(submod_path.name.replace(".py", "")) + submod = submod_string + except AttributeError: + submod_string = "vfp._vfp" + submod = "vfp" + try: + submodules[submod] = find_functions_and_docstring(submod_string) + logger.debug("Assigning %s to %s", submodules[submod], submod) + except AttributeError: + logger.debug("No df function in %s", submod_path) + + logger.debug("Returning the submodule names as a list: %s ", submodules.keys()) + logger.debug("Returning the submodules extra args as a dictionary: %s ", submodules) + + return tuple(submodules.keys()), submodules def convert_options(options): @@ -16,3 +131,6 @@ def convert_options(options): parse_lyrfile(options["zonemap"]) ) return options + + +SUBMODULES, SUBMOD_DICT = _define_submodules() diff --git a/src/fmu/sumo/sim2sumo/sim2sumo.py b/src/fmu/sumo/sim2sumo/sim2sumo.py index 1817b1e6..e3be5608 100644 --- a/src/fmu/sumo/sim2sumo/sim2sumo.py +++ b/src/fmu/sumo/sim2sumo/sim2sumo.py @@ -4,17 +4,14 @@ from typing import Union from pathlib import Path import logging -import importlib import argparse -from inspect import signature import pandas as pd import ecl2df as sim2df -import ecl2df import pyarrow as pa import yaml from fmu.dataio import ExportData from fmu.sumo.uploader.scripts.sumo_upload import sumo_upload_main -from ._special_treatments import convert_options +from ._special_treatments import SUBMODULES, SUBMOD_DICT, convert_options def yaml_load(file_name): @@ -36,52 +33,6 @@ def yaml_load(file_name): return config -def _define_submodules(): - """Fetch all submodules - - Returns: - list: list of submodules - """ - - logger = logging.getLogger(__file__ + "define_submodules") - package_path = Path(ecl2df.__file__).parent - - submodules = {} - for submod_path in package_path.glob("*.py"): - submod = str(submod_path.name.replace(".py", "")) - try: - func = importlib.import_module("ecl2df." + submod).df - except AttributeError: - logger.debug("No df function in %s", submod_path) - continue - submodules[submod] = {"extract": func} - submodules[submod]["options"] = tuple( - name - for name in signature(func).parameters.keys() - if name not in {"deck", "eclfiles"} - ) - submodules[submod]["doc"] = func.__doc__ - try: - submodules[submod]["arrow_convertor"] = importlib.import_module( - "ecl2df." + submod - )._df2pyarrow - except AttributeError: - logger.info( - "No premade function for converting to arrow in %s", - submod_path, - ) - - logger.debug("Assigning %s to %s", submodules[submod], submod) - - logger.debug("Returning the submodule names as a list: %s ", submodules.keys()) - logger.debug("Returning the submodules extra args as a dictionary: %s ", submodules) - - return tuple(submodules.keys()), submodules - - -SUBMODULES, SUBMOD_DICT = _define_submodules() - - def give_name(datafile_path: str) -> str: """Return name to assign in metadata @@ -98,31 +49,6 @@ def give_name(datafile_path: str) -> str: return base_name -def convert_to_arrow(frame): - """Convert pd.DataFrame to arrow - - Args: - frame (pd.DataFrame): the frame to convert - - Returns: - pa.Table: the converted dataframe - """ - logger = logging.getLogger(__file__ + ".convert_to_arrow") - logger.debug("!!!!Using convert to arrow!!!") - standard = {"DATE": pa.timestamp("ms")} - if "DATE" in frame.columns: - frame["DATE"] = pd.to_datetime(frame["DATE"], infer_datetime_format=True) - scheme = [] - for column_name in frame.columns: - if pd.api.types.is_string_dtype(frame[column_name]): - scheme.append((column_name, pa.string())) - else: - scheme.append((column_name, standard.get(column_name, pa.float32()))) - logger.debug(scheme) - table = pa.Table.from_pandas(frame, schema=pa.schema(scheme)) - return table - - def get_results( datafile_path: str, submod: str, print_help=False, **kwargs ) -> Union[pa.Table, pd.DataFrame]: @@ -159,16 +85,13 @@ def get_results( if arrow: try: output = SUBMOD_DICT[submod]["arrow_convertor"](output) - except KeyError: - logger.debug("No arrow convertor defined for %s", submod) - try: - output = convert_to_arrow(output) - except pa.lib.ArrowInvalid: - logger.warning( - "Arrow invalid, cannot convert to arrow, keeping pandas format" - ) - except TypeError: - logger.warning("Type error, cannot convert to arrow") + + except pa.lib.ArrowInvalid: + logger.warning( + "Arrow invalid, cannot convert to arrow, keeping pandas format" + ) + except TypeError: + logger.warning("Type error, cannot convert to arrow") except RuntimeError: print(give_help(None)) except TypeError: @@ -221,7 +144,14 @@ def export_results( logger = logging.getLogger(__file__ + ".export_results") logger.debug("Export will be using these options: %s", kwargs) frame = get_results(datafile_path, submod, **kwargs) - allowed_contents = {"summary": "timeseries"} + submod_contents = { + "summary": "timeseries", + "satfunc": "relperm", + "vfp": "lift_curves", + } + submod_contents.update( + {name: name for name in ["rft", "pvt", "transmissibilities"]} + ) if frame is not None: logger.debug("Reading global variables from %s", config_file) cfg = yaml_load(config_file) @@ -229,7 +159,7 @@ def export_results( config=cfg, name=give_name(datafile_path), tagname=submod, - content=allowed_contents.get(submod, "property") + content=submod_contents.get(submod, "property"), ) exp_path = exp.export(frame) else: diff --git a/tests/test_functions.py b/tests/test_functions.py index e7da5eaa..3e47e56d 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -7,9 +7,8 @@ import pandas as pd import pyarrow as pa import pytest -from sumo.wrapper import SumoClient from fmu.sumo.sim2sumo import sim2sumo -from fmu.sumo.uploader import CaseOnDisk, SumoConnection +from fmu.sumo.sim2sumo._special_treatments import _define_submodules, convert_to_arrow REEK_ROOT = Path(__file__).parent / "data/reek" @@ -30,7 +29,8 @@ def test_submodules_dict(): """Test generation of submodule list""" - sublist, submods = sim2sumo._define_submodules() + sublist, submods = _define_submodules() + print(sublist, submods) LOGGER.info(submods) assert isinstance(sublist, tuple) assert isinstance(submods, dict) @@ -68,7 +68,10 @@ def test_get_results(submod): frame, pa.Table ), f"Call for get_dataframe with arrow=True should produce pa.Table, but produces {type(frame)}" if submod == "summary": - assert frame.schema.field("FOPT").metadata is not None, "Metdata not carried across for summary" + assert ( + frame.schema.field("FOPT").metadata is not None + ), "Metdata not carried across for summary" + @pytest.mark.parametrize( "submod", @@ -123,7 +126,7 @@ def test_export_results_w_options(tmp_path, submod="summary"): CHECK_DICT = { "global_variables_w_eclpath.yml": { "nrdatafile": 1, - "nrsubmods": 16, + "nrsubmods": 17, "nroptions": 1, "arrow": True, }, @@ -255,7 +258,7 @@ def test_convert_to_arrow(): ) dframe["DATE"] = dframe["DATE"].astype("datetime64[ms]") print(dframe.dtypes) - table = sim2sumo.convert_to_arrow(dframe) + table = convert_to_arrow(dframe) assert isinstance(table, pa.Table), "Did not convert to table"