From 36523569dcb6585dd4b1b91240be696081b210a5 Mon Sep 17 00:00:00 2001 From: Runar Ask Johannessen <89020325+equinor-ruaj@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:34:44 +0200 Subject: [PATCH 1/2] Refactoring (#99) * Simplify debug logging * Simplify method * Move function used only for test fixture * Simplify method * Remove unused import * Remove get_help functionality * Simplify convert_table_2_sumo_file and convert_xtgeo_2_sumo_file * Remove debug logging * Remove test of removed function * Move asserts to before function returns * Remove code asserts in favour of tests * Remove deprecated argument * Fix test_get_table to test what it says it does * Remove unused function * Organize imports * Run tests sequentially * Simplify global dict specification * Reduce debug logging * Simplify test constants and remove verbose prints * Remove params that were always defaulted * Remove verbose logging and some refactoring * Move sleep to after the action that makes the sleep necessary * Simplify test assert * Remove deprecated ssdl block from test config files * Fix test warning could not auto detect case path * Add required info when setting content=property on grid3d metadata * Make printout after test ert run more readable * Separate test that runs ert to own file * Simplify * Fix typo * Simplify function return * Fix warning message * Remove logging --- .github/workflows/run_tests.yaml | 3 +- src/fmu/sumo/sim2sumo/_special_treatments.py | 50 +----- src/fmu/sumo/sim2sumo/common.py | 61 +------ src/fmu/sumo/sim2sumo/grid3d.py | 151 ++++------------- src/fmu/sumo/sim2sumo/main.py | 56 +++---- src/fmu/sumo/sim2sumo/tables.py | 98 +++-------- tests/conftest.py | 77 ++++++--- .../fmuconfig/output/global_variables.yml | 4 +- .../output/global_variables_w_eclpath.yml | 4 +- .../global_variables_w_eclpath_and_extras.yml | 4 +- .../results/tables/.2_r001_reek--rft.csv.yml | 3 - .../.2_r001_reek--wellconnstatus.csv.yml | 3 - .../fmuconfig/output/global_variables.yml | 4 +- .../output/global_variables_w_eclpath.yml | 4 +- .../global_variables_w_eclpath_and_extras.yml | 4 +- tests/test_functions.py | 156 +++--------------- tests/test_w_drogon.py | 25 ++- tests/test_with_ert.py | 79 +++++++++ 18 files changed, 264 insertions(+), 522 deletions(-) create mode 100644 tests/test_with_ert.py diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index f6d6cd53..7e75aca8 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -14,6 +14,7 @@ jobs: matrix: python-version: ["3.8", "3.9", "3.10", "3.11"] os: [ubuntu-latest] + max-parallel: 1 permissions: contents: read id-token: write @@ -57,4 +58,4 @@ jobs: python -c 'import sys; print(sys.platform)' python -c 'import os; import sys; print(os.path.dirname(sys.executable))' - pytest --log-cli-level WARNING -s --timeout=300 + pytest --log-cli-level=WARNING -s --timeout=300 diff --git a/src/fmu/sumo/sim2sumo/_special_treatments.py b/src/fmu/sumo/sim2sumo/_special_treatments.py index b07d6706..55ed8bde 100644 --- a/src/fmu/sumo/sim2sumo/_special_treatments.py +++ b/src/fmu/sumo/sim2sumo/_special_treatments.py @@ -25,9 +25,7 @@ def convert_to_arrow(frame): logger.debug("!!!!Using convert to arrow!!!") standard = {"DATE": pa.timestamp("ms")} if "DATE" in frame.columns: - frame["DATE"] = pd.to_datetime( - frame["DATE"], infer_datetime_format=True - ) + frame["DATE"] = pd.to_datetime(frame["DATE"]) scheme = [] for column_name in frame.columns: if pd.api.types.is_string_dtype(frame[column_name]): @@ -85,7 +83,6 @@ def find_functions_and_docstring(submod): if name not in {"deck", "eclfiles"} ), "arrow_convertor": find_arrow_convertor(import_path), - "doc": func.__doc__, } return returns @@ -192,30 +189,20 @@ def vfp_to_arrow_dict(datafile, options): Returns: tuple: vfp keyword, then dictionary with key: table_name, value: table """ - logger = logging.getLogger(__file__ + ".vfp_to_arrow_dict") filepath_no_suffix = Path(datafile).with_suffix("") resdatafiles = res2df.ResdataFiles(filepath_no_suffix) vfp_dict = {} keyword = options.get("keyword", ["VFPPROD", "VFPINJ"]) - logger.debug("keyword is %s", keyword) vfpnumbers = options.get("vfpnumbers", None) if isinstance(keyword, str): keywords = [keyword] else: keywords = keyword - logger.debug("%s keywords to go through", len(keywords)) - for keyword in keywords: vfp_dict[keyword] = res2df.vfp._vfp.pyarrow_tables( resdatafiles.get_deck(), keyword=keyword, vfpnumbers_str=vfpnumbers ) - - logger.debug( - "Keyword %s, extracted %s vfp tables", - keyword, - len(vfp_dict[keyword]), - ) return vfp_dict @@ -264,38 +251,3 @@ def add_md_to_rft(rft_table, md_file_path): logger.debug("Head of merged table to return:\n %s", rft_table.head()) return rft_table - - -def give_help(submod, only_general=False): - """Give descriptions of variables available for submodule - - Args: - submod (str): submodule - - Returns: - str: description of submodule input - """ - general_info = """ - This utility uses the library ecl2csv, but uploads directly to sumo. Required options are: - A config file in yaml format, where you specifiy the variables to extract. What is required - is a keyword in the config called "sim2simo". under there you have three optional arguments: - * datafile: this can be a string, a list, or it can be absent altogether - * datatypes: this needs to be a list, or non existent - * options: The options are listed below in the original documentation from ecl2csv. The eclfiles - option is replaced with what is under datafile - - """ - if submod is None: - only_general = True - if only_general: - text_to_return = general_info - else: - try: - text_to_return = general_info + SUBMOD_DICT[submod]["doc"] - except KeyError: - text_to_return = ( - f"subtype {submod} does not exist!!, existing options:\n" - + "\n".join(SUBMODULES) - ) - - return text_to_return diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index de3d1598..9d6a5bb6 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -7,10 +7,8 @@ import psutil import yaml - from fmu.dataio import ExportData from fmu.sumo.uploader import SumoConnection -from fmu.sumo.uploader._fileonjob import FileOnJob from fmu.sumo.uploader._upload_files import upload_files from fmu.sumo.sim2sumo._special_treatments import ( SUBMOD_DICT, @@ -49,15 +47,11 @@ def get_case_uuid(file_path, parent_level=4): Returns: str: the case uuid """ - logger = logging.getLogger(__name__ + ".get_case_uuid") - logger.debug("Asked for parent %s for %s", parent_level, file_path) case_meta_path = ( Path(file_path).parents[parent_level] / "share/metadata/fmu_case.yml" ) - logger.debug("Case meta path: %s", case_meta_path) case_meta = yaml_load(case_meta_path) uuid = case_meta["fmu"]["case"]["uuid"] - logger.info("Case uuid: %s", uuid) return uuid @@ -114,11 +108,12 @@ def find_full_path(datafile, paths): try: return paths[data_name] except KeyError: - mess = ( - "Datafile %s, with derived name %s, not found in %s," - " have to skip" + logger.warning( + "Datafile %s, with derived name %s, not found in %s, have to skip", + datafile, + data_name, + paths, ) - logger.warning(mess, datafile, data_name, paths) return None @@ -137,7 +132,7 @@ def find_datafile_paths(): paths[name] = data_path else: logger.warning( - "Name %s from file %s allready used", name, data_path + "Name %s from file %s already used", name, data_path ) return paths @@ -161,10 +156,7 @@ def create_config_dict(config, datafile=None, datatype=None): logger.debug("Input config keys are %s", config.keys()) simconfig = config.get("sim2sumo", {}) - if len(simconfig) == 0: - logger.warning("We are starting from scratch") - else: - logger.debug("This is the starting point %s", simconfig) + logger.debug("sim2sumo config %s", simconfig) grid3d = simconfig.get("grid3d", False) if isinstance(simconfig, bool): simconfig = {} @@ -403,7 +395,6 @@ def _upload(self): def finish(self): """Cleanup""" - self._logger.info("Final stretch") self._upload() @@ -467,44 +458,6 @@ def generate_meta(config, datafile_path, tagname, obj, content): return metadata -def convert_2_sumo_file(obj, converter, metacreator, meta_args): - """Convert object to sumo file - - Args: - obj (object): the object - converter (func): function to convert to bytestring - metacreator (func): the function that creates the metadata - meta_args (iterable): arguments for generating metadata - - Returns: - SumoFile: file containing obj - """ - logger = logging.getLogger(__name__ + ".convert_2_sumo_file") - logger.debug("Obj type: %s", type(obj)) - logger.debug("Convert function %s", converter) - logger.debug("Meta function %s", metacreator) - logger.debug("Arguments for creating metadata %s", meta_args) - if obj is None: - logger.warning("Nothing to do with None object") - return obj - bytestring = converter(obj) - metadata = metacreator(*meta_args) - logger.debug("Metadata created") - assert isinstance( - metadata, dict - ), f"meta should be dict, but is {type(metadata)}" - assert isinstance( - bytestring, bytes - ), f"bytestring should be bytes, but is {type(bytestring)}" - sumo_file = FileOnJob(bytestring, metadata) - logger.debug("Init of sumo file") - sumo_file.path = metadata["file"]["relative_path"] - sumo_file.metadata_path = "" - sumo_file.size = len(sumo_file.byte_string) - logger.debug("Returning from func") - return sumo_file - - def nodisk_upload(files, parent_id, config_path, env="prod", connection=None): """Upload files to sumo diff --git a/src/fmu/sumo/sim2sumo/grid3d.py b/src/fmu/sumo/sim2sumo/grid3d.py index 58185826..e3a4f574 100755 --- a/src/fmu/sumo/sim2sumo/grid3d.py +++ b/src/fmu/sumo/sim2sumo/grid3d.py @@ -7,7 +7,6 @@ """ import logging from pathlib import Path -import re from datetime import datetime from io import BytesIO @@ -17,11 +16,9 @@ from xtgeo import GridProperty, grid_from_file from xtgeo.grid3d import _gridprop_import_eclrun as eclrun from xtgeo.io._file import FileWrapper +from fmu.sumo.uploader._fileonjob import FileOnJob -from .common import ( - generate_meta, - convert_2_sumo_file, -) +from .common import generate_meta def xtgeo_2_bytestring(obj): @@ -33,20 +30,17 @@ def xtgeo_2_bytestring(obj): Returns: bytestring: bytes """ - logger = logging.getLogger(__name__ + ".xtgeo_2_bytestring") if obj is None: return obj - logger.debug("Converting %s", obj.name) sink = BytesIO() obj.to_file(sink) sink.seek(0) bytestring = sink.getbuffer().tobytes() - logger.debug("Returning bytestring with size %s", len(bytestring)) return bytestring -def generate_grid3d_meta(datafile, obj, prefix, config, content): +def generate_grid3d_meta(datafile, obj, prefix, config): """Generate metadata for xtgeo object Args: @@ -54,21 +48,24 @@ def generate_grid3d_meta(datafile, obj, prefix, config, content): obj (xtgeo object): the object to generate metadata on prefix (str): prefix to include config (dict): the fmu config file - content (str): content for data Returns: dict: the metadata for obj """ - logger = logging.getLogger(__name__ + ".generate_grid3d_meta") - if obj is None: - return obj + if isinstance(obj, Grid): + content = "depth" + else: + content = {"property": {"is_discrete": False}} if prefix == "grid": tagname = prefix else: tagname = f"{prefix}-{obj.name}" metadata = generate_meta(config, datafile, tagname, obj, content) - logger.debug("Generated meta are %s", metadata) + + assert isinstance( + metadata, dict + ), f"meta should be dict, but is {type(metadata)}" return metadata @@ -85,90 +82,18 @@ def convert_xtgeo_2_sumo_file(datafile, obj, prefix, config): Returns: SumoFile: Object containing xtgeo object as bytestring + metadata as dictionary """ - logger = logging.getLogger(__name__ + ".convert_xtgeo_2_sumo_file") - logger.debug("Datafile %s", datafile) - logger.debug("Obj of type: %s", type(obj)) - logger.debug("prefix: %s", prefix) - logger.debug("Config: %s", config) if obj is None: return obj - if isinstance(obj, Grid): - content = "depth" - else: - content = "property" - - meta_args = (datafile, obj, prefix, config, content) - logger.debug( - "sending in %s", - dict( - zip(("datafile", "obj", "prefix", "config", "content"), meta_args) - ), - ) - sumo_file = convert_2_sumo_file( - obj, xtgeo_2_bytestring, generate_grid3d_meta, meta_args - ) - return sumo_file + bytestring = xtgeo_2_bytestring(obj) + metadata = generate_grid3d_meta(datafile, obj, prefix, config) -def get_xtgeo_egrid(datafile): - """Export egrid file to sumo + sumo_file = FileOnJob(bytestring, metadata) + sumo_file.path = metadata["file"]["relative_path"] + sumo_file.metadata_path = "" + sumo_file.size = len(sumo_file.byte_string) - Args: - datafile (str): path to datafile - """ - logger = logging.getLogger(__name__ + ".get_xtgeo_egrid") - logger.debug("Fetching %s", datafile) - egrid_path = str(datafile).replace(".DATA", ".EGRID") - egrid = grid_from_file(egrid_path) - - logger.info("Fetched %s", egrid.name) - return egrid - - -def readname(filename): - """Read keyword from grdecl file - - Args: - filename (str): name of file to read - - Returns: - str: keyword name - """ - logger = logging.getLogger(__name__ + ".readname") - name = "" - linenr = 0 - with open(filename, "r", encoding="utf-8") as file_handle: - for line in file_handle: - linenr += 1 - logger.debug("%s %s", linenr, line) - if "ECHO" in line: - continue - match = re.match(r"^([a-zA-Z].*)", line) - # match = re.match(r"$([a-zA-Z][0-9A-Za-z]+)\s+", line) - if match: - name = match.group(0) - break - if linenr > 20: - break - logger.debug("Property %s", name) - - return name - - -def make_dates_from_timelist(time_list): - """Convert time list format from resdata.RestartFile to strings - - Args: - time_list (ResDataRestartFile.timelist): the input list of dates - - Returns: - list: dates in string format - """ - dates = [] - for date in time_list: - date_str = datetime.strftime(date[1], "%Y-%m-%d") - dates.append(date_str) - return dates + return sumo_file def upload_init(init_path, xtgeoegrid, config, dispatcher): @@ -237,7 +162,6 @@ def upload_restart( count = 0 for prop_name in prop_names: for time_step in time_steps: - try: restart_prop = eclrun.import_gridprop_from_restart( FileWrapper(restart_path), prop_name, xtgeoegrid, time_step @@ -248,9 +172,6 @@ def upload_restart( xtgeo_prop = make_xtgeo_prop(xtgeoegrid, restart_prop) if xtgeo_prop is not None: - # TODO: refactor this if statement together with identical - # code in export_init - # These are identical, and should be treated as such logger.debug("Exporting %s", xtgeo_prop.name) sumo_file = convert_xtgeo_2_sumo_file( restart_path, xtgeo_prop, "UNRST", config @@ -264,7 +185,7 @@ def upload_restart( continue dispatcher.add(sumo_file) count += 1 - logger.info("%s properties sendt on", count) + logger.info("%s properties uploaded", count) return count @@ -277,10 +198,8 @@ def upload_simulation_runs(datafiles, config, dispatcher): config (dict): the fmu config file with metadata dispatcher (sim2sumo.common.Dispatcher) """ - logger = logging.getLogger(__name__ + ".upload_simulation_runs") for datafile in datafiles: if not datafiles[datafile]["grid3d"]: - logger.info("Export of grid3d deactivated for %s", datafile) continue upload_simulation_run(datafile, config, dispatcher) @@ -298,9 +217,6 @@ def upload_simulation_run(datafile, config, dispatcher): grid_path = str(datafile_path.with_suffix(".EGRID")) egrid = Grid(grid_path) xtgeoegrid = grid_from_file(grid_path) - # grid_exp_path = export_object( - # datafile, "grid", config, xtgeoegrid, "depth" - # ) sumo_file = convert_xtgeo_2_sumo_file( restart_path, xtgeoegrid, "grid", config ) @@ -325,19 +241,20 @@ def get_timesteps(restart_path, egrid): list: list of dates """ restart = ResdataRestartFile(egrid, restart_path) - time_steps = make_dates_from_timelist(restart.time_list()) - return time_steps + dates = [] + for date in restart.time_list(): + date_str = datetime.strftime(date[1], "%Y-%m-%d") + dates.append(date_str) + return dates -def make_xtgeo_prop( - xtgeoegrid, prop_dict, describe=False, return_single=False -): + +def make_xtgeo_prop(xtgeoegrid, prop_dict): """Build an xtgeo property from xtgeo record Args: xtgeoegrid (xtgeo.Grid): the grid to connect property to prop_dict (dict): xtgeo record - describe (bool, optional): Print some statistics for property. Defaults to False. Returns: xtgeo.GridProperty: the extracted results @@ -347,14 +264,12 @@ def make_xtgeo_prop( values = prop_dict["values"] single_value = np.unique(values).size == 1 if single_value: - logger.info("%s has only one value", prop_name) - if single_value and not return_single: - xtgeo_prop = None - logger.debug("Will not return single value property") - else: - xtgeo_prop = GridProperty(xtgeoegrid, name=prop_name) - xtgeo_prop.values = values - if describe: - xtgeo_prop.describe() + logger.debug( + "%s has only one value. Will not return single value property.", + prop_name, + ) + return None + xtgeo_prop = GridProperty(xtgeoegrid, name=prop_name) + xtgeo_prop.values = values return xtgeo_prop diff --git a/src/fmu/sumo/sim2sumo/main.py b/src/fmu/sumo/sim2sumo/main.py index f1ef401e..590f58dc 100644 --- a/src/fmu/sumo/sim2sumo/main.py +++ b/src/fmu/sumo/sim2sumo/main.py @@ -6,7 +6,6 @@ from .grid3d import upload_simulation_runs from .tables import upload_tables from .common import yaml_load, Dispatcher, create_config_dict -from ._special_treatments import give_help, SUBMODULES def parse_args(): @@ -37,22 +36,13 @@ def parse_args(): "--datatype", type=str, default=None, - help="Override datatype setting, intented for testing only", + help="Override datatype setting, for testing only", ) parser.add_argument( "--datafile", type=str, default=None, - help="Override datafile setting, intented for testing only", - ) - parser.add_argument( - "--help_on", - type=str, - help=( - "Use this to get documentation of one of the datatypes to upload\n" - + f"valid options are \n{', '.join(SUBMODULES)}" - ), - default="No help", + help="Override datafile setting, for testing only", ) parser.add_argument("--d", help="Activate debug mode", action="store_true") args = parser.parse_args() @@ -69,33 +59,29 @@ def main(): logger = logging.getLogger(__file__ + ".main") args = parse_args() logger.debug("Running with arguments %s", args) - if args.help_on != "No help": - print(give_help(args.help_on)) - else: - logger.info("Will be extracting results") - config = yaml_load(args.config_path) - config["file_path"] = args.config_path - logger.debug("Added file_path, and config keys are %s", config.keys()) - sim2sumoconfig = create_config_dict( - config, args.datafile, args.datatype + + logger.info("Will be extracting results") + config = yaml_load(args.config_path) + config["file_path"] = args.config_path + logger.debug("Added file_path, and config keys are %s", config.keys()) + sim2sumoconfig = create_config_dict(config, args.datafile, args.datatype) + # Init of dispatcher needs one datafile to locate case uuid + one_datafile = list(sim2sumoconfig.keys())[0] + try: + dispatcher = Dispatcher( + one_datafile, args.env, config_path=args.config_path ) - # Init of dispatcher needs one datafile to locate case uuid - one_datafile = list(sim2sumoconfig.keys())[0] - try: - dispatcher = Dispatcher( - one_datafile, args.env, config_path=args.config_path - ) - except Exception as e: - logger.error("Failed to create dispatcher: %s", e) - return + except Exception as e: + logger.error("Failed to create dispatcher: %s", e) + return - logger.debug("Extracting tables") - upload_tables(sim2sumoconfig, config, dispatcher) + logger.debug("Extracting tables") + upload_tables(sim2sumoconfig, config, dispatcher) - logger.debug("Extracting 3dgrid(s) with properties") - upload_simulation_runs(sim2sumoconfig, config, dispatcher) + logger.debug("Extracting 3dgrid(s) with properties") + upload_simulation_runs(sim2sumoconfig, config, dispatcher) - dispatcher.finish() + dispatcher.finish() if __name__ == "__main__": diff --git a/src/fmu/sumo/sim2sumo/tables.py b/src/fmu/sumo/sim2sumo/tables.py index 98b5def3..8266cc88 100644 --- a/src/fmu/sumo/sim2sumo/tables.py +++ b/src/fmu/sumo/sim2sumo/tables.py @@ -6,7 +6,6 @@ """ import logging -from pathlib import Path import sys from typing import Union @@ -14,6 +13,7 @@ import pyarrow.parquet as pq import pandas as pd import res2df +from fmu.sumo.uploader._fileonjob import FileOnJob from ._special_treatments import ( SUBMOD_DICT, @@ -22,38 +22,17 @@ vfp_to_arrow_dict, find_md_log, ) -from .common import ( - generate_meta, - convert_2_sumo_file, -) +from .common import generate_meta SUBMOD_CONTENT = { "summary": "timeseries", "satfunc": "relperm", "vfp": "lift_curves", + "rft": "rft", + "pvt": "pvt", + "transmissibilities": "transmissibilities", } -SUBMOD_CONTENT.update( - {name: name for name in ["rft", "pvt", "transmissibilities"]} -) - - -def table_to_bytes(table: pa.Table): - """Return table as bytestring - - Args: - table (pa.Table): the table to be converted - - Returns: - bytes: table as bytestring - """ - logger = logging.getLogger(__name__ + ".table_to_bytes") - sink = pa.BufferOutputStream() - logger.debug("Writing %s to sink", table) - pq.write_table(table, sink) - byte_string = sink.getvalue().to_pybytes() - logger.debug("Returning bytestring with size %s", len(byte_string)) - return byte_string def table_2_bytestring(table): @@ -63,9 +42,12 @@ def table_2_bytestring(table): table (pa.table): the table to convert Returns: - bytest: the bytes string + bytes: table as bytestring """ - return table_to_bytes(table) + sink = pa.BufferOutputStream() + pq.write_table(table, sink) + bytestring = sink.getvalue().to_pybytes() + return bytestring def generate_table_meta(datafile, obj, tagname, config): @@ -74,22 +56,18 @@ def generate_table_meta(datafile, obj, tagname, config): Args: datafile (str): path to datafile obj (xtgeo object): the object to generate metadata on - prefix (str): prefix to include + tagname: tagname config (dict): the fmu config file - content (str): content for data Returns: dict: the metadata for obj """ - logger = logging.getLogger(__name__ + ".generate_table_meta") - if "vfp" in tagname.lower(): content = "lift_curves" else: content = SUBMOD_CONTENT.get(tagname, "property") metadata = generate_meta(config, datafile, tagname, obj, content) - logger.debug("Generated meta are %s", metadata) return metadata @@ -105,22 +83,17 @@ def convert_table_2_sumo_file(datafile, obj, tagname, config): Returns: SumoFile: Object containing table object as bytestring + metadata as dictionary """ - logger = logging.getLogger(__name__ + ".convert_table_2_sumo_file") - logger.debug("Datafile %s", datafile) - logger.debug("Obj of type: %s", type(obj)) - logger.debug("tagname: %s", tagname) - logger.debug("Config: %s", config) - - meta_args = (datafile, obj, tagname, config) - logger.debug( - "sending in %s", - dict( - zip(("datafile", "obj", "tagname", "config", "content"), meta_args) - ), - ) - sumo_file = convert_2_sumo_file( - obj, table_2_bytestring, generate_table_meta, meta_args - ) + if obj is None: + return obj + + bytestring = table_2_bytestring(obj) + metadata = generate_table_meta(datafile, obj, tagname, config) + + sumo_file = FileOnJob(bytestring, metadata) + sumo_file.path = metadata["file"]["relative_path"] + sumo_file.metadata_path = "" + sumo_file.size = len(sumo_file.byte_string) + return sumo_file @@ -138,9 +111,6 @@ def get_table( pd.DataFrame: the extracted data """ logger = logging.getLogger(__file__ + ".get_table") - logger.debug( - "Input arguments %s", - ) extract_df = SUBMOD_DICT[submod]["extract"] arrow = kwargs.get("arrow", True) try: @@ -150,7 +120,6 @@ def get_table( except KeyError: logger.debug("No arrow key to delete") output = None - trace = None # TODO: see if there is a cleaner way with rft, see functions # find_md_log, and complete_rft, but needs really to be fixed in res2df md_log_file = find_md_log(submod, kwargs) @@ -191,18 +160,11 @@ def get_table( except TypeError: logger.warning("Type error, cannot convert to arrow") - except TypeError: - trace = sys.exc_info()[1] - except FileNotFoundError: - trace = sys.exc_info()[1] - except ValueError: - trace = sys.exc_info()[1] - if trace is not None: + except (TypeError, FileNotFoundError, ValueError): logger.warning( "Trace: %s, \nNo results produced ", - trace, + sys.exc_info()[1], ) - logger.debug("Returning %s", output) return output @@ -214,10 +176,7 @@ def upload_tables(sim2sumoconfig, config, dispatcher): config (dict): the fmu config file with metadata env (str): what environment to upload to """ - logger = logging.getLogger(__file__ + ".upload_tables") - logger.debug("Will upload with settings %s", sim2sumoconfig) for datafile_path, submod_and_options in sim2sumoconfig.items(): - logger.debug("datafile: %s", datafile_path) upload_tables_from_simulation_run( datafile_path, submod_and_options, @@ -237,18 +196,13 @@ def upload_vfp_tables_from_simulation_run( config (dict): the fmu config with metadata dispatcher (sim2sumo.common.Dispatcher): job dispatcher """ - logger = logging.getLogger( - __name__ + ".upload_vfp_tables_from_simulation_run" - ) vfp_dict = vfp_to_arrow_dict(datafile, options) for keyword, tables in vfp_dict.items(): for table in tables: table_number = str( table.schema.metadata[b"TABLE_NUMBER"].decode("utf-8") ) - logger.debug(table) tagname = f"{keyword}_{table_number}" - logger.debug("Generated tagname: %s", tagname) sumo_file = convert_table_2_sumo_file( datafile, table, tagname.lower(), config ) @@ -266,10 +220,9 @@ def upload_tables_from_simulation_run( dispatcher (sim2sumo.common.Dispatcher) """ logger = logging.getLogger(__name__ + ".upload_tables_from_simulation_run") - logger.info("Extracting tables from %s", datafile) for submod, options in submod_and_options.items(): if submod == "grid3d": - logger.debug("No tables for grid3d, skipping") + # No tables for grid3d continue if submod == "vfp": @@ -278,7 +231,6 @@ def upload_tables_from_simulation_run( ) else: table = get_table(datafile, submod, **options) - logger.debug("Sending %s onto file creation", table) sumo_file = convert_table_2_sumo_file( datafile, table, submod, config ) diff --git a/tests/conftest.py b/tests/conftest.py index da3c265f..d4ba8dbf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,19 +12,15 @@ from httpx import HTTPStatusError from sumo.wrapper import SumoClient -from xtgeo import gridproperty_from_file -from fmu.sumo.sim2sumo import grid3d +from xtgeo import grid_from_file, gridproperty_from_file from fmu.sumo.sim2sumo._special_treatments import convert_to_arrow REEK_ROOT = Path(__file__).parent / "data/reek" -REAL_PATH = "realization-0/iter-0/" REEK_REAL0 = REEK_ROOT / "realization-0/iter-0/" REEK_REAL1 = REEK_ROOT / "realization-1/iter-0/" -REEK_BASE = "2_R001_REEK" REEK_ECL_MODEL = REEK_REAL0 / "eclipse/model/" -REEK_DATA_FILE = REEK_ECL_MODEL / f"{REEK_BASE}-0.DATA" -CONFIG_OUT_PATH = REEK_REAL0 / "fmuconfig/output/" -CONFIG_PATH = CONFIG_OUT_PATH / "global_variables.yml" +REEK_DATA_FILE = REEK_ECL_MODEL / "2_R001_REEK-0.DATA" +CONFIG_PATH = REEK_REAL0 / "fmuconfig/output/global_variables.yml" EIGHTCELLS_DATAFILE = REEK_ECL_MODEL / "EIGHTCELLS.DATA" @@ -37,17 +33,24 @@ def set_up_tmp(path): return real0, eight_datafile, config_path +@pytest.fixture(scope="function", name="ert_run_scratch_files") +def _fix_ert_run_scratch_files(tmp_path): + # tmp_path is a fixture provided by pytest + return set_up_tmp(tmp_path / "scratch") + + +@pytest.fixture(scope="session", name="scratch_files") +def _fix_scratch_files(tmp_path_factory): + # tmp_path_factory is a fixture provided by pytest + return set_up_tmp(tmp_path_factory.mktemp("scratch")) + + @pytest.fixture(scope="session", name="token") def _fix_token(): token = os.environ.get("ACCESS_TOKEN") return token if token and len(token) else None -@pytest.fixture(scope="session", name="eightcells_datafile") -def _fix_eight(): - return EIGHTCELLS_DATAFILE - - @pytest.fixture(scope="session", name="eightfipnum") def _fix_fipnum(): return gridproperty_from_file( @@ -77,12 +80,6 @@ def _fix_sumo(token): return SumoClient(env="dev", token=token) -@pytest.fixture(scope="session", name="scratch_files") -def _fix_scratch_files(tmp_path_factory): - - return set_up_tmp(tmp_path_factory.mktemp("scratch")) - - @pytest.fixture(autouse=True, scope="function", name="set_ert_env") def _fix_ert_env(monkeypatch): monkeypatch.setenv("_ERT_REALIZATION_NUMBER", "0") @@ -92,7 +89,6 @@ def _fix_ert_env(monkeypatch): @pytest.fixture(scope="session", name="case_uuid") def _fix_register(scratch_files, token): - root = scratch_files[0].parents[1] case_metadata_path = root / "share/metadata/fmu_case.yml" case_metadata = yaml_load(case_metadata_path) @@ -119,10 +115,49 @@ def _fix_register(scratch_files, token): return sumo_uuid +@pytest.fixture(scope="function", name="ert_run_case_uuid") +def _fix_ert_run_case_uuid(ert_run_scratch_files, token): + root = ert_run_scratch_files[0].parents[1] + case_metadata_path = root / "share/metadata/fmu_case.yml" + case_metadata = yaml_load(case_metadata_path) + case_metadata["fmu"]["case"]["uuid"] = str(uuid.uuid4()) + case_metadata["tracklog"][0] = { + "datetime": datetime.now().isoformat(), + "user": { + "id": "dbs", + }, + "event": "created", + } + with open(case_metadata_path, "w", encoding="utf-8") as stream: + yaml.safe_dump(case_metadata, stream) + sumo_conn = SumoConnection(env="dev", token=token) + case = CaseOnDisk( + case_metadata_path, + sumo_conn, + verbosity="DEBUG", + ) + # Register the case in Sumo + sumo_uuid = case.register() + yield sumo_uuid + + # Teardown + try: + sumo_conn.delete(f"/objects('{sumo_uuid}')") + except HTTPStatusError: + print(f"{sumo_uuid} Already gone..") + + @pytest.fixture(scope="session", name="xtgeogrid") -def _fix_xtgeogrid(eightcells_datafile): +def _fix_xtgeogrid(): + """Export egrid file to sumo + + Args: + datafile (str): path to datafile + """ + egrid_path = str(EIGHTCELLS_DATAFILE).replace(".DATA", ".EGRID") + egrid = grid_from_file(egrid_path) - return grid3d.get_xtgeo_egrid(eightcells_datafile) + return egrid @pytest.fixture(name="teardown", autouse=True, scope="session") diff --git a/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables.yml b/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables.yml index 0907ae16..abdd1afe 100644 --- a/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables.yml +++ b/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables.yml @@ -21,8 +21,6 @@ masterdata: access: asset: name: Drogon - ssdl: - access_level: internal - rep_include: true + classification: internal sim2sumo: grid3d: true diff --git a/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables_w_eclpath.yml b/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables_w_eclpath.yml index 7759c637..1a6eb773 100644 --- a/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables_w_eclpath.yml +++ b/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables_w_eclpath.yml @@ -21,8 +21,6 @@ masterdata: access: asset: name: Drogon - ssdl: - access_level: internal - rep_include: true + classification: internal sim2sumo: datafile: eclipse/model/2_R001_REEK-0.DATA diff --git a/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables_w_eclpath_and_extras.yml b/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables_w_eclpath_and_extras.yml index 480ac212..a7f3e1eb 100644 --- a/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables_w_eclpath_and_extras.yml +++ b/tests/data/reek/realization-0/iter-0/fmuconfig/output/global_variables_w_eclpath_and_extras.yml @@ -21,9 +21,7 @@ masterdata: access: asset: name: Drogon - ssdl: - access_level: internal - rep_include: true + classification: internal sim2sumo: datafile: eclipse/model/2_R001_REEK-0.DATA datatypes: diff --git a/tests/data/reek/realization-0/iter-0/share/results/tables/.2_r001_reek--rft.csv.yml b/tests/data/reek/realization-0/iter-0/share/results/tables/.2_r001_reek--rft.csv.yml index 64dfbd7c..26c498ab 100644 --- a/tests/data/reek/realization-0/iter-0/share/results/tables/.2_r001_reek--rft.csv.yml +++ b/tests/data/reek/realization-0/iter-0/share/results/tables/.2_r001_reek--rft.csv.yml @@ -69,9 +69,6 @@ display: access: asset: name: Drogon - ssdl: - access_level: internal - rep_include: true classification: internal masterdata: smda: diff --git a/tests/data/reek/realization-0/iter-0/share/results/tables/.2_r001_reek--wellconnstatus.csv.yml b/tests/data/reek/realization-0/iter-0/share/results/tables/.2_r001_reek--wellconnstatus.csv.yml index a5f572f4..50df4e3b 100644 --- a/tests/data/reek/realization-0/iter-0/share/results/tables/.2_r001_reek--wellconnstatus.csv.yml +++ b/tests/data/reek/realization-0/iter-0/share/results/tables/.2_r001_reek--wellconnstatus.csv.yml @@ -60,9 +60,6 @@ display: access: asset: name: Drogon - ssdl: - access_level: internal - rep_include: true classification: internal masterdata: smda: diff --git a/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables.yml b/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables.yml index badeeb23..3ad279c5 100644 --- a/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables.yml +++ b/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables.yml @@ -21,7 +21,5 @@ masterdata: access: asset: name: Drogon - ssdl: - access_level: internal - rep_include: true + classification: internal sim2sumo: true diff --git a/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables_w_eclpath.yml b/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables_w_eclpath.yml index 7759c637..1a6eb773 100644 --- a/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables_w_eclpath.yml +++ b/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables_w_eclpath.yml @@ -21,8 +21,6 @@ masterdata: access: asset: name: Drogon - ssdl: - access_level: internal - rep_include: true + classification: internal sim2sumo: datafile: eclipse/model/2_R001_REEK-0.DATA diff --git a/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables_w_eclpath_and_extras.yml b/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables_w_eclpath_and_extras.yml index 46516f5e..adc6996c 100644 --- a/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables_w_eclpath_and_extras.yml +++ b/tests/data/reek/realization-1/iter-0/fmuconfig/output/global_variables_w_eclpath_and_extras.yml @@ -21,9 +21,7 @@ masterdata: access: asset: name: Drogon - ssdl: - access_level: internal - rep_include: true + classification: internal sim2sumo: datafile: eclipse/model/2_R001_REEK-0.DATA datatypes: diff --git a/tests/test_functions.py b/tests/test_functions.py index 15ee7449..a2ad84f1 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -1,11 +1,8 @@ """Test utility ecl2csv""" -import logging import os -from pathlib import Path from numpy.ma import allclose, allequal from shutil import copytree -from subprocess import PIPE, Popen from time import sleep from io import BytesIO import pandas as pd @@ -13,8 +10,9 @@ import pyarrow.parquet as pq import pytest -from xtgeo import Grid, GridProperty, gridproperty_from_file +from xtgeo import GridProperty, gridproperty_from_file +from fmu.sumo.sim2sumo import grid3d, tables from fmu.sumo.sim2sumo.common import ( find_datafiles, create_config_dict, @@ -23,28 +21,19 @@ find_datefield, find_datafiles_no_seedpoint, filter_options, + get_case_uuid, ) -from fmu.sumo.sim2sumo import grid3d, tables from fmu.sumo.sim2sumo._special_treatments import ( _define_submodules, convert_to_arrow, SUBMODULES, ) -from fmu.sumo.sim2sumo.common import get_case_uuid from fmu.sumo.uploader import SumoConnection -REEK_ROOT = Path(__file__).parent / "data/reek" -REAL_PATH = "realization-0/iter-0/" -REEK_REAL0 = REEK_ROOT / "realization-0/iter-0/" -REEK_REAL1 = REEK_ROOT / "realization-1/iter-0/" -REEK_BASE = "2_R001_REEK" -REEK_ECL_MODEL = REEK_REAL0 / "eclipse/model/" -REEK_DATA_FILE = REEK_ECL_MODEL / f"{REEK_BASE}-0.DATA" -CONFIG_OUT_PATH = REEK_REAL0 / "fmuconfig/output/" -CONFIG_PATH = CONFIG_OUT_PATH / "global_variables.yml" + +from conftest import REEK_REAL0, REEK_REAL1, REEK_DATA_FILE -LOGGER = logging.getLogger(__file__) SLEEP_TIME = 3 @@ -64,18 +53,13 @@ def check_sumo(case_uuid, tag_prefix, correct, class_type, sumo): else: # The plus one is because we are always uploading the parameters.txt automatically check_nr = correct + 1 - print(query) results = sumo.get(path, query).json() - LOGGER.debug(results["hits"]) returned = results["hits"]["total"]["value"] - LOGGER.debug("This is returned %s", returned) assert ( returned == check_nr - ), f"Supposed to upload {correct}, but actual were {returned}" - - print(f"**************\nFound {correct} {class_type} objects") + ), f"Supposed to upload {check_nr}, but actual were {returned}" sumo.delete( path, @@ -86,46 +70,8 @@ def check_sumo(case_uuid, tag_prefix, correct, class_type, sumo): sumo.delete(path, query) -def write_ert_config_and_run(runpath): - ert_config_path = "sim2sumo.ert" - encoding = "utf-8" - ert_full_config_path = runpath / ert_config_path - print(f"Running with path {ert_full_config_path}") - with open(ert_full_config_path, "w", encoding=encoding) as stream: - - stream.write( - f"DEFINE dev\nNUM_REALIZATIONS 1\nMAX_SUBMIT 1\nRUNPATH {runpath}\nFORWARD_MODEL SIM2SUMO" - ) - with Popen( - ["ert", "test_run", str(ert_full_config_path)], - stdout=PIPE, - stderr=PIPE, - ) as process: - stdout, stderr = process.communicate() - - print( - f"After er run all these files where found at runpath {list(Path(runpath).glob('*'))}" - ) - if stdout: - print("stdout:", stdout.decode(encoding), sep="\n") - if stderr: - print("stderr:", stderr.decode(encoding), sep="\n") - try: - error_content = Path(runpath / "ERROR").read_text(encoding=encoding) - except FileNotFoundError: - error_content = "" - assert ( - not error_content - ), f"ERROR file found with content:\n{error_content}" - assert Path( - runpath / "OK" - ).is_file(), f"running {ert_full_config_path}, No OK file" - - def check_expected_exports(expected_exports, shared_grid, prefix): - print("Looking in ", shared_grid) parameters = list(shared_grid.glob(f"*--{prefix.lower()}-*.roff")) - print(parameters) meta = list(shared_grid.glob(f"*--{prefix.lower()}-*.roff.yml")) nr_parameter = len(parameters) nr_meta = len(meta) @@ -147,7 +93,6 @@ def check_expected_exports(expected_exports, shared_grid, prefix): ], ) def test_non_standard_filter_options(submod, options): - returned_options = filter_options(submod, options) assert ( len(returned_options) > 0 @@ -164,11 +109,8 @@ def test_find_datefield(datestring, expected_result): def test_get_case_uuid(case_uuid, scratch_files, monkeypatch): real0 = scratch_files[0] - monkeypatch.chdir(real0) - uuid = get_case_uuid(real0, parent_level=1) - assert uuid == case_uuid @@ -199,7 +141,6 @@ def test_get_case_uuid(case_uuid, scratch_files, monkeypatch): ], ) def test_create_config_dict(config, nrdatafiles, nrsubmodules, tmp_path): - sim2sumo_config = {"sim2sumo": config} real1 = tmp_path / "realone" copytree(REEK_REAL1, real1) @@ -209,7 +150,6 @@ def test_create_config_dict(config, nrdatafiles, nrsubmodules, tmp_path): len(inputs) == nrdatafiles ), f"{inputs.keys()} expected to have len {nrdatafiles} datafiles" for submod, subdict in inputs.items(): - assert ( len(subdict) == nrsubmodules ), f"{subdict} for {submod} expected to have {nrsubmodules} submodules" @@ -229,6 +169,11 @@ def test_xtgeo_2_bytestring(eightfipnum): assert isinstance(bytestr, bytes) +def test_table_2_bytestring(reekrft): + bytestr = tables.table_2_bytestring(reekrft) + assert isinstance(bytestr, bytes) + + def test_convert_xtgeo_2_sumo_file( eightfipnum, scratch_files, config, case_uuid, sumo, monkeypatch, token ): @@ -237,11 +182,9 @@ def test_convert_xtgeo_2_sumo_file( file = grid3d.convert_xtgeo_2_sumo_file( scratch_files[1], eightfipnum, "INIT", config ) - print(case_uuid) - print(file.metadata) - print(file.byte_string) sumo_conn = SumoConnection(env="dev", token=token) nodisk_upload([file], case_uuid, "dev", connection=sumo_conn) + sleep(SLEEP_TIME) obj = get_sumo_object(sumo, case_uuid, "EIGHTCELLS", "FIPNUM") prop = gridproperty_from_file(obj) assert isinstance( @@ -254,17 +197,15 @@ def test_convert_xtgeo_2_sumo_file( def test_convert_table_2_sumo_file( reekrft, scratch_files, config, case_uuid, sumo, monkeypatch, token ): - monkeypatch.chdir(scratch_files[0]) file = tables.convert_table_2_sumo_file( scratch_files[1], reekrft, "rft", config ) - print(file.metadata) - print(file.byte_string) sumo_conn = SumoConnection(env="dev", token=token) nodisk_upload([file], case_uuid, "dev", connection=sumo_conn) + sleep(SLEEP_TIME) obj = get_sumo_object(sumo, case_uuid, "EIGHTCELLS", "rft") table = pq.read_table(obj) assert isinstance( @@ -275,22 +216,19 @@ def test_convert_table_2_sumo_file( def get_sumo_object(sumo, case_uuid, name, tagname): - print("Fetching object with name, and tag", name, tagname) - sleep(SLEEP_TIME) path = f"/objects('{case_uuid}')/search" results = sumo.get( path, f"$query=data.name:{name} AND data.tagname:{tagname}" ).json() - print(results) obj_id = results["hits"]["hits"][0]["_id"] obj = BytesIO(sumo.get(f"/objects('{obj_id}')/blob").content) - print(type(obj)) return obj -def test_generate_grid3d_meta(eightcells_datafile, eightfipnum, config): +def test_generate_grid3d_meta(scratch_files, eightfipnum, config, monkeypatch): + monkeypatch.chdir(scratch_files[0]) meta = grid3d.generate_grid3d_meta( - eightcells_datafile, eightfipnum, "INIT", config, "property" + scratch_files[1], eightfipnum, "INIT", config ) assert isinstance(meta, dict) @@ -366,12 +304,9 @@ def test_upload_simulation_run( def test_submodules_dict(): """Test generation of submodule list""" sublist, submods = _define_submodules() - LOGGER.info(submods) assert isinstance(sublist, tuple) assert isinstance(submods, dict) for submod_name, submod_dict in submods.items(): - LOGGER.info(submod_name) - LOGGER.info(submod_dict) assert isinstance(submod_name, str) assert ( "/" not in submod_name @@ -393,17 +328,14 @@ def test_submodules_dict(): # Skipping wellcompletion data, since this needs zonemap, which none of the others do def test_get_table(submod): """Test fetching of dataframe""" - extras = {} - if submod == "wellcompletiondata": - extras["zonemap"] = "data/reek/zones.lyr" - frame = tables.get_table(REEK_DATA_FILE, submod) + frame = tables.get_table(REEK_DATA_FILE, submod, arrow=False) assert isinstance( - frame, pa.Table - ), f"Call for get_dataframe should produce dataframe, but produces {type(frame)}" + frame, pd.DataFrame + ), f"Call for get_table with arrow=False should produce dataframe, but produces {type(frame)}" frame = tables.get_table(REEK_DATA_FILE, submod, arrow=True) assert isinstance( frame, pa.Table - ), f"Call for get_dataframe with arrow=True should produce pa.Table, but produces {type(frame)}" + ), f"Call for get_table with arrow=True should produce pa.Table, but produces {type(frame)}" if submod == "summary": assert ( frame.schema.field("FOPT").metadata is not None @@ -424,58 +356,15 @@ def test_convert_to_arrow(): assert isinstance(table, pa.Table), "Did not convert to table" -def test_get_xtgeo_egrid(eightcells_datafile): - egrid = grid3d.get_xtgeo_egrid(eightcells_datafile) - assert isinstance(egrid, Grid), f"Expected xtgeo.Grid, got {type(egrid)}" - - -def test_sim2sumo_with_ert(scratch_files, case_uuid, sumo, monkeypatch): - monkeypatch.chdir(scratch_files[0]) - real0 = scratch_files[0] - write_ert_config_and_run(real0) - expected_exports = 88 - path = f"/objects('{case_uuid}')/search" - results = sumo.post( - path, - json={ - "query": { - "bool": { - "must_not": [ - { - "terms": { - "class.keyword": [ - "case", - "iteration", - "realization", - ] - } - } - ], - } - }, - "size": 0, - "track_total_hits": True, - }, - ).json() - - returned = results["hits"]["total"]["value"] - LOGGER.debug("This is returned %s", returned) - assert ( - returned == expected_exports - ), f"Supposed to upload {expected_exports}, but actual were {returned}" - - @pytest.mark.parametrize("real,nrdfiles", [(REEK_REAL0, 2), (REEK_REAL1, 5)]) def test_find_datafiles_reek(real, nrdfiles): - os.chdir(real) datafiles = find_datafiles(None, {}) expected_tools = ["eclipse", "opm", "ix", "pflotran"] assert ( len(datafiles) == nrdfiles - ), f"Haven't found correct nr of datafiles {nrdfiles} files but {len(datafiles)} ({datafiles})" - for datafile in datafiles: - found_path = datafile + ), f"Incorrect number of datafiles found. Expected {nrdfiles} but found {len(datafiles)}" + for found_path in datafiles: parent = found_path.parent.parent.name assert parent in expected_tools, f"|{parent}| not in {expected_tools}" correct_suff = ".DATA" @@ -492,4 +381,3 @@ def test_find_datafiles_no_seedpoint(tmp_path): os.chdir(real1) files = find_datafiles_no_seedpoint() assert len(files) == 5 - print({data_path.name: data_path for data_path in files}) diff --git a/tests/test_w_drogon.py b/tests/test_w_drogon.py index 787b59f3..62570949 100644 --- a/tests/test_w_drogon.py +++ b/tests/test_w_drogon.py @@ -3,9 +3,11 @@ vfp_to_arrow_dict, add_md_to_rft, ) -from fmu.sumo.sim2sumo.tables import upload_vfp_tables_from_simulation_run +from fmu.sumo.sim2sumo.tables import ( + upload_vfp_tables_from_simulation_run, + get_table, +) from fmu.sumo.sim2sumo.common import Dispatcher -from fmu.sumo.sim2sumo.tables import get_table from test_functions import check_sumo import pytest @@ -17,28 +19,25 @@ @pytest.mark.parametrize( "options,keycombo,nrkeys,nrtables", [ - ({}, "VFPPRODVFPINJ", 2, 5), - ({"keyword": "VFPINJ"}, "VFPINJ", 1, 1), - ({"keyword": ["VFPPROD", "VFPINJ"]}, "VFPPRODVFPINJ", 2, 5), - ({"vfpnumbers": "1,2,4"}, "VFPPRODVFPINJ", 2, 3), + ({}, ["VFPPROD", "VFPINJ"], 2, 5), + ({"keyword": "VFPINJ"}, ["VFPINJ"], 1, 1), + ({"keyword": ["VFPPROD", "VFPINJ"]}, ["VFPPROD", "VFPINJ"], 2, 5), + ({"vfpnumbers": "1,2,4"}, ["VFPPROD", "VFPINJ"], 2, 3), ], ) def test_vfp_to_arrow(options, keycombo, nrkeys, nrtables): - arrow_dict = vfp_to_arrow_dict(DROGON_DATAFILE, options) assert len(arrow_dict) == nrkeys nr_tables = 0 - returned_keycombo = "" - for key, value in arrow_dict.items(): + for value in arrow_dict.values(): nr_tables += len(value) - returned_keycombo += key assert ( nr_tables == nrtables ), f"Returned {nr_tables} tables, but should be {nrtables}" - assert ( - returned_keycombo == keycombo - ), f"Returned keycombo {returned_keycombo}, should be {keycombo}" + assert set(arrow_dict.keys()) == set( + keycombo + ), f"Returned keys {arrow_dict.keys()}, should be {keycombo}" def test_vfp_tables_from_simulation_run( diff --git a/tests/test_with_ert.py b/tests/test_with_ert.py new file mode 100644 index 00000000..72de670e --- /dev/null +++ b/tests/test_with_ert.py @@ -0,0 +1,79 @@ +# When ERT runs it makes changes to the files. This can cause issues for other tests if they expect certain files to exist etc. +# Tests that run ERT should therefore create their own temporary file structure, completely separate from other tests. +from pathlib import Path + +from subprocess import PIPE, Popen + + +def write_ert_config_and_run(runpath): + ert_config_path = "sim2sumo.ert" + encoding = "utf-8" + ert_full_config_path = runpath / ert_config_path + print(f"Running with path {ert_full_config_path}") + with open(ert_full_config_path, "w", encoding=encoding) as stream: + + stream.write( + f"DEFINE dev\nNUM_REALIZATIONS 1\nMAX_SUBMIT 1\nRUNPATH {runpath}\nFORWARD_MODEL SIM2SUMO" + ) + with Popen( + ["ert", "test_run", str(ert_full_config_path)], + stdout=PIPE, + stderr=PIPE, + ) as process: + stdout, stderr = process.communicate() + + print( + f"After ert run all these files where found at runpath {[item.name for item in list(Path(runpath).glob('*'))]}" + ) + if stdout: + print("stdout:", stdout.decode(encoding), sep="\n") + if stderr: + print("stderr:", stderr.decode(encoding), sep="\n") + try: + error_content = Path(runpath / "ERROR").read_text(encoding=encoding) + except FileNotFoundError: + error_content = "" + assert ( + not error_content + ), f"ERROR file found with content:\n{error_content}" + assert Path( + runpath / "OK" + ).is_file(), f"running {ert_full_config_path}, No OK file" + + +def test_sim2sumo_with_ert( + ert_run_scratch_files, ert_run_case_uuid, sumo, monkeypatch +): + monkeypatch.chdir(ert_run_scratch_files[0]) + real0 = ert_run_scratch_files[0] + # After this the files in the current directory are changed and parameters.txt no longer exists + write_ert_config_and_run(real0) + expected_exports = 88 + path = f"/objects('{ert_run_case_uuid}')/search" + results = sumo.post( + path, + json={ + "query": { + "bool": { + "must_not": [ + { + "terms": { + "class.keyword": [ + "case", + "iteration", + "realization", + ] + } + } + ], + } + }, + "size": 0, + "track_total_hits": True, + }, + ).json() + + returned = results["hits"]["total"]["value"] + assert ( + returned == expected_exports + ), f"Supposed to upload {expected_exports}, but actual were {returned}" From 5b30fb85f85a2c6683a882d4b0b3631d6b1414d2 Mon Sep 17 00:00:00 2001 From: Runar Ask Johannessen <89020325+equinor-ruaj@users.noreply.github.com> Date: Thu, 17 Oct 2024 09:34:27 +0200 Subject: [PATCH 2/2] Require certain ERT env variables before running (#109) * Require certain ERT env variables before running * Update error message when missing required env vars --- src/fmu/sumo/sim2sumo/main.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/fmu/sumo/sim2sumo/main.py b/src/fmu/sumo/sim2sumo/main.py index 590f58dc..09dec1e6 100644 --- a/src/fmu/sumo/sim2sumo/main.py +++ b/src/fmu/sumo/sim2sumo/main.py @@ -2,6 +2,7 @@ import argparse import logging +from os import environ from .grid3d import upload_simulation_runs from .tables import upload_tables @@ -54,9 +55,24 @@ def parse_args(): return args +# fmu-dataio needs these when creating metadata +REQUIRED_ENV_VARS = ["_ERT_EXPERIMENT_ID", "_ERT_RUNPATH"] + + def main(): """Main function to be called""" logger = logging.getLogger(__file__ + ".main") + + missing = 0 + for envVar in REQUIRED_ENV_VARS: + if environ.get(envVar) is None: + print(f"Required environment variable {envVar} is not set.") + missing += 1 + + if missing > 0: + print("Required ERT environment variables not found. This can happen if sim2sumo was called outside the ERT context. Stopping.") + exit() + args = parse_args() logger.debug("Running with arguments %s", args)