From 485e5e4618db523b0bbdb4ba6fbd8bfab626d7e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Tue, 24 Sep 2024 22:16:07 +0200 Subject: [PATCH 01/15] Prioritize .DATA files over other filetypes if multiple files with same stem --- src/fmu/sumo/sim2sumo/common.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index de3d159..be3cdf6 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -305,12 +305,14 @@ def find_datafiles_no_seedpoint(): logger = logging.getLogger(__file__ + ".find_datafiles_no_seedpoint") cwd = Path().cwd() logger.info("Looking for files in %s", cwd) - valid_filetypes = [".afi", ".DATA", ".in"] - datafiles = list( - filter( - lambda file: file.suffix in valid_filetypes, cwd.glob("*/*/*.*") - ) - ) + valid_filetypes = [".DATA", ".afi", ".in"] + datafiles = [] + for filetypes in valid_filetypes: + datafiles.extend(list( + filter( + lambda file: (file.suffix in valid_filetypes and file.with_suffix('').stem not in [datafile.with_suffix('').stem for datafile in datafiles]), cwd.glob(f"*/*/*{filetypes}") + ) + )) logger.debug("Found the following datafiles %s", datafiles) return datafiles From 12babc7fa9de98f249fcdbec914012ec78962bb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Tue, 24 Sep 2024 22:16:50 +0200 Subject: [PATCH 02/15] Formating --- src/fmu/sumo/sim2sumo/common.py | 64 ++++++++++++--------------------- 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index be3cdf6..a44604a 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -80,9 +80,7 @@ def filter_options(submod, kwargs): for key, value in kwargs.items() if (key in submod_options) or key in ["arrow", "md_log_file"] } - filtered["arrow"] = kwargs.get( - "arrow", True - ) # defaulting of arrow happens here + filtered["arrow"] = kwargs.get("arrow", True) # defaulting of arrow happens here logger.debug("After filtering options for %s: %s", submod, filtered) non_options = [key for key in kwargs if key not in filtered] if len(non_options) > 0: @@ -114,10 +112,7 @@ def find_full_path(datafile, paths): try: return paths[data_name] except KeyError: - mess = ( - "Datafile %s, with derived name %s, not found in %s," - " have to skip" - ) + mess = "Datafile %s, with derived name %s, not found in %s," " have to skip" logger.warning(mess, datafile, data_name, paths) return None @@ -136,9 +131,7 @@ def find_datafile_paths(): if name not in paths: paths[name] = data_path else: - logger.warning( - "Name %s from file %s allready used", name, data_path - ) + logger.warning("Name %s from file %s allready used", name, data_path) return paths @@ -181,9 +174,7 @@ def create_config_dict(config, datafile=None, datatype=None): return outdict -def create_config_dict_from_list( - datatype, simconfig, datafiles, paths, grid3d -): +def create_config_dict_from_list(datatype, simconfig, datafiles, paths, grid3d): """Prepare dictionary from list of datafiles and simconfig Args: @@ -256,9 +247,7 @@ def create_config_dict_from_dict(datafiles, paths, grid3d): submod, options, ) - outdict[datafile_path][submod] = filter_options( - submod, options - ) + outdict[datafile_path][submod] = filter_options(submod, options) except AttributeError: for submod in datafiles[datafile]: outdict[datafile_path][submod] = {} @@ -308,11 +297,18 @@ def find_datafiles_no_seedpoint(): valid_filetypes = [".DATA", ".afi", ".in"] datafiles = [] for filetypes in valid_filetypes: - datafiles.extend(list( - filter( - lambda file: (file.suffix in valid_filetypes and file.with_suffix('').stem not in [datafile.with_suffix('').stem for datafile in datafiles]), cwd.glob(f"*/*/*{filetypes}") + datafiles.extend( + list( + filter( + lambda file: ( + file.suffix in valid_filetypes + and file.with_suffix("").stem + not in [datafile.with_suffix("").stem for datafile in datafiles] + ), + cwd.glob(f"*/*/*{filetypes}"), + ) ) - )) + ) logger.debug("Found the following datafiles %s", datafiles) return datafiles @@ -332,17 +328,13 @@ def __init__( self._parentid = get_case_uuid(datafile) self._conn = SumoConnection(env=env, token=token) self._env = env - self._mem_limit = ( - psutil.virtual_memory().available * self._limit_percent - ) + self._mem_limit = psutil.virtual_memory().available * self._limit_percent self._config_path = config_path self._mem_count = 0 self._count = 0 self._objects = [] - self._logger.info( - "Init, parent is %s, and env is %s", self.parentid, self.env - ) + self._logger.info("Init, parent is %s, and env is %s", self.parentid, self.env) @property def parentid(self): @@ -373,9 +365,7 @@ def add(self, file): self._mem_count += file.size self._objects.append(file) self._count += 1 - self._mem_limit = ( - psutil.virtual_memory().available * self._limit_percent - ) + self._mem_limit = psutil.virtual_memory().available * self._limit_percent self._logger.debug( "Count is %s, and mem frac is %f1.1", @@ -462,9 +452,7 @@ def generate_meta(config, datafile_path, tagname, obj, content): relative_parent = str(Path(datafile_path).parents[2]).replace( str(Path(datafile_path).parents[4]), "" ) - metadata["file"] = { - "relative_path": f"{relative_parent}/{name}--{tagname}".lower() - } + metadata["file"] = {"relative_path": f"{relative_parent}/{name}--{tagname}".lower()} logger.debug("Generated metadata are:\n%s", metadata) return metadata @@ -492,9 +480,7 @@ def convert_2_sumo_file(obj, converter, metacreator, meta_args): bytestring = converter(obj) metadata = metacreator(*meta_args) logger.debug("Metadata created") - assert isinstance( - metadata, dict - ), f"meta should be dict, but is {type(metadata)}" + assert isinstance(metadata, dict), f"meta should be dict, but is {type(metadata)}" assert isinstance( bytestring, bytes ), f"bytestring should be bytes, but is {type(bytestring)}" @@ -521,9 +507,7 @@ def nodisk_upload(files, parent_id, config_path, env="prod", connection=None): if len(files) > 0: if connection is None: connection = SumoConnection(env=env) - status = upload_files( - files, parent_id, connection, config_path=config_path - ) + status = upload_files(files, parent_id, connection, config_path=config_path) print("Status after upload: ", end="\n--------------\n") for state, obj_status in status.items(): print(f"{state}: {len(obj_status)}") @@ -543,9 +527,7 @@ def give_name(datafile_path: str) -> str: logger = logging.getLogger(__name__ + ".give_name") logger.info("Giving name from path %s", datafile_path) datafile_path_posix = Path(datafile_path) - base_name = datafile_path_posix.name.replace( - datafile_path_posix.suffix, "" - ) + base_name = datafile_path_posix.name.replace(datafile_path_posix.suffix, "") while base_name[-1].isdigit() or base_name.endswith("-"): base_name = base_name[:-1] logger.info("Returning name %s", base_name) From 86cef1d4f297b588db164530a6f99655542fc0c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Wed, 25 Sep 2024 14:28:52 +0200 Subject: [PATCH 03/15] Datafiles --- src/fmu/sumo/sim2sumo/common.py | 286 ++++++++++---------------------- src/fmu/sumo/sim2sumo/tables.py | 1 + 2 files changed, 90 insertions(+), 197 deletions(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index a44604a..692516f 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -3,6 +3,7 @@ import logging import re from pathlib import Path +from collections.abc import Iterable import psutil import yaml @@ -54,6 +55,8 @@ def get_case_uuid(file_path, parent_level=4): case_meta_path = ( Path(file_path).parents[parent_level] / "share/metadata/fmu_case.yml" ) + #case_meta_path = (Path().cwd() / "../../share/metadata/fmu_case.yml" ) + print(case_meta_path) logger.debug("Case meta path: %s", case_meta_path) case_meta = yaml_load(case_meta_path) uuid = case_meta["fmu"]["case"]["uuid"] @@ -97,221 +100,110 @@ def filter_options(submod, kwargs): return filtered -def find_full_path(datafile, paths): - """Find full path for datafile from dictionary +def find_datafiles(seedpoint=None): + """Find datafiles relative to an optional seedpoint or the current working directory. Args: - datafile (str): path or name of path - paths (dict): dictionary of file paths + seedpoint (str|Path|list, optional): Specific file, list of directories, or single directory to search for datafiles. Returns: - Path: path to the full datafile + list: The datafiles found with unique stem names, as full paths. """ - logger = logging.getLogger(__file__ + ".find_full_path") - data_name = give_name(datafile) - try: - return paths[data_name] - except KeyError: - mess = "Datafile %s, with derived name %s, not found in %s," " have to skip" - logger.warning(mess, datafile, data_name, paths) - return None - - -def find_datafile_paths(): - """Find all simulator paths - - Returns: - dict: key is name to use in sumo, value full path to file - """ - logger = logging.getLogger(__file__ + ".find_datafile_paths") - paths = {} - for data_path in find_datafiles_no_seedpoint(): - name = give_name(data_path) - - if name not in paths: - paths[name] = data_path - else: - logger.warning("Name %s from file %s allready used", name, data_path) - - return paths + logger = logging.getLogger(__file__ + ".find_datafiles") + valid_filetypes = [".DATA", ".afi", ".in"] + datafiles = [] + cwd = Path().cwd() # Get the current working directory + + if isinstance(seedpoint, dict): + # Extract the values (paths) from the dictionary and treat them as a list + seedpoint = list(seedpoint.values()) + if isinstance(seedpoint, list): + # If seedpoint is a list, ensure all elements are strings or Path objects + seedpoint = [Path(sp) for sp in seedpoint] + + if seedpoint: + for sp in seedpoint: + full_path = cwd / sp if not sp.is_absolute() else sp # Make the path absolute + if full_path.is_dir(): + # Search for valid files within the directory + for filetype in valid_filetypes: + datafiles.extend(full_path.rglob(f"*{filetype}")) + elif full_path.is_file() and full_path.suffix in valid_filetypes: + # Add the file if it has a valid filetype + datafiles.append(full_path) + else: + logger.warning(f"{full_path} is not a valid directory or datafile with accepted filetype") + else: + # Search the current working directory if no seedpoint is provided + for filetype in valid_filetypes: + datafiles.extend(cwd.rglob(f"*/*/*{filetype}")) + # Filter out files with duplicate stems, keeping the first occurrence + unique_stems = set() + unique_datafiles = [] + for datafile in datafiles: + stem = datafile.with_suffix("").stem + if stem not in unique_stems: + unique_stems.add(stem) + unique_datafiles.append(datafile.resolve()) # Resolve to full path + print(f"Using datafiles: {str(unique_datafiles)} ") + return unique_datafiles def create_config_dict(config, datafile=None, datatype=None): - """Read config settings and make dictionary for use when exporting + """Read config settings and make dictionary for use when exporting. Args: - config (dict): the settings for export of simulator results - datafile (str, None): overule with one datafile - datatype (str, None): overule with one datatype + config (dict): the settings for export of simulator results. + datafile (str|Path|list, None): overrule with one datafile or list of datafiles. + datatype (str|list, None): overrule with one datatype or a list of datatypes. Returns: dict: dictionary with key as path to datafile, value as dict of - submodule and option + submodule and option. """ - # datafile can be read as list, or string which can be either folder or filepath - logger = logging.getLogger(__file__ + ".read_config") - logger.debug("Using extras %s", [datafile, datatype]) - logger.debug("Input config keys are %s", config.keys()) - + logger = logging.getLogger(__file__ + ".create_config_dict") simconfig = config.get("sim2sumo", {}) - if len(simconfig) == 0: - logger.warning("We are starting from scratch") - else: - logger.debug("This is the starting point %s", simconfig) grid3d = simconfig.get("grid3d", False) - if isinstance(simconfig, bool): - simconfig = {} - datafiles = find_datafiles(datafile, simconfig) - paths = find_datafile_paths() - logger.debug("Datafiles %s", datafiles) - if isinstance(datafiles, dict): - outdict = create_config_dict_from_dict(datafiles, paths, grid3d) - else: - outdict = create_config_dict_from_list( - datatype, simconfig, datafiles, paths, grid3d - ) - logger.debug("Returning %s", outdict) - return outdict - - -def create_config_dict_from_list(datatype, simconfig, datafiles, paths, grid3d): - """Prepare dictionary from list of datafiles and simconfig - Args: - datatype (str): datatype to overule input - simconfig (dict): dictionary with input for submods and options - datafiles (list): list of datafiles - paths (dict): list of all relevant datafiles - - Returns: - dict: results as one unified dictionary - """ - logger = logging.getLogger(__file__ + ".prepare_list_for_sendoff") - logger.debug("Simconfig input is: %s", simconfig) - - if datatype is None: - submods = simconfig.get("datatypes", ["summary", "rft", "satfunc"]) - - if submods == "all": - submods = SUBMODULES + # Use the provided datafile or datatype if given, otherwise use simconfig + datafile = datafile if datafile is not None else simconfig.get("datafile", {}) + datatype = datatype if datatype is not None else simconfig.get("datatypes", None) + + # Initialize the dictionary to hold the configuration for each datafile + sim2sumoconfig = {} + + # If datafile is a dictionary, iterate over its items + if isinstance(datafile, dict): + for filepath, submods in datafile.items(): + # Convert the filepath to a Path object + path = Path(filepath) + + # If the path is a directory, find all files within it + if path.is_dir(): + datafiles = find_datafiles(path) + else: + # If the path is a file, use it directly + datafiles = [path] + + # Create config entries for each datafile + for datafile_path in datafiles: + sim2sumoconfig[datafile_path] = {} + for submod in submods: + # Use the global options or default to {"arrow": True} + options = simconfig.get("options", {"arrow": True}) + sim2sumoconfig[datafile_path][submod] = filter_options(submod, options) + sim2sumoconfig[datafile_path]["grid3d"] = grid3d else: - submods = [datatype] - - logger.debug("Submodules to extract with: %s", submods) - outdict = {} - options = simconfig.get("options", {"arrow": True}) - - for datafile in datafiles: - datafile_path = find_full_path(datafile, paths) - if datafile_path is None: - continue - outdict[datafile_path] = {} - try: - suboptions = submods.values() - except AttributeError: - suboptions = options - for submod in submods: - outdict[datafile_path][submod] = filter_options(submod, suboptions) - outdict[datafile_path]["grid3d"] = grid3d - - return outdict - - -def create_config_dict_from_dict(datafiles, paths, grid3d): - """Prepare dictionary containing datafile information - - Args: - datafiles (dict): the dictionary of datafiles - paths (dict): list of all relevant datafiles - - Returns: - dict: results as one unified dictionary - """ - logger = logging.getLogger(__file__ + ".prepare_dict_for_sendoff") - - outdict = {} - for datafile in datafiles: - datafile_path = find_full_path(datafile, paths) - if datafile_path not in paths.values(): - logger.warning("%s not contained in paths", datafile_path) - if datafile_path is None: - continue - outdict[datafile_path] = {} - if datafile_path is None: - continue - try: - for submod, options in datafiles[datafile].items(): - logger.debug( - "%s submod %s:\noptions: %s", - datafile_path, - submod, - options, - ) - outdict[datafile_path][submod] = filter_options(submod, options) - except AttributeError: - for submod in datafiles[datafile]: - outdict[datafile_path][submod] = {} - outdict[datafile_path]["grid3d"] = grid3d - logger.debug("Returning %s", outdict) - return outdict - - -def find_datafiles(seedpoint, simconfig): - """Find all relevant paths that can be datafiles - - Args: - seedpoint (str, list): path of datafile, or list of folders where one can find one - simconfig (dict): the sim2sumo config settings - - Returns: - list: list of datafiles to interrogate - """ - - logger = logging.getLogger(__file__ + ".find_datafiles") - datafiles = [] - seedpoint = simconfig.get("datafile", seedpoint) - if seedpoint is None: - datafiles = find_datafiles_no_seedpoint() - - elif isinstance(seedpoint, (str, Path)): - logger.debug("Using this string %s to find datafile(s)", seedpoint) - datafiles.append(seedpoint) - elif isinstance(seedpoint, list): - logger.debug("%s is list", seedpoint) - datafiles.extend(seedpoint) - else: - datafiles = seedpoint - logger.debug("Datafile(s) to use %s", datafiles) - return datafiles - - -def find_datafiles_no_seedpoint(): - """Find datafiles relative to an ert runpath - - Returns: - list: The datafiles found - """ - logger = logging.getLogger(__file__ + ".find_datafiles_no_seedpoint") - cwd = Path().cwd() - logger.info("Looking for files in %s", cwd) - valid_filetypes = [".DATA", ".afi", ".in"] - datafiles = [] - for filetypes in valid_filetypes: - datafiles.extend( - list( - filter( - lambda file: ( - file.suffix in valid_filetypes - and file.with_suffix("").stem - not in [datafile.with_suffix("").stem for datafile in datafiles] - ), - cwd.glob(f"*/*/*{filetypes}"), - ) - ) - ) - logger.debug("Found the following datafiles %s", datafiles) - return datafiles + # If datafile is not a dictionary, use the existing logic + datafiles_paths = find_datafiles(datafile) + for datafile_path in datafiles_paths: + sim2sumoconfig[datafile_path] = {} + for submod in datatype or []: + options = simconfig.get("options", {"arrow": True}) + sim2sumoconfig[datafile_path][submod] = filter_options(submod, options) + sim2sumoconfig[datafile_path]["grid3d"] = grid3d + return sim2sumoconfig class Dispatcher: """Controls upload to sumo""" @@ -325,7 +217,7 @@ def __init__( ): self._logger = logging.getLogger(__name__ + ".Dispatcher") self._limit_percent = 0.5 - self._parentid = get_case_uuid(datafile) + self._parentid = get_case_uuid(Path.cwd(),parent_level=1) self._conn = SumoConnection(env=env, token=token) self._env = env self._mem_limit = psutil.virtual_memory().available * self._limit_percent diff --git a/src/fmu/sumo/sim2sumo/tables.py b/src/fmu/sumo/sim2sumo/tables.py index 98b5def..1514c79 100644 --- a/src/fmu/sumo/sim2sumo/tables.py +++ b/src/fmu/sumo/sim2sumo/tables.py @@ -217,6 +217,7 @@ def upload_tables(sim2sumoconfig, config, dispatcher): logger = logging.getLogger(__file__ + ".upload_tables") logger.debug("Will upload with settings %s", sim2sumoconfig) for datafile_path, submod_and_options in sim2sumoconfig.items(): + datafile_path = datafile_path.resolve() logger.debug("datafile: %s", datafile_path) upload_tables_from_simulation_run( datafile_path, From ec1603641491ac7c93be5a3f3272d269c0cb3fe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 26 Sep 2024 09:05:17 +0200 Subject: [PATCH 04/15] SUBMODULES and formatting --- src/fmu/sumo/sim2sumo/common.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index 692516f..8172805 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -3,7 +3,6 @@ import logging import re from pathlib import Path -from collections.abc import Iterable import psutil import yaml @@ -55,8 +54,6 @@ def get_case_uuid(file_path, parent_level=4): case_meta_path = ( Path(file_path).parents[parent_level] / "share/metadata/fmu_case.yml" ) - #case_meta_path = (Path().cwd() / "../../share/metadata/fmu_case.yml" ) - print(case_meta_path) logger.debug("Case meta path: %s", case_meta_path) case_meta = yaml_load(case_meta_path) uuid = case_meta["fmu"]["case"]["uuid"] @@ -123,7 +120,9 @@ def find_datafiles(seedpoint=None): if seedpoint: for sp in seedpoint: - full_path = cwd / sp if not sp.is_absolute() else sp # Make the path absolute + full_path = ( + cwd / sp if not sp.is_absolute() else sp + ) # Make the path absolute if full_path.is_dir(): # Search for valid files within the directory for filetype in valid_filetypes: @@ -132,7 +131,9 @@ def find_datafiles(seedpoint=None): # Add the file if it has a valid filetype datafiles.append(full_path) else: - logger.warning(f"{full_path} is not a valid directory or datafile with accepted filetype") + logger.warning( + f"{full_path} is not a valid directory or datafile with accepted filetype" + ) else: # Search the current working directory if no seedpoint is provided for filetype in valid_filetypes: @@ -146,9 +147,10 @@ def find_datafiles(seedpoint=None): if stem not in unique_stems: unique_stems.add(stem) unique_datafiles.append(datafile.resolve()) # Resolve to full path - print(f"Using datafiles: {str(unique_datafiles)} ") + logger.info(f"Using datafiles: {str(unique_datafiles)} ") return unique_datafiles + def create_config_dict(config, datafile=None, datatype=None): """Read config settings and make dictionary for use when exporting. @@ -169,6 +171,16 @@ def create_config_dict(config, datafile=None, datatype=None): datafile = datafile if datafile is not None else simconfig.get("datafile", {}) datatype = datatype if datatype is not None else simconfig.get("datatypes", None) + if datatype is None: + submods = simconfig.get("datatypes", ["summary", "rft", "satfunc"]) + + if submods == "all": + submods = SUBMODULES + else: + submods = [datatype] + + logger.debug("Submodules to extract with: %s", submods) + # Initialize the dictionary to hold the configuration for each datafile sim2sumoconfig = {} @@ -183,6 +195,7 @@ def create_config_dict(config, datafile=None, datatype=None): datafiles = find_datafiles(path) else: # If the path is a file, use it directly + # Should probably check if it is a file, and of the correct filetype. datafiles = [path] # Create config entries for each datafile @@ -191,7 +204,9 @@ def create_config_dict(config, datafile=None, datatype=None): for submod in submods: # Use the global options or default to {"arrow": True} options = simconfig.get("options", {"arrow": True}) - sim2sumoconfig[datafile_path][submod] = filter_options(submod, options) + sim2sumoconfig[datafile_path][submod] = filter_options( + submod, options + ) sim2sumoconfig[datafile_path]["grid3d"] = grid3d else: # If datafile is not a dictionary, use the existing logic @@ -205,6 +220,7 @@ def create_config_dict(config, datafile=None, datatype=None): return sim2sumoconfig + class Dispatcher: """Controls upload to sumo""" @@ -217,7 +233,7 @@ def __init__( ): self._logger = logging.getLogger(__name__ + ".Dispatcher") self._limit_percent = 0.5 - self._parentid = get_case_uuid(Path.cwd(),parent_level=1) + self._parentid = get_case_uuid(Path.cwd(), parent_level=1) self._conn = SumoConnection(env=env, token=token) self._env = env self._mem_limit = psutil.virtual_memory().available * self._limit_percent From 30f39fea578c8e31db7bdae3d2409c374b44efa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Mon, 30 Sep 2024 08:23:02 +0200 Subject: [PATCH 05/15] Datafile logic --- src/fmu/sumo/sim2sumo/common.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index 8172805..1577b84 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -123,17 +123,14 @@ def find_datafiles(seedpoint=None): full_path = ( cwd / sp if not sp.is_absolute() else sp ) # Make the path absolute - if full_path.is_dir(): - # Search for valid files within the directory - for filetype in valid_filetypes: - datafiles.extend(full_path.rglob(f"*{filetype}")) - elif full_path.is_file() and full_path.suffix in valid_filetypes: + + if full_path.is_file() and full_path.suffix in valid_filetypes: # Add the file if it has a valid filetype datafiles.append(full_path) else: - logger.warning( - f"{full_path} is not a valid directory or datafile with accepted filetype" - ) + # Search for valid files within the directory or partly filename + for filetype in valid_filetypes: + datafiles.extend(full_path.rglob(f"*{filetype}")) else: # Search the current working directory if no seedpoint is provided for filetype in valid_filetypes: @@ -147,6 +144,11 @@ def find_datafiles(seedpoint=None): if stem not in unique_stems: unique_stems.add(stem) unique_datafiles.append(datafile.resolve()) # Resolve to full path + else: + logger.info( + f"Not using duplicate {str(stem)} from {str(datafile.resolve())}" + ) + logger.info(f"Using datafiles: {str(unique_datafiles)} ") return unique_datafiles @@ -190,13 +192,12 @@ def create_config_dict(config, datafile=None, datatype=None): # Convert the filepath to a Path object path = Path(filepath) - # If the path is a directory, find all files within it - if path.is_dir(): - datafiles = find_datafiles(path) - else: - # If the path is a file, use it directly - # Should probably check if it is a file, and of the correct filetype. + if path.is_file(): + # If the path is a file, use it directly, not checking filetype datafiles = [path] + # If the path is a directory or part of filename, find all matches + else: + datafiles = find_datafiles(path) # Create config entries for each datafile for datafile_path in datafiles: From 7d073d44d84686927ef119d4c5b1a8fe84a04571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Tue, 1 Oct 2024 10:07:35 +0200 Subject: [PATCH 06/15] Datatype can be a list --- src/fmu/sumo/sim2sumo/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index 1577b84..ce1140f 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -178,6 +178,8 @@ def create_config_dict(config, datafile=None, datatype=None): if submods == "all": submods = SUBMODULES + elif isinstance(datatype,list): + submods = datatype else: submods = [datatype] From 8182ce4df95ce7ee1140985bb71f8f25db893c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 17 Oct 2024 12:21:52 +0200 Subject: [PATCH 07/15] Searching for partial filenames or filenames without extension --- src/fmu/sumo/sim2sumo/common.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index 63c0b7c..a146172 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -108,28 +108,31 @@ def find_datafiles(seedpoint=None): if isinstance(seedpoint, dict): # Extract the values (paths) from the dictionary and treat them as a list seedpoint = list(seedpoint.values()) - if isinstance(seedpoint, list): + elif isinstance(seedpoint, list): # If seedpoint is a list, ensure all elements are strings or Path objects seedpoint = [Path(sp) for sp in seedpoint] - + else: + seedpoint = [seedpoint] if seedpoint: for sp in seedpoint: full_path = ( cwd / sp if not sp.is_absolute() else sp ) # Make the path absolute - if full_path.is_file() and full_path.suffix in valid_filetypes: # Add the file if it has a valid filetype datafiles.append(full_path) else: - # Search for valid files within the directory or partly filename for filetype in valid_filetypes: - datafiles.extend(full_path.rglob(f"*{filetype}")) + if not full_path.is_dir(): + # Search for valid files within the directory with partly filename + datafiles.extend([f for f in full_path.parent.rglob(f"{full_path.name}*{filetype}")]) + else: + # Search for valid files within the directory + datafiles.extend([f for f in full_path.rglob(f"*{filetype}")]) else: # Search the current working directory if no seedpoint is provided for filetype in valid_filetypes: - datafiles.extend(cwd.rglob(f"*/*/*{filetype}")) - + datafiles.extend([f for f in cwd.rglob(f"*/*/*{filetype}")]) # Filter out files with duplicate stems, keeping the first occurrence unique_stems = set() unique_datafiles = [] From 38f71a09a33f83f6cf6d72278eaf4b7f8f1c13aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 17 Oct 2024 13:05:15 +0200 Subject: [PATCH 08/15] get_case_uuid --- src/fmu/sumo/sim2sumo/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index a146172..2ba8169 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -234,7 +234,7 @@ def __init__( ): self._logger = logging.getLogger(__name__ + ".Dispatcher") self._limit_percent = 0.5 - self._parentid = get_case_uuid(Path.cwd(), parent_level=1) + self._parentid = get_case_uuid(datafile.resolve()) self._conn = SumoConnection(env=env, token=token) self._env = env self._mem_limit = psutil.virtual_memory().available * self._limit_percent From cf3c85bbda0bd3d4b17d63a12a32183438be901f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 17 Oct 2024 13:21:32 +0200 Subject: [PATCH 09/15] elif seedpoint --- src/fmu/sumo/sim2sumo/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index 2ba8169..04cdd71 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -111,8 +111,9 @@ def find_datafiles(seedpoint=None): elif isinstance(seedpoint, list): # If seedpoint is a list, ensure all elements are strings or Path objects seedpoint = [Path(sp) for sp in seedpoint] - else: + elif seedpoint: seedpoint = [seedpoint] + if seedpoint: for sp in seedpoint: full_path = ( From 367e121c68d838e2fa689f1686b8c01c6281a94f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 17 Oct 2024 13:24:30 +0200 Subject: [PATCH 10/15] Default datafile to None --- src/fmu/sumo/sim2sumo/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index 04cdd71..5489b09 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -169,7 +169,7 @@ def create_config_dict(config, datafile=None, datatype=None): grid3d = simconfig.get("grid3d", False) # Use the provided datafile or datatype if given, otherwise use simconfig - datafile = datafile if datafile is not None else simconfig.get("datafile", {}) + datafile = datafile if datafile is not None else simconfig.get("datafile", None) datatype = datatype if datatype is not None else simconfig.get("datatypes", None) if datatype is None: From 3b2dc27c86e4b89d2a520d51d56a696743497325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 17 Oct 2024 13:26:31 +0200 Subject: [PATCH 11/15] for submod in submods --- src/fmu/sumo/sim2sumo/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index 5489b09..09a93d6 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -215,7 +215,7 @@ def create_config_dict(config, datafile=None, datatype=None): datafiles_paths = find_datafiles(datafile) for datafile_path in datafiles_paths: sim2sumoconfig[datafile_path] = {} - for submod in datatype or []: + for submod in submods or []: options = simconfig.get("options", {"arrow": True}) sim2sumoconfig[datafile_path][submod] = filter_options(submod, options) sim2sumoconfig[datafile_path]["grid3d"] = grid3d From 6bcaa06f657e3a127a47059097b29ca2e86d06f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 17 Oct 2024 14:01:28 +0200 Subject: [PATCH 12/15] Allow datafile to be name only and not include subfolders --- src/fmu/sumo/sim2sumo/common.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index 09a93d6..0e3595b 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -119,9 +119,13 @@ def find_datafiles(seedpoint=None): full_path = ( cwd / sp if not sp.is_absolute() else sp ) # Make the path absolute - if full_path.is_file() and full_path.suffix in valid_filetypes: - # Add the file if it has a valid filetype - datafiles.append(full_path) + if full_path.suffix in valid_filetypes: + if full_path.is_file(): + # Add the file if it has a valid filetype + datafiles.append(full_path) + else: + for filetype in valid_filetypes: + datafiles.extend([f for f in full_path.parent.rglob(f"{full_path.name}*{filetype}")]) else: for filetype in valid_filetypes: if not full_path.is_dir(): From 4132fcf76d3319ed1f465424408fd48077c0811b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 17 Oct 2024 15:21:37 +0200 Subject: [PATCH 13/15] Update test functions --- tests/test_functions.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/test_functions.py b/tests/test_functions.py index a2ad84f..61e6157 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -19,7 +19,6 @@ nodisk_upload, Dispatcher, find_datefield, - find_datafiles_no_seedpoint, filter_options, get_case_uuid, ) @@ -133,10 +132,10 @@ def test_get_case_uuid(case_uuid, scratch_files, monkeypatch): 3, ), ({"datafile": ["3_R001_REEK", "OOGRE_PF.in"]}, 2, 4), - ({"datafile": "3_R001_REEK"}, 1, 4), - ({"datafile": "3_R001_REEK.DATA"}, 1, 4), - ({"datafile": "OOGRE_IX.afi"}, 1, 4), - ({"datafile": "opm/model/OOGRE_OPM.DATA"}, 1, 4), + ({"datafile": ["3_R001_REEK"]}, 1, 4), + ({"datafile": ["3_R001_REEK-1.DATA"]}, 1, 4), + ({"datafile": ["OOGRE_IX.afi"]}, 1, 4), + ({"datafile": ["opm/model/OOGRE_OPM.DATA"]}, 1, 4), ({"grid3d": True}, 5, 4), ], ) @@ -359,7 +358,7 @@ def test_convert_to_arrow(): @pytest.mark.parametrize("real,nrdfiles", [(REEK_REAL0, 2), (REEK_REAL1, 5)]) def test_find_datafiles_reek(real, nrdfiles): os.chdir(real) - datafiles = find_datafiles(None, {}) + datafiles = find_datafiles(None, None) expected_tools = ["eclipse", "opm", "ix", "pflotran"] assert ( len(datafiles) == nrdfiles @@ -373,11 +372,3 @@ def test_find_datafiles_reek(real, nrdfiles): if parent == "pflotran": correct_suff = ".in" assert found_path.suffix == correct_suff - - -def test_find_datafiles_no_seedpoint(tmp_path): - real1 = tmp_path / "realone" - copytree(REEK_REAL1, real1) - os.chdir(real1) - files = find_datafiles_no_seedpoint() - assert len(files) == 5 From 57de70b6df893ffa652cc695ee8f27c3cc45aa79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 17 Oct 2024 15:27:29 +0200 Subject: [PATCH 14/15] Find datafiles with valid filetype --- src/fmu/sumo/sim2sumo/common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fmu/sumo/sim2sumo/common.py b/src/fmu/sumo/sim2sumo/common.py index 0e3595b..36d1999 100644 --- a/src/fmu/sumo/sim2sumo/common.py +++ b/src/fmu/sumo/sim2sumo/common.py @@ -124,8 +124,7 @@ def find_datafiles(seedpoint=None): # Add the file if it has a valid filetype datafiles.append(full_path) else: - for filetype in valid_filetypes: - datafiles.extend([f for f in full_path.parent.rglob(f"{full_path.name}*{filetype}")]) + datafiles.extend([f for f in full_path.parent.rglob(f"{full_path.name}")]) else: for filetype in valid_filetypes: if not full_path.is_dir(): From 53b3b37ff13e1016f77e07c6468999af203b8fff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kjetil=20Bj=C3=B8rke?= Date: Thu, 17 Oct 2024 15:32:48 +0200 Subject: [PATCH 15/15] Fix in tests --- tests/test_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_functions.py b/tests/test_functions.py index 61e6157..2e4d982 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -358,7 +358,7 @@ def test_convert_to_arrow(): @pytest.mark.parametrize("real,nrdfiles", [(REEK_REAL0, 2), (REEK_REAL1, 5)]) def test_find_datafiles_reek(real, nrdfiles): os.chdir(real) - datafiles = find_datafiles(None, None) + datafiles = find_datafiles(None) expected_tools = ["eclipse", "opm", "ix", "pflotran"] assert ( len(datafiles) == nrdfiles