From f2d0b996c7dc3d3701d79fddda81592529fb5728 Mon Sep 17 00:00:00 2001
From: "Igoshev, Iaroslav"
Date: Wed, 8 Nov 2023 14:04:47 +0000
Subject: [PATCH] Some refactor fixes

Signed-off-by: Igoshev, Iaroslav
---
 docs/using_unidist/unidist_on_mpi.rst        | 18 ++++++++++--------
 unidist/core/backends/mpi/core/common.py     |  8 ++------
 .../core/backends/mpi/core/communication.py  | 13 +++++--------
 .../core/backends/mpi/core/controller/api.py |  8 ++------
 4 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/docs/using_unidist/unidist_on_mpi.rst b/docs/using_unidist/unidist_on_mpi.rst
index 25fe9c42..05320730 100644
--- a/docs/using_unidist/unidist_on_mpi.rst
+++ b/docs/using_unidist/unidist_on_mpi.rst
@@ -58,7 +58,7 @@ To run unidist on MPI in a single node using Controller/Worker model you should
 
 MPI worker processes will be spawned dynamically by unidist.
 
-It is worth noting that `Intel MPI implementation <https://anaconda.org/intel/mpi4py>`_ supports the ability of spawning MPI processes
+It is worth noting that some MPI implementations, e.g., `Intel MPI implementation`_, support spawning MPI processes
 without using ``mpiexec`` command so you can run unidist on Intel MPI just with:
 
 .. code-block:: bash
@@ -122,12 +122,12 @@ Running is almost the same as in a single node, but, in addition, you should
 use the appropriate parameter for ``mpiexec``.
 This parameter differs depending on the MPI implementation used.
 
-* For Intel MPI or MPICH: ``-hosts host1,host2``. You can also see `Controlling Process Placement with the Intel® MPI Library`_
-or `MPICH wiki`_ for deeper customization.
-
-* For OpenMPI: ``-host host1:n1,...,hostM:nM``
-where n1, ..., nM is the number of processes on each node, including Unidist service processes (root and one or some monitors).
-You can also see `Scheduling processes across hosts with OpenMPI Library <https://docs.open-mpi.org/en/v5.0.x/launching-apps/scheduling.html>`_ for deeper customization.
+* For Intel MPI or MPICH: ``-hosts host1,host2``.
+  You can also see `Controlling Process Placement with the Intel® MPI Library`_ or
+  `MPICH wiki`_ for deeper customization.
+* For OpenMPI: ``-host host1:n1,...,hostM:nM``, where n1, ..., nM are the numbers of processes on each node,
+  including unidist service processes (root and monitor(s)).
+  You can also see `Scheduling processes across hosts with OpenMPI Library`_ for deeper customization.
 
 Running without ``mpiexec`` command
 -----------------------------------
@@ -171,5 +171,7 @@ Running is the same as in a single node.
 
 
 .. _`SPMD model`: https://en.wikipedia.org/wiki/Single_program,_multiple_data
+.. _`Intel MPI implementation`: https://anaconda.org/intel/mpi4py
 .. _`Controlling Process Placement with the Intel® MPI Library`: https://www.intel.com/content/www/us/en/developer/articles/technical/controlling-process-placement-with-the-intel-mpi-library.html
-.. _`MPICH wiki`: https://github.com/pmodels/mpich/blob/main/doc/wiki/how_to/Using_the_Hydra_Process_Manager.md
\ No newline at end of file
+.. _`MPICH wiki`: https://github.com/pmodels/mpich/blob/main/doc/wiki/how_to/Using_the_Hydra_Process_Manager.md
+.. _`Scheduling processes across hosts with OpenMPI Library`: https://docs.open-mpi.org/en/v5.0.x/launching-apps/scheduling.html
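As a quick illustration of the two host-list syntaxes the doc change above describes, here is a minimal sketch. The helper name ``mpiexec_host_args`` and its ``openmpi`` flag are made up for this example and are not part of unidist:

.. code-block:: python

    # Hypothetical helper, not unidist API: builds the mpiexec host arguments
    # described above, assuming Intel MPI/MPICH take ``-hosts host1,host2``
    # and OpenMPI takes ``-host host1:n1,...,hostM:nM``.
    def mpiexec_host_args(hosts, procs_per_host, openmpi=False):
        if openmpi:
            # OpenMPI counts every process per host, including
            # unidist service processes (root and monitor(s)).
            spec = ",".join(f"{h}:{n}" for h, n in zip(hosts, procs_per_host))
            return ["-host", spec]
        return ["-hosts", ",".join(hosts)]

    print(mpiexec_host_args(["host1", "host2"], [4, 4]))        # ['-hosts', 'host1,host2']
    print(mpiexec_host_args(["host1", "host2"], [4, 4], True))  # ['-host', 'host1:4,host2:4']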
diff --git a/unidist/core/backends/mpi/core/common.py b/unidist/core/backends/mpi/core/common.py
index 47086713..5140ae04 100755
--- a/unidist/core/backends/mpi/core/common.py
+++ b/unidist/core/backends/mpi/core/common.py
@@ -507,16 +507,12 @@ def is_run_with_mpiexec():
     if "Intel" in lib_version and os.getenv("PMI_RANK") is None:
         return False
 
-    if "MPICH" in MPI.Get_library_version() and os.getenv("PMI_RANK") is None:
+    if "MPICH" in lib_version and os.getenv("PMI_RANK") is None:
         return False
 
-    if (
-        "Open MPI" in MPI.Get_library_version()
-        and os.getenv("OMPI_COMM_WORLD_RANK") is None
-    ):
+    if "Open MPI" in lib_version and os.getenv("OMPI_COMM_WORLD_RANK") is None:
         return False
 
     # The latest MSMPI does not support running without mpiexec.
     # Other MPI libraries haven't been checked.
-
     return True

diff --git a/unidist/core/backends/mpi/core/communication.py b/unidist/core/backends/mpi/core/communication.py
index 764bfe6b..8220d5a3 100755
--- a/unidist/core/backends/mpi/core/communication.py
+++ b/unidist/core/backends/mpi/core/communication.py
@@ -139,18 +139,15 @@ def __init__(self, comm):
         # check running hosts
         if self.is_root_process() and len(self.topology.keys()) > host_count:
             warnings.warn(
-                """The number of running hosts is greater than that specified in the UNIDIST_MPI_HOSTS.
-                If you want to run the program on a host other than the local one, specify the appropriate parameter for `mpiexec`
-                (`--host` for OpenMPI or `--hosts` for other MPI implementations)
-                """
+                "The number of running hosts is greater than that specified in UNIDIST_MPI_HOSTS. "
+                + "If you want to run the program on a host other than the local one, specify the appropriate parameter for `mpiexec` "
+                + "(`--host` for OpenMPI and `--hosts` for Intel MPI or MPICH)."
             )
-
-        # check running hosts
         if self.is_root_process() and len(self.topology.keys()) < host_count:
             warnings.warn(
-                "The number of running hosts is less than that specified in the UNIDIST_MPI_HOSTS. Check the `mpiexec` option to distribute processes between hosts."
+                "The number of running hosts is less than that specified in UNIDIST_MPI_HOSTS. "
+                + "Check the `mpiexec` option to distribute processes between hosts."
             )
-
         if common.is_shared_memory_supported():
             self.monitor_processes = []
             for host in self.topology:
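The ``common.py`` change above hinges on rank variables that MPI implementations export only when a process is started through ``mpiexec``. A standalone sketch of that detection idea follows; the ``RANK_ENV_VARS`` table and ``run_with_mpiexec`` function are illustrative, not unidist API:

.. code-block:: python

    import os

    # Illustrative mapping, mirroring the checks in is_run_with_mpiexec()
    # above: Intel MPI and MPICH set PMI_RANK, Open MPI sets
    # OMPI_COMM_WORLD_RANK, and only under mpiexec.
    RANK_ENV_VARS = {
        "Intel": "PMI_RANK",
        "MPICH": "PMI_RANK",
        "Open MPI": "OMPI_COMM_WORLD_RANK",
    }

    def run_with_mpiexec(lib_version):
        """Best-effort guess whether this process was launched via mpiexec."""
        for name, env_var in RANK_ENV_VARS.items():
            if name in lib_version:
                return os.getenv(env_var) is not None
        # Unknown implementations (e.g., MSMPI) fall back to True,
        # matching the patched function's return value.
        return True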
diff --git a/unidist/core/backends/mpi/core/controller/api.py b/unidist/core/backends/mpi/core/controller/api.py
index 56eaa15d..cf089cd9 100644
--- a/unidist/core/backends/mpi/core/controller/api.py
+++ b/unidist/core/backends/mpi/core/controller/api.py
@@ -159,12 +159,11 @@ def init():
     if MpiHosts.get_value_source() != ValueSource.DEFAULT:
         if common.is_run_with_mpiexec():
             warnings.warn(
-                "MpiHosts is not used when running a script using mpiexec."
-                + "Find out more about running Unidist on MPI cluster in the Unidist documentation."
+                "MpiHosts is not used when running a script using mpiexec. "
+                + "Find out more about running unidist on an MPI cluster in the unidist documentation."
             )
         else:
             py_str += [f"cfg.MpiHosts.put('{MpiHosts.get()}')"]
-
     if CpuCount.get_value_source() != ValueSource.DEFAULT:
         py_str += [f"cfg.CpuCount.put({CpuCount.get()})"]
     if MpiPickleThreshold.get_value_source() != ValueSource.DEFAULT:
@@ -206,15 +205,12 @@ def init():
         # See more about Intel MPI environment variables in
         # https://www.intel.com/content/www/us/en/docs/mpi-library/developer-reference-linux/2021-8/other-environment-variables.html.
         os.environ["I_MPI_SPAWN"] = "1"
-
     # +1 for just a single process monitor
     nprocs_to_spawn = cpu_count + 1
-
     hosts = MpiHosts.get()
     if hosts is not None and not common.is_run_with_mpiexec():
         host_list = hosts.split(",")
         host_count = len(host_list)
-
         if common.is_shared_memory_supported():
             # +host_count to add monitor process on each host
             nprocs_to_spawn = cpu_count + host_count
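For reference, the spawn-count arithmetic that the second hunk leaves in place reduces to a few lines. This is a standalone sketch of the flow in ``init()`` above; the ``nprocs_to_spawn`` function signature is invented for illustration:

.. code-block:: python

    # Illustrative sketch of the spawn count computed in init() above:
    # +1 for just a single monitor process, or +host_count when hosts are
    # given and shared memory is supported (one monitor per host).
    def nprocs_to_spawn(cpu_count, host_count=None, shared_memory=False):
        n = cpu_count + 1
        if host_count is not None and shared_memory:
            n = cpu_count + host_count
        return n

    assert nprocs_to_spawn(8) == 9
    assert nprocs_to_spawn(8, host_count=2, shared_memory=True) == 10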