From bf970ebf9db6a66d6410e397dd656da7ce56ecc4 Mon Sep 17 00:00:00 2001 From: Daniel Ahlin <50445206+danielahlin@users.noreply.github.com> Date: Fri, 21 Apr 2023 03:38:40 +0200 Subject: [PATCH] nccl-fastsocket: initial packaging for nccl-fastsocket (#36557) * nccl-fastsocket: Add NCCL transport plugin for GCP * nccl-fastsocket: remove auto-gen. header and fix maintainers * Update var/spack/repos/builtin/packages/nccl-fastsocket/package.py * nccl-fastsocket: Add rationale for setting LD_LIBRARY_PATH --------- Co-authored-by: Tamara Dahlgren <35777542+tldahlgren@users.noreply.github.com> --- .../packages/nccl-fastsocket/package.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 var/spack/repos/builtin/packages/nccl-fastsocket/package.py diff --git a/var/spack/repos/builtin/packages/nccl-fastsocket/package.py b/var/spack/repos/builtin/packages/nccl-fastsocket/package.py new file mode 100644 index 00000000000000..f25862b2ebd3f2 --- /dev/null +++ b/var/spack/repos/builtin/packages/nccl-fastsocket/package.py @@ -0,0 +1,65 @@ +# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. 
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import tempfile

from spack.package import *


class NcclFastsocket(Package):
    """NCCL Fast Socket GCP Net plugin for NCCL"""

    homepage = "https://github.com/google/nccl-fastsocket"
    git = "https://github.com/google/nccl-fastsocket.git"

    version("master", preferred=True)

    depends_on("bazel", type="build")
    depends_on("nccl", type=["build", "run"])

    maintainers("danielahlin")

    def setup_build_environment(self, env):
        """Export the scratch directory and NCCL locations used by the build.

        A new temporary directory is created on each call and published as
        ``TEST_TMPDIR``; ``install`` reads that variable back to choose
        Bazel's output root.
        """
        scratch_dir = tempfile.mkdtemp(prefix="spack")
        env.set("TEST_TMPDIR", scratch_dir)

        nccl = self.spec["nccl"]
        env.set("NCCL_INSTALL_PATH", nccl.prefix)
        env.set("NCCL_HDR_PATH", nccl.prefix.include)

    def install(self, spec, prefix):
        """Build ``libnccl-net.so`` with Bazel and copy ``bazel-bin`` to ``prefix.lib``."""
        # The option set below was taken over from the py-tensorflow package.
        startup_options = [
            # Keep user and system .bazelrc files from overriding build settings.
            "--nohome_rc",
            "--nosystem_rc",
            # Bazel does not work properly on NFS, so its output root is the
            # temporary directory exported by setup_build_environment.
            "--output_user_root=" + env["TEST_TMPDIR"],
        ]
        build_options = [
            # Spack logs don't handle colored output well.
            "--color=no",
            f"--jobs={make_jobs}",
            # Verbose output when something fails.
            "--verbose_failures",
            # Print the (formatted) subcommands as they are executed.
            "--subcommands=pretty_print",
            # Have Bazel explain its decisions; the flag requires a file name.
            "--explain=explainlogfile.txt",
            # More detailed explanations.
            "--verbose_explanations",
        ]

        bazel(*startup_options, "build", "libnccl-net.so", *build_options)
        install_tree("bazel-bin", prefix.lib)

    def setup_run_environment(self, env):
        """Expose the installed plugin to NCCL through the run-time environment."""
        # NCCL currently picks up plugins by scanning the standard library
        # search paths for libraries with certain names, so the install
        # directory has to be on LD_LIBRARY_PATH for the plugin to be found.
        env.prepend_path("LD_LIBRARY_PATH", self.prefix.lib)
        # NCCL_NET_PLUGIN can alter part of the library name NCCL looks for.
        # This plugin does not need that, hence the empty string.
        env.set("NCCL_NET_PLUGIN", "")