diff --git a/centos/centos-7.x/centos-7.6-hpc/install_amd_libs.sh b/centos/centos-7.x/centos-7.6-hpc/install_amd_libs.sh index fbd780fc..11497c19 100755 --- a/centos/centos-7.x/centos-7.6-hpc/install_amd_libs.sh +++ b/centos/centos-7.x/centos-7.6-hpc/install_amd_libs.sh @@ -1,4 +1,3 @@ #!/bin/bash -../../common/install_amd_libs.sh - +$COMMON_DIR/../centos/centos-7.x/common/install_amd_libs.sh diff --git a/centos/centos-7.x/centos-7.6-hpc/install_mellanoxofed.sh b/centos/centos-7.x/centos-7.6-hpc/install_mellanoxofed.sh index fd6b6e73..4fc6d52f 100755 --- a/centos/centos-7.x/centos-7.6-hpc/install_mellanoxofed.sh +++ b/centos/centos-7.x/centos-7.6-hpc/install_mellanoxofed.sh @@ -1,9 +1,12 @@ #!/bin/bash set -ex -MLNX_OFED_DOWNLOAD_URL=http://content.mellanox.com/ofed/MLNX_OFED-5.1-0.6.6.0/MLNX_OFED_LINUX-5.1-0.6.6.0-rhel7.6-x86_64.tgz -$COMMON_DIR/download_and_verify.sh $MLNX_OFED_DOWNLOAD_URL "a6366c43a51dc4e43e672c5c72ed732506f7aa68790103fbc40286a7a39623aa" -tar zxvf MLNX_OFED_LINUX-5.1-0.6.6.0-rhel7.6-x86_64.tgz +MLNX_OFED_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/MLNX_OFED_LINUX-5.1-2.4.6.0-rhel7.6-x86_64.tgz +TARBALL=$(basename ${MLNX_OFED_DOWNLOAD_URL}) +MOFED_FOLDER=$(basename ${MLNX_OFED_DOWNLOAD_URL} .tgz) + +$COMMON_DIR/download_and_verify.sh $MLNX_OFED_DOWNLOAD_URL "6966b086332f65b76efa41f5604588ba43321f09b634b6c59104423406f11f1e" +tar zxvf ${TARBALL} KERNEL=( $(rpm -q kernel | sed 's/kernel\-//g') ) KERNEL=${KERNEL[-1]} @@ -11,4 +14,4 @@ KERNEL=${KERNEL[-1]} #RELEASE=( $(cat /etc/centos-release | awk '{print $4}') ) #yum -y install http://olcentgbl.trafficmanager.net/centos/${RELEASE}/updates/x86_64/kernel-devel-${KERNEL}.rpm yum install -y kernel-devel-${KERNEL} -./MLNX_OFED_LINUX-5.1-0.6.6.0-rhel7.6-x86_64/mlnxofedinstall --kernel $KERNEL --kernel-sources /usr/src/kernels/${KERNEL} --add-kernel-support --skip-repo +./${MOFED_FOLDER}/mlnxofedinstall --kernel $KERNEL --kernel-sources /usr/src/kernels/${KERNEL} --add-kernel-support --skip-repo diff --git a/centos/centos-7.x/centos-7.6-hpc/install_mpis.sh b/centos/centos-7.x/centos-7.6-hpc/install_mpis.sh index 79312ec9..50b3d811 100755 --- a/centos/centos-7.x/centos-7.6-hpc/install_mpis.sh +++ b/centos/centos-7.x/centos-7.6-hpc/install_mpis.sh @@ -11,13 +11,16 @@ set GCC=/opt/${GCC_VERSION}/bin/gcc INSTALL_PREFIX=/opt -# HPC-X v2.7.0 -HPCX_VERSION="v2.7.0" -HPCX_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.7.0-gcc9.2.0-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.6-x86_64.tbz -$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "18d4be252d11b80d1e00cda814861d24fe9defcd800e5b1892c23148bbb460cf" -tar -xvf hpcx-${HPCX_VERSION}-gcc9.2.0-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.6-x86_64.tbz -mv hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.6-x86_64 ${INSTALL_PREFIX} -HPCX_PATH=${INSTALL_PREFIX}/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.6-x86_64 +# HPC-X v2.7.2 +HPCX_VERSION="v2.7.2" +HPCX_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.7.2-gcc-MLNX_OFED_LINUX-5.1-2.4.6.0-redhat7.6-x86_64.tbz +TARBALL=$(basename ${HPCX_DOWNLOAD_URL}) +HPCX_FOLDER=$(basename ${HPCX_DOWNLOAD_URL} .tbz) + +$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "520b1d8f30f55da954fb2af3766b770ebdbd3f07c154ec6621adda656c44692c" +tar -xvf ${TARBALL} +mv ${HPCX_FOLDER} ${INSTALL_PREFIX} +HPCX_PATH=${INSTALL_PREFIX}/${HPCX_FOLDER} # Setup module files for MPIs mkdir -p /usr/share/Modules/modulefiles/mpi/ diff --git a/centos/centos-7.x/centos-7.7-hpc/install_amd_libs.sh b/centos/centos-7.x/centos-7.7-hpc/install_amd_libs.sh index fbd780fc..11497c19 100755 --- a/centos/centos-7.x/centos-7.7-hpc/install_amd_libs.sh +++ b/centos/centos-7.x/centos-7.7-hpc/install_amd_libs.sh @@ -1,4 +1,3 @@ #!/bin/bash -../../common/install_amd_libs.sh - +$COMMON_DIR/../centos/centos-7.x/common/install_amd_libs.sh diff --git a/centos/centos-7.x/centos-7.7-hpc/install_mellanoxofed.sh b/centos/centos-7.x/centos-7.7-hpc/install_mellanoxofed.sh index ec2a91ab..26e0a8ab 100755 --- a/centos/centos-7.x/centos-7.7-hpc/install_mellanoxofed.sh +++ b/centos/centos-7.x/centos-7.7-hpc/install_mellanoxofed.sh @@ -1,9 +1,12 @@ #!/bin/bash set -ex -MLNX_OFED_DOWNLOAD_URL=http://content.mellanox.com/ofed/MLNX_OFED-5.1-0.6.6.0/MLNX_OFED_LINUX-5.1-0.6.6.0-rhel7.7-x86_64.tgz -$COMMON_DIR/download_and_verify.sh $MLNX_OFED_DOWNLOAD_URL "22ef65aa07ba4d5e2af57465053440fad18a71528e3040d94b4fec5a38803fe8" -tar zxvf MLNX_OFED_LINUX-5.1-0.6.6.0-rhel7.7-x86_64.tgz +MLNX_OFED_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/MLNX_OFED_LINUX-5.1-2.4.6.0-rhel7.7-x86_64.tgz +TARBALL=$(basename ${MLNX_OFED_DOWNLOAD_URL}) +MOFED_FOLDER=$(basename ${MLNX_OFED_DOWNLOAD_URL} .tgz) + +$COMMON_DIR/download_and_verify.sh $MLNX_OFED_DOWNLOAD_URL "777fe2be20f4ea7fc2e665f490a8371957b2ac3877cc72019d2e09448dc775f1" +tar zxvf ${TARBALL} KERNEL=( $(rpm -q kernel | sed 's/kernel\-//g') ) KERNEL=${KERNEL[-1]} @@ -11,5 +14,5 @@ KERNEL=${KERNEL[-1]} #RELEASE=( $(cat /etc/centos-release | awk '{print $4}') ) #yum -y install http://olcentgbl.trafficmanager.net/centos/${RELEASE}/updates/x86_64/kernel-devel-${KERNEL}.rpm yum install -y kernel-devel-${KERNEL} -./MLNX_OFED_LINUX-5.1-0.6.6.0-rhel7.7-x86_64/mlnxofedinstall --kernel $KERNEL --kernel-sources /usr/src/kernels/${KERNEL} --add-kernel-support --skip-repo +./${MOFED_FOLDER}/mlnxofedinstall --kernel $KERNEL --kernel-sources /usr/src/kernels/${KERNEL} --add-kernel-support --skip-repo diff --git a/centos/centos-7.x/centos-7.7-hpc/install_mpis.sh b/centos/centos-7.x/centos-7.7-hpc/install_mpis.sh index 9c959adb..74daabd7 100755 --- a/centos/centos-7.x/centos-7.7-hpc/install_mpis.sh +++ b/centos/centos-7.x/centos-7.7-hpc/install_mpis.sh @@ -11,13 +11,16 @@ set GCC=/opt/${GCC_VERSION}/bin/gcc INSTALL_PREFIX=/opt -# HPC-X v2.7.0 -HPCX_VERSION="v2.7.0" -HPCX_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.7.0-gcc9.2.0-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.7-x86_64.tbz -$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "f910433576c76025d40829a8a5baee928b9cda8a2400353c1d5696f1d494ee41" -tar -xvf hpcx-${HPCX_VERSION}-gcc9.2.0-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.7-x86_64.tbz -mv hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.7-x86_64 ${INSTALL_PREFIX} -HPCX_PATH=${INSTALL_PREFIX}/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.7-x86_64 +# HPC-X v2.7.2 +HPCX_VERSION="v2.7.2" +HPCX_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.7.2-gcc-MLNX_OFED_LINUX-5.1-2.4.6.0-redhat7.7-x86_64.tbz +TARBALL=$(basename ${HPCX_DOWNLOAD_URL}) +HPCX_FOLDER=$(basename ${HPCX_DOWNLOAD_URL} .tbz) + +$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "e038a037b1a42e0386f4f99987680f60b931c1ddaceeb168f0b20b31ce4b8554" +tar -xvf ${TARBALL} +mv ${HPCX_FOLDER} ${INSTALL_PREFIX} +HPCX_PATH=${INSTALL_PREFIX}/${HPCX_FOLDER} # Setup module files for MPIs mkdir -p /usr/share/Modules/modulefiles/mpi/ diff --git a/centos/centos-7.x/centos-7.8-hpc/install_amd_libs.sh b/centos/centos-7.x/centos-7.8-hpc/install_amd_libs.sh index fbd780fc..11497c19 100755 --- a/centos/centos-7.x/centos-7.8-hpc/install_amd_libs.sh +++ b/centos/centos-7.x/centos-7.8-hpc/install_amd_libs.sh @@ -1,4 +1,3 @@ #!/bin/bash -../../common/install_amd_libs.sh - +$COMMON_DIR/../centos/centos-7.x/common/install_amd_libs.sh diff --git a/centos/centos-7.x/centos-7.8-hpc/install_mellanoxofed.sh b/centos/centos-7.x/centos-7.8-hpc/install_mellanoxofed.sh index 342f826b..9865e64f 100755 --- a/centos/centos-7.x/centos-7.8-hpc/install_mellanoxofed.sh +++ b/centos/centos-7.x/centos-7.8-hpc/install_mellanoxofed.sh @@ -1,11 +1,11 @@ #!/bin/bash set -ex -MLNX_OFED_DOWNLOAD_URL=http://content.mellanox.com/ofed/MLNX_OFED-5.1-2.3.7.1/MLNX_OFED_LINUX-5.1-2.3.7.1-rhel7.8-x86_64.tgz +MLNX_OFED_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/MLNX_OFED_LINUX-5.1-2.4.6.0-rhel7.8-x86_64.tgz TARBALL=$(basename ${MLNX_OFED_DOWNLOAD_URL}) MOFED_FOLDER=$(basename ${MLNX_OFED_DOWNLOAD_URL} .tgz) -$COMMON_DIR/download_and_verify.sh $MLNX_OFED_DOWNLOAD_URL "9d3b8ed5c16f91622552960ea5b28e56aaed6a523174edf4979001c7dc307ac9" +$COMMON_DIR/download_and_verify.sh $MLNX_OFED_DOWNLOAD_URL "86aa6256ff828db0fa31c06c1ed76831e335ad7fe9a04f7c178029177497ee6a" tar zxvf ${TARBALL} KERNEL=( $(rpm -q kernel | sed 's/kernel\-//g') ) diff --git a/centos/centos-7.x/centos-7.8-hpc/install_mpis.sh b/centos/centos-7.x/centos-7.8-hpc/install_mpis.sh index 21d5494e..28eec268 100755 --- a/centos/centos-7.x/centos-7.8-hpc/install_mpis.sh +++ b/centos/centos-7.x/centos-7.8-hpc/install_mpis.sh @@ -11,14 +11,14 @@ set GCC=/opt/${GCC_VERSION}/bin/gcc INSTALL_PREFIX=/opt -# HPC-X v2.7.0 -HPCX_VERSION="v2.7.0" -HPCX_DOWNLOAD_URL=http://content.mellanox.com/hpc/hpc-x/v2.7/hpcx-v2.7.0-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.8-x86_64.tbz -HPCX_TARBALL=$(basename ${HPCX_DOWNLOAD_URL}) +# HPC-X v2.7.2 +HPCX_VERSION="v2.7.2" +HPCX_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.7.2-gcc-MLNX_OFED_LINUX-5.1-2.4.6.0-redhat7.8-x86_64.tbz +TARBALL=$(basename ${HPCX_DOWNLOAD_URL}) HPCX_FOLDER=$(basename ${HPCX_DOWNLOAD_URL} .tbz) -$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "07f5dfb8f3e85371b4f6ca0da8be35f5f6002def748266a54493ce0d96a766e3" -tar -xvf ${HPCX_TARBALL} +$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "9dba1dd103e004dadf9e6784468926a47650af2f8b80a2893b0f49c5328cd507" +tar -xvf ${TARBALL} mv ${HPCX_FOLDER} ${INSTALL_PREFIX} HPCX_PATH=${INSTALL_PREFIX}/${HPCX_FOLDER} diff --git a/centos/centos-7.x/common/install_amd_libs.sh b/centos/centos-7.x/common/install_amd_libs.sh new file mode 100755 index 00000000..227805bf --- /dev/null +++ b/centos/centos-7.x/common/install_amd_libs.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -ex + +INSTALL_PREFIX=/opt/amd +mkdir -p ${INSTALL_PREFIX} + +AOCL_VERSION="2.2-4" +AOCL_DOWNLOAD_URL="https://azhpcstor.blob.core.windows.net/azhpc-images-store/aocl-linux-aocc-2.2-4_centos7.tar.gz" +$COMMON_DIR/download_and_verify.sh $AOCL_DOWNLOAD_URL "4bdc5caec0233066ded3e2bfedb1e03bdeec82725361764d8860075f64ff4031" + +TARBALL=$(basename ${AOCL_DOWNLOAD_URL}) +tar -xvf ${TARBALL} +cd aocl-linux-aocc-${AOCL_VERSION} + +./install.sh -t amd -l blis fftw libflame +cp -r amd/2.2/* ${INSTALL_PREFIX} +cd .. && rm -rf aocl-linux-aocc-${AOCL_VERSION} + +# Setup module files for AMD Libraries +mkdir -p /usr/share/Modules/modulefiles/amd/ + +# fftw +cat << EOF >> /usr/share/Modules/modulefiles/amd/aocl-${AOCL_VERSION} +#%Module 1.0 +# +# AOCL +# +prepend-path LD_LIBRARY_PATH ${INSTALL_PREFIX}/lib +setenv AMD_FFTW_INCLUDE ${INSTALL_PREFIX}/include +EOF + +# Create symlinks for modulefiles +ln -s /usr/share/Modules/modulefiles/amd/aocl-${AOCL_VERSION} /usr/share/Modules/modulefiles/amd/aocl diff --git a/centos/centos-8.x/centos-8.1-hpc/install_amd_libs.sh b/centos/centos-8.x/centos-8.1-hpc/install_amd_libs.sh index fbd780fc..657d2b5b 100755 --- a/centos/centos-8.x/centos-8.1-hpc/install_amd_libs.sh +++ b/centos/centos-8.x/centos-8.1-hpc/install_amd_libs.sh @@ -1,4 +1,3 @@ #!/bin/bash -../../common/install_amd_libs.sh - +$COMMON_DIR/../centos/centos-8.x/common/install_amd_libs.sh diff --git a/centos/centos-8.x/centos-8.1-hpc/install_mellanoxofed.sh b/centos/centos-8.x/centos-8.1-hpc/install_mellanoxofed.sh index d49bda1e..d0e0c4c6 100755 --- a/centos/centos-8.x/centos-8.1-hpc/install_mellanoxofed.sh +++ b/centos/centos-8.x/centos-8.1-hpc/install_mellanoxofed.sh @@ -1,9 +1,12 @@ #!/bin/bash set -ex -MLNX_OFED_DOWNLOAD_URL=http://content.mellanox.com/ofed/MLNX_OFED-5.1-0.6.6.0/MLNX_OFED_LINUX-5.1-0.6.6.0-rhel8.1-x86_64.tgz -$COMMON_DIR/download_and_verify.sh $MLNX_OFED_DOWNLOAD_URL "9d5ca5a32270c0c577582abab7c5f441b439c9b69229b2c2e91fdee6be018f61" -tar zxvf MLNX_OFED_LINUX-5.1-0.6.6.0-rhel8.1-x86_64.tgz +MLNX_OFED_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/MLNX_OFED_LINUX-5.1-2.4.6.0-rhel8.1-x86_64.tgz +TARBALL=$(basename ${MLNX_OFED_DOWNLOAD_URL}) +MOFED_FOLDER=$(basename ${MLNX_OFED_DOWNLOAD_URL} .tgz) + +$COMMON_DIR/download_and_verify.sh $MLNX_OFED_DOWNLOAD_URL "7a92e363f5a6aeb9cd6c2290c0ca1dc7334003dd614faa782d164057742b118e" +tar zxvf ${TARBALL} KERNEL=( $(rpm -q kernel | sed 's/kernel\-//g') ) KERNEL=${KERNEL[-1]} @@ -11,4 +14,5 @@ KERNEL=${KERNEL[-1]} #RELEASE=( $(cat /etc/centos-release | awk '{print $4}') ) #yum install -y http://olcentwus.cloudapp.net/centos/${RELEASE}/BaseOS/x86_64/os/kernel-devel-${KERNEL}.rpm yum install -y kernel-devel-${KERNEL} -./MLNX_OFED_LINUX-5.1-0.6.6.0-rhel8.1-x86_64/mlnxofedinstall --kernel $KERNEL --kernel-sources /usr/src/kernels/${KERNEL} --add-kernel-support --skip-repo +./${MOFED_FOLDER}/mlnxofedinstall --kernel $KERNEL --kernel-sources /usr/src/kernels/${KERNEL} --add-kernel-support --skip-repo + diff --git a/centos/centos-8.x/centos-8.1-hpc/install_mpis.sh b/centos/centos-8.x/centos-8.1-hpc/install_mpis.sh index 58347a90..6840736c 100755 --- a/centos/centos-8.x/centos-8.1-hpc/install_mpis.sh +++ b/centos/centos-8.x/centos-8.1-hpc/install_mpis.sh @@ -11,13 +11,16 @@ set GCC=/opt/${GCC_VERSION}/bin/gcc INSTALL_PREFIX=/opt -# HPC-X v2.7.0 -HPCX_VERSION="v2.7.0" -HPCX_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.7.0-gcc9.2.0-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat8.1-x86_64.tbz -$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "0861622335f67b9f84556bc6a7d8758aa373943d69b49175526b99c53c576732" -tar -xvf hpcx-${HPCX_VERSION}-gcc9.2.0-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat8.1-x86_64.tbz -mv hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat8.1-x86_64 ${INSTALL_PREFIX} -HPCX_PATH=${INSTALL_PREFIX}/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat8.1-x86_64 +# HPC-X v2.7.2 +HPCX_VERSION="v2.7.2" +HPCX_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.7.2-gcc-MLNX_OFED_LINUX-5.1-2.4.6.0-redhat8.1-x86_64.tbz +TARBALL=$(basename ${HPCX_DOWNLOAD_URL}) +HPCX_FOLDER=$(basename ${HPCX_DOWNLOAD_URL} .tbz) + +$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "6b0893ccd72c554396ef7827a3bef7cd6fcb15f288303d61dab5f2f78b029b57" +tar -xvf ${TARBALL} +mv ${HPCX_FOLDER} ${INSTALL_PREFIX} +HPCX_PATH=${INSTALL_PREFIX}/${HPCX_FOLDER} # Setup module files for MPIs mkdir -p /usr/share/Modules/modulefiles/mpi/ diff --git a/centos/centos-8.x/common/install_amd_libs.sh b/centos/centos-8.x/common/install_amd_libs.sh new file mode 100755 index 00000000..a3687b93 --- /dev/null +++ b/centos/centos-8.x/common/install_amd_libs.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -ex + +INSTALL_PREFIX=/opt/amd +mkdir -p ${INSTALL_PREFIX} + +AOCL_VERSION="2.2.1" +AOCL_DOWNLOAD_URL="https://azhpcstor.blob.core.windows.net/azhpc-images-store/aocl-linux-aocc-2.2.1_centos8.tar.gz" +$COMMON_DIR/download_and_verify.sh $AOCL_DOWNLOAD_URL "cbe5afbdc241047a9d8814b5557be429aa0d9d2b83408eca8244e1ab9c8e2c87" + +TARBALL=$(basename ${AOCL_DOWNLOAD_URL}) +tar -xvf ${TARBALL} +cd aocl-linux-aocc-${AOCL_VERSION}_centos8 + +./install.sh -t amd -l blis fftw libflame +cp -r amd/${AOCL_VERSION}_centos8/* ${INSTALL_PREFIX} +cd .. && rm -rf aocl-linux-aocc-${AOCL_VERSION}_centos8 + +# Setup module files for AMD Libraries +mkdir -p /usr/share/Modules/modulefiles/amd/ + +# fftw +cat << EOF >> /usr/share/Modules/modulefiles/amd/aocl-${AOCL_VERSION} +#%Module 1.0 +# +# AOCL +# +prepend-path LD_LIBRARY_PATH ${INSTALL_PREFIX}/lib +setenv AMD_FFTW_INCLUDE ${INSTALL_PREFIX}/include +EOF + +# Create symlinks for modulefiles +ln -s /usr/share/Modules/modulefiles/amd/aocl-${AOCL_VERSION} /usr/share/Modules/modulefiles/amd/aocl diff --git a/centos/common/hpc-tuning.sh b/centos/common/hpc-tuning.sh index d51e4d60..896fcec9 100755 --- a/centos/common/hpc-tuning.sh +++ b/centos/common/hpc-tuning.sh @@ -18,7 +18,11 @@ echo "vm.zone_reclaim_mode = 1" >> /etc/sysctl.conf sysctl -p # Configure WALinuxAgent -sed -i -e 's/# OS.EnableRDMA=y/OS.EnableRDMA=y/g' /etc/waagent.conf -# sed -i -e 's/CGroups.EnforceLimits=n/CGroups.EnforceLimits=y/g' /etc/waagent.conf -systemctl enable waagent - +sudo sed -i -e 's/# OS.EnableRDMA=y/OS.EnableRDMA=y/g' /etc/waagent.conf +echo "Extensions.GoalStatePeriod=300" | sudo tee -a /etc/waagent.conf +echo "OS.EnableFirewallPeriod=300" | sudo tee -a /etc/waagent.conf +echo "OS.RemovePersistentNetRulesPeriod=300" | sudo tee -a /etc/waagent.conf +echo "OS.RootDeviceScsiTimeoutPeriod=300" | sudo tee -a /etc/waagent.conf +echo "OS.MonitorDhcpClientRestartPeriod=60" | sudo tee -a /etc/waagent.conf +echo "Provisioning.MonitorHostNamePeriod=60" | sudo tee -a /etc/waagent.conf +sudo systemctl restart waagent diff --git a/centos/common/install_amd_libs.sh b/centos/common/install_amd_libs.sh deleted file mode 100755 index 731ddd66..00000000 --- a/centos/common/install_amd_libs.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -set -ex - -# Load gcc -GCC_VERSION=gcc-9.2.0 -export PATH=/opt/${GCC_VERSION}/bin:$PATH -export LD_LIBRARY_PATH=/opt/${GCC_VERSION}/lib64:$LD_LIBRARY_PATH -set CC=/opt/${GCC_VERSION}/bin/gcc -set GCC=/opt/${GCC_VERSION}/bin/gcc - -INSTALL_PREFIX=/opt/amd -mkdir -p ${INSTALL_PREFIX} - -# AMD FFTW -FFTW_DOWNLOAD_URL=https://github.com/amd/amd-fftw/releases/download/2.2/aocl-fftw-linux-gcc-2.2-4.tar.gz -$COMMON_DIR/download_and_verify.sh $FFTW_DOWNLOAD_URL "06afb759e3419a0ea7ba9b08dd217d94d75e4bbb76e847345a1fa75af64fa60b" -tar -xvf aocl-fftw-linux-gcc-2.2-4.tar.gz -cp -r amd-fftw ${INSTALL_PREFIX}/fftw - - -# AMD libflame -LIBFLAME_DOWNLOAD_URL=https://github.com/amd/libflame/releases/download/2.2/aocl-libflame-linux-gcc-2.2-4.tar.gz -$COMMON_DIR/download_and_verify.sh $LIBFLAME_DOWNLOAD_URL "13e3eb9e174ff3c9f44f33e8c9b2bf9d7513ea5840109a4d37a2fa4f769f3451" -tar -xvf aocl-libflame-linux-gcc-2.2-4.tar.gz -cp -r amd-libflame ${INSTALL_PREFIX}/libflame - - -# AMD blis & AMD blis-mt -BLIS_DOWNLOAD_URL=https://github.com/amd/blis/releases/download/2.2/aocl-blis-linux-gcc-2.2-4.tar.gz -$COMMON_DIR/download_and_verify.sh $BLIS_DOWNLOAD_URL "e9bd8bc808a3cb8b84ff46f1f3c12c214c2699c80b82d43e3bedf24b9bb79ac6" -tar -xvf aocl-blis-linux-gcc-2.2-4.tar.gz -cp -r amd-blis ${INSTALL_PREFIX}/blis - -FFTW_VERSION="2.2" -LIBFLAME_VERSION="2.2" -BLIS_VERSION="2.2" - -# Setup module files for AMD Libraries -mkdir -p /usr/share/Modules/modulefiles/amd/ - -# fftw -cat << EOF >> /usr/share/Modules/modulefiles/amd/fftw-${FFTW_VERSION} -#%Module 1.0 -# -# fftw -# -module load ${GCC_VERSION} -prepend-path LD_LIBRARY_PATH ${INSTALL_PREFIX}/fftw/lib -setenv AMD_FFTW_INCLUDE ${INSTALL_PREFIX}/fftw/include -EOF - -# libflame -cat << EOF >> /usr/share/Modules/modulefiles/amd/libflame-${LIBFLAME_VERSION} -#%Module 1.0 -# -# libflame -# -module load ${GCC_VERSION} -prepend-path LD_LIBRARY_PATH ${INSTALL_PREFIX}/libflame/lib -setenv AMD_LIBFLAME_INCLUDE ${INSTALL_PREFIX}/libflame/include -EOF - -# blis & blis-mt -cat << EOF >> /usr/share/Modules/modulefiles/amd/blis-${BLIS_VERSION} -#%Module 1.0 -# -# blis -# -module load ${GCC_VERSION} -prepend-path LD_LIBRARY_PATH ${INSTALL_PREFIX}/blis/lib -setenv AMD_BLIS_INCLUDE ${INSTALL_PREFIX}/blis/include -EOF - -# Create symlinks for modulefiles -ln -s /usr/share/Modules/modulefiles/amd/fftw-${FFTW_VERSION} /usr/share/Modules/modulefiles/amd/fftw -ln -s /usr/share/Modules/modulefiles/amd/libflame-${LIBFLAME_VERSION} /usr/share/Modules/modulefiles/amd/libflame -ln -s /usr/share/Modules/modulefiles/amd/blis-${BLIS_VERSION} /usr/share/Modules/modulefiles/amd/blis diff --git a/centos/common/install_mpis.sh b/centos/common/install_mpis.sh index 27aa999f..0ed7def6 100755 --- a/centos/common/install_mpis.sh +++ b/centos/common/install_mpis.sh @@ -25,9 +25,9 @@ cd .. # OpenMPI 4.0.4 -OMPI_VERSION="4.0.4" +OMPI_VERSION="4.0.5" OMPI_DOWNLOAD_URL=https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-${OMPI_VERSION}.tar.gz -$COMMON_DIR/download_and_verify.sh $OMPI_DOWNLOAD_URL "dca264f420411f540a496bdd131bffd83e325fc9006286b39dd19b62d7368233" +$COMMON_DIR/download_and_verify.sh $OMPI_DOWNLOAD_URL "572e777441fd47d7f06f1b8a166e7f44b8ea01b8b2e79d1e299d509725d1bd05" tar -xvf openmpi-${OMPI_VERSION}.tar.gz cd openmpi-${OMPI_VERSION} ./configure --prefix=${INSTALL_PREFIX}/openmpi-${OMPI_VERSION} --with-ucx=${UCX_PATH} --with-hcoll=${HCOLL_PATH} --enable-mpirun-prefix-by-default --with-platform=contrib/platform/mellanox/optimized && make -j$(nproc) && make install @@ -44,20 +44,20 @@ sed -i -e 's/ACCEPT_EULA=decline/ACCEPT_EULA=accept/g' silent.cfg cd .. # Install MVAPICH2-X 2.3 -MVAPICH2X_DOWNLOAD_URL=https://mvapich.cse.ohio-state.edu/download/mvapich/mv2x/2.3/mofed5.1/mvapich2-x-azure-xpmem-mofed5.1-gnu9.2.0-v2.3xmofed5-1.el7.x86_64.rpm -$COMMON_DIR/download_and_verify.sh $MVAPICH2X_DOWNLOAD_URL "cbccc85ebbcdea4769999a42a45d40c9a22bf000410f46de219d69a0ef0291b6" -rpm -Uvh --nodeps mvapich2-x-azure-xpmem-mofed5.1-gnu9.2.0-v2.3xmofed5-1.el7.x86_64.rpm -MV2X_INSTALLATION_DIRECTORY="/opt/mvapich2-x" -MV2X_PATH="${MV2X_INSTALLATION_DIRECTORY}/gnu9.2.0/mofed5.1/azure-xpmem/mpirun" -MV2X_VERSION="2.3" +#MVAPICH2X_DOWNLOAD_URL=https://mvapich.cse.ohio-state.edu/download/mvapich/mv2x/2.3/mofed5.1/mvapich2-x-azure-xpmem-mofed5.1-gnu9.2.0-v2.3xmofed5-1.el7.x86_64.rpm +#$COMMON_DIR/download_and_verify.sh $MVAPICH2X_DOWNLOAD_URL "cbccc85ebbcdea4769999a42a45d40c9a22bf000410f46de219d69a0ef0291b6" +#rpm -Uvh --nodeps mvapich2-x-azure-xpmem-mofed5.1-gnu9.2.0-v2.3xmofed5-1.el7.x86_64.rpm +#MV2X_INSTALLATION_DIRECTORY="/opt/mvapich2-x" +#MV2X_PATH="${MV2X_INSTALLATION_DIRECTORY}/gnu9.2.0/mofed5.1/azure-xpmem/mpirun" +#MV2X_VERSION="2.3" # download and build benchmark for MVAPICH2-X 2.3 -MVAPICH2X_BENCHMARK_DOWNLOAD_URL=http://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-5.6.3.tar.gz -$COMMON_DIR/download_and_verify.sh $MVAPICH2X_BENCHMARK_DOWNLOAD_URL "c5eaa8c5b086bde8514fa4cac345d66b397e02283bc06e44cb6402268a60aeb8" -tar -xvf osu-micro-benchmarks-5.6.3.tar.gz -cd osu-micro-benchmarks-5.6.3/ -./configure CC=${MV2X_PATH}/bin/mpicc CXX=${MV2X_PATH}/bin/mpicxx --prefix=${MV2X_INSTALLATION_DIRECTORY}/ && make -j$(nproc) && make install -cd .. +#MVAPICH2X_BENCHMARK_DOWNLOAD_URL=http://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-5.6.3.tar.gz +#$COMMON_DIR/download_and_verify.sh $MVAPICH2X_BENCHMARK_DOWNLOAD_URL "c5eaa8c5b086bde8514fa4cac345d66b397e02283bc06e44cb6402268a60aeb8" +#tar -xvf osu-micro-benchmarks-5.6.3.tar.gz +#cd osu-micro-benchmarks-5.6.3/ +#./configure CC=${MV2X_PATH}/bin/mpicc CXX=${MV2X_PATH}/bin/mpicxx --prefix=${MV2X_INSTALLATION_DIRECTORY}/ && make -j$(nproc) && make install +#cd .. # Setup module files for MPIs mkdir -p /usr/share/Modules/modulefiles/mpi/ @@ -114,26 +114,26 @@ setenv MPI_HOME /opt/intel/impi/${IMPI_2019_VERSION}/intel64 EOF # MVAPICH2-X 2.3 -cat << EOF >> /usr/share/Modules/modulefiles/mpi/mvapich2x-${MV2X_VERSION} -#%Module 1.0 -# -# MVAPICH2-X ${MV2X_VERSION} -# -conflict mpi -module load ${GCC_VERSION} -prepend-path PATH ${MV2X_PATH}/bin -prepend-path LD_LIBRARY_PATH ${MV2X_PATH}/lib -prepend-path MANPATH ${MV2X_PATH}/share/man -setenv MPI_BIN ${MV2X_PATH}/bin -setenv MPI_INCLUDE ${MV2X_PATH}/include -setenv MPI_LIB ${MV2X_PATH}/lib -setenv MPI_MAN ${MV2X_PATH}/share/man -setenv MPI_HOME ${MV2X_PATH} -EOF +#cat << EOF >> /usr/share/Modules/modulefiles/mpi/mvapich2x-${MV2X_VERSION} +##%Module 1.0 +## +## MVAPICH2-X ${MV2X_VERSION} +## +#conflict mpi +#module load ${GCC_VERSION} +#prepend-path PATH ${MV2X_PATH}/bin +#prepend-path LD_LIBRARY_PATH ${MV2X_PATH}/lib +#prepend-path MANPATH ${MV2X_PATH}/share/man +#setenv MPI_BIN ${MV2X_PATH}/bin +#setenv MPI_INCLUDE ${MV2X_PATH}/include +#setenv MPI_LIB ${MV2X_PATH}/lib +#setenv MPI_MAN ${MV2X_PATH}/share/man +#setenv MPI_HOME ${MV2X_PATH} +#EOF # Create symlinks for modulefiles ln -s /usr/share/Modules/modulefiles/mpi/mvapich2-${MV2_VERSION} /usr/share/Modules/modulefiles/mpi/mvapich2 ln -s /usr/share/Modules/modulefiles/mpi/openmpi-${OMPI_VERSION} /usr/share/Modules/modulefiles/mpi/openmpi ln -s /usr/share/Modules/modulefiles/mpi/impi_${IMPI_2019_VERSION} /usr/share/Modules/modulefiles/mpi/impi-2019 -ln -s /usr/share/Modules/modulefiles/mpi/mvapich2x-${MV2X_VERSION} /usr/share/Modules/modulefiles/mpi/mvapich2x +#ln -s /usr/share/Modules/modulefiles/mpi/mvapich2x-${MV2X_VERSION} /usr/share/Modules/modulefiles/mpi/mvapich2x diff --git a/tests/run-tests.sh b/tests/run-tests.sh index 2d6fb7ba..6bb5a250 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -6,25 +6,24 @@ MKL_VERSION="2019.5.281" MVAPICH2X_INSTALLATION_DIRECTORY="/opt/mvapich2-x" IMPI2018_PATH="/opt/intel/compilers_and_libraries_2018.5.274" -CENTOS_MOFED_VERSION="MLNX_OFED_LINUX-5.1-0.6.6.0" -HPCX_OMB_PATH_CENTOS_76="/opt/hpcx-v2.7.0-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.6-x86_64/ompi/tests/osu-micro-benchmarks-5.6.2" -HPCX_OMB_PATH_CENTOS_77="/opt/hpcx-v2.7.0-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.7-x86_64/ompi/tests/osu-micro-benchmarks-5.6.2" -HPCX_OMB_PATH_CENTOS_78="/opt/hpcx-v2.7.0-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat7.8-x86_64/ompi/tests/osu-micro-benchmarks-5.6.2" -HPCX_OMB_PATH_CENTOS_81="/opt/hpcx-v2.7.0-gcc-MLNX_OFED_LINUX-5.1-0.6.6.0-redhat8.1-x86_64/ompi/tests/osu-micro-benchmarks-5.6.2" +CENTOS_MOFED_VERSION="MLNX_OFED_LINUX-5.1-2.4.6.0" +HPCX_OMB_PATH_CENTOS_76="/opt/hpcx-v2.7.2-gcc-${CENTOS_MOFED_VERSION}-redhat7.6-x86_64/ompi/tests/osu-micro-benchmarks-5.6.2" +HPCX_OMB_PATH_CENTOS_77="/opt/hpcx-v2.7.2-gcc-${CENTOS_MOFED_VERSION}-redhat7.7-x86_64/ompi/tests/osu-micro-benchmarks-5.6.2" +HPCX_OMB_PATH_CENTOS_78="/opt/hpcx-v2.7.2-gcc-${CENTOS_MOFED_VERSION}-redhat7.8-x86_64/ompi/tests/osu-micro-benchmarks-5.6.2" +HPCX_OMB_PATH_CENTOS_81="/opt/hpcx-v2.7.2-gcc-${CENTOS_MOFED_VERSION}-redhat8.1-x86_64/ompi/tests/osu-micro-benchmarks-5.6.2" CENTOS_MODULE_FILES_ROOT="/usr/share/Modules/modulefiles" CENTOS_IMPI2019_PATH="/opt/intel/compilers_and_libraries_2020.2.254" CENTOS_MVAPICH2_PATH="/opt/mvapich2-2.3.4" CENTOS_MVAPICH2X_PATH="${MVAPICH2X_INSTALLATION_DIRECTORY}/gnu9.2.0/mofed5.1/azure-xpmem/mpirun" -CENTOS_OPENMPI_PATH="/opt/openmpi-4.0.4" +CENTOS_OPENMPI_PATH="/opt/openmpi-4.0.5" -UBUNTU_MOFED_VERSION="MLNX_OFED_LINUX-5.0-1.0.0.0" -HPCX_PATH_UBUNTU_1804="/opt/hpcx-v2.6.0-gcc-MLNX_OFED_LINUX-5.0-1.0.0.0-ubuntu18.04-x86_64" +UBUNTU_MOFED_VERSION="MLNX_OFED_LINUX-5.1-2.4.6.0" UBUNTU_MODULE_FILES_ROOT="/usr/share/modules/modulefiles" -HPCX_OMB_PATH_UBUNTU_1804="/opt/hpcx-v2.6.0-gcc-MLNX_OFED_LINUX-5.0-1.0.0.0-ubuntu18.04-x86_64/ompi/tests/osu-micro-benchmarks-5.3.2/" +HPCX_OMB_PATH_UBUNTU_1804="/opt/hpcx-v2.6.0-gcc-${UBUNTU_MOFED_VERSION}-ubuntu18.04-x86_64/ompi/tests/osu-micro-benchmarks-5.6.2/" UBUNTU_IMPI2019_PATH="/opt/intel/compilers_and_libraries_2020.2.254" UBUNTU_MVAPICH2_PATH="/opt/mvapich2-2.3.4" UBUNTU_MVAPICH2X_PATH="${MVAPICH2X_INSTALLATION_DIRECTORY}/gnu9.2.0/mofed5.0/advanced-xpmem/mpirun" -UBUNTU_OPENMPI_PATH="/opt/openmpi-4.0.4" +UBUNTU_OPENMPI_PATH="/opt/openmpi-4.0.5" CHECK_HPCX=0 CHECK_IMPI_2019=0 @@ -33,8 +32,7 @@ CHECK_OMPI=0 CHECK_MVAPICH2=0 CHECK_MVAPICH2X=0 CHECK_CUDA=0 -CHECK_LUSTRE=0 -CHECK_BLIS_MT=0 +CHECK_AOCL=1 # Find distro find_distro() { @@ -74,7 +72,7 @@ then CHECK_IMPI_2018=1 CHECK_OMPI=1 CHECK_MVAPICH2=1 - CHECK_MVAPICH2X=1 + CHECK_MVAPICH2X=0 MODULE_FILES_ROOT=${CENTOS_MODULE_FILES_ROOT} MOFED_VERSION=${CENTOS_MOFED_VERSION} IMPI2019_PATH=${CENTOS_IMPI2019_PATH} @@ -89,7 +87,7 @@ then CHECK_IMPI_2018=1 CHECK_OMPI=1 CHECK_MVAPICH2=1 - CHECK_MVAPICH2X=1 + CHECK_MVAPICH2X=0 MODULE_FILES_ROOT=${CENTOS_MODULE_FILES_ROOT} MOFED_VERSION=${CENTOS_MOFED_VERSION} IMPI2019_PATH=${CENTOS_IMPI2019_PATH} @@ -104,7 +102,7 @@ then CHECK_IMPI_2018=1 CHECK_OMPI=1 CHECK_MVAPICH2=1 - CHECK_MVAPICH2X=1 + CHECK_MVAPICH2X=0 MODULE_FILES_ROOT=${CENTOS_MODULE_FILES_ROOT} MOFED_VERSION=${CENTOS_MOFED_VERSION} IMPI2019_PATH=${CENTOS_IMPI2019_PATH} @@ -118,7 +116,7 @@ then CHECK_IMPI_2019=1 CHECK_OMPI=1 CHECK_MVAPICH2=1 - CHECK_MVAPICH2X=1 + CHECK_MVAPICH2X=0 MODULE_FILES_ROOT=${CENTOS_MODULE_FILES_ROOT} MOFED_VERSION=${CENTOS_MOFED_VERSION} IMPI2019_PATH=${CENTOS_IMPI2019_PATH} @@ -140,6 +138,7 @@ then MVAPICH2_PATH=${UBUNTU_MVAPICH2_PATH} MVAPICH2X_PATH=${UBUNTU_MVAPICH2X_PATH} OPENMPI_PATH=${UBUNTU_OPENMPI_PATH} + CHECK_AOCL=0 else echo "*** Error - invalid distro!" exit -1 @@ -185,23 +184,17 @@ check_exit_code "IB device state: LinkUp" "IB link not up" # verify GCC modulefile check_exists "${MODULE_FILES_ROOT}/gcc-${GCC_VERSION}" -# verify AMD modulefiles -check_exists "${MODULE_FILES_ROOT}/amd/fftw" -check_exists "${MODULE_FILES_ROOT}/amd/libflame" -check_exists "${MODULE_FILES_ROOT}/amd/blis" - # verify s/w package installations check_exists "/opt/gcc-${GCC_VERSION}/" -check_exists "/opt/amd/blis/" -check_exists "/opt/amd/fftw/" -check_exists "/opt/amd/libflame/" check_exists "/opt/intel/compilers_and_libraries_${MKL_VERSION}/linux/mkl/" -# blis-mt -if [ $CHECK_BLIS_MT -eq 1 ] +if [ $CHECK_AOCL -eq 1 ] then - check_exists "${MODULE_FILES_ROOT}/amd/blis-mt" - check_exists "/opt/amd/blis-mt/" + # verify AMD modulefiles + check_exists "${MODULE_FILES_ROOT}/amd/aocl" + + check_exists "/opt/amd/lib/" + check_exists "/opt/amd/include/" fi # verify mpi installations and their modulefiles @@ -277,13 +270,6 @@ then check_exit_code "Nvidia SMI - Cuda Drivers" "Failed to run Nvidia SMI - Cuda Drivers" fi -# Check if lustre client is installed properly -if [ $CHECK_LUSTRE -eq 1 ] -then - modprobe -v lustre - check_exit_code "Lustre Client" "Failed to load Lustre Client" -fi - echo "ALL OK!" exit 0 diff --git a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/hpc-tuning.sh b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/hpc-tuning.sh index e9c4f96c..eaa0abbe 100755 --- a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/hpc-tuning.sh +++ b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/hpc-tuning.sh @@ -3,6 +3,19 @@ # Disable some unneeded services by default (administrators can re-enable if desired) systemctl disable ufw +# Disable cloud-init +echo network: {config: disabled} | sudo tee /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg +sudo bash -c "cat > /etc/netplan/50-cloud-init.yaml" <<'EOF' +network: + ethernets: + eth0: + dhcp4: true + version: 2 +EOF +netplan apply + + + # Update memory limits cat << EOF >> /etc/security/limits.conf * hard memlock unlimited @@ -17,5 +30,11 @@ echo "vm.zone_reclaim_mode = 1" >> /etc/sysctl.conf sysctl -p # Configure WALinuxAgent -sed -i -e 's/# OS.EnableRDMA=y/OS.EnableRDMA=y/g' /etc/waagent.conf -systemctl enable walinuxagent +sudo sed -i -e 's/# OS.EnableRDMA=y/OS.EnableRDMA=y/g' /etc/waagent.conf +echo "Extensions.GoalStatePeriod=300" | sudo tee -a /etc/waagent.conf +echo "OS.EnableFirewallPeriod=300" | sudo tee -a /etc/waagent.conf +echo "OS.RemovePersistentNetRulesPeriod=300" | sudo tee -a /etc/waagent.conf +echo "OS.RootDeviceScsiTimeoutPeriod=300" | sudo tee -a /etc/waagent.conf +echo "OS.MonitorDhcpClientRestartPeriod=60" | sudo tee -a /etc/waagent.conf +echo "Provisioning.MonitorHostNamePeriod=60" | sudo tee -a /etc/waagent.conf +sudo systemctl restart walinuxagent diff --git a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install.sh b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install.sh index 9b06b995..fc7379e7 100755 --- a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install.sh +++ b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install.sh @@ -17,12 +17,13 @@ source ./set_properties.sh # install mpi libraries ./install_mpis.sh +# cleanup downloaded tarballs +rm -rf *.tgz *.bz2 *.tbz *.tar.gz +rm -Rf -- */ + # install nvidia gpu driver ./install_nvidiagpudriver.sh -# install AMD tuned libraries -./install_amd_libs.sh - # install Intel libraries ./install_intel_libs.sh diff --git a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_amd_libs.sh b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_amd_libs.sh deleted file mode 100755 index 679ce536..00000000 --- a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_amd_libs.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash -set -ex - -# Load gcc -GCC_VERSION=gcc-9.2.0 -export PATH=/opt/${GCC_VERSION}/bin:$PATH -export LD_LIBRARY_PATH=/opt/${GCC_VERSION}/lib64:$LD_LIBRARY_PATH -set CC=/opt/${GCC_VERSION}/bin/gcc -set GCC=/opt/${GCC_VERSION}/bin/gcc - -INSTALL_PREFIX=/opt/amd -mkdir -p ${INSTALL_PREFIX} - -# AMD FFTW -FFTW_DOWNLOAD_URL=https://github.com/amd/amd-fftw/releases/download/2.0/aocl-fftw-ubuntu-2.0.tar.gz -$COMMON_DIR/download_and_verify.sh $FFTW_DOWNLOAD_URL "306b7e68faaef6acd4970b91b3ac8ea43577b5dbe6756b2327fe8a7daa29a71f" -tar -xvf aocl-fftw-ubuntu-2.0.tar.gz -cp -r amd-fftw ${INSTALL_PREFIX}/fftw - - -# AMD libflame -LIBFLAME_DOWNLOAD_URL=https://github.com/amd/libflame/releases/download/2.0/aocl-libflame-ubuntu-2.0.tar.gz -$COMMON_DIR/download_and_verify.sh $LIBFLAME_DOWNLOAD_URL "8a39caae79de8065d6ba89008ca516e46ce1d60db346d993f6aa170945eaf051" -tar -xvf aocl-libflame-ubuntu-2.0.tar.gz -cp -r amd-libflame ${INSTALL_PREFIX}/libflame - - -# AMD blis -BLIS_DOWNLOAD_URL=https://github.com/amd/blis/releases/download/2.0/aocl-blis-ubuntu-2.0.tar.gz -$COMMON_DIR/download_and_verify.sh $BLIS_DOWNLOAD_URL "89d947b3879ad9bc0d03c6bafcc0340c1fb74489cb4768a006010585a3736990" -tar -xvf aocl-blis-ubuntu-2.0.tar.gz -cp -r amd-blis ${INSTALL_PREFIX}/blis - - -# AMD blis-mt -BLIS_MT_DOWNLOAD_URL=https://github.com/amd/blis/releases/download/2.0/aocl-blis-mt-ubuntu-2.0.tar.gz -$COMMON_DIR/download_and_verify.sh $BLIS_MT_DOWNLOAD_URL "f8fe674a7992ede058cde9e1551627dba10300dbcad3b216bbca34ad8718291a" -tar -xvf aocl-blis-mt-ubuntu-2.0.tar.gz -cp -r amd-blis-mt ${INSTALL_PREFIX}/blis-mt - -FFTW_VERSION="2.0" -LIBFLAME_VERSION="2.0" -BLIS_VERSION="2.0" -BLIS_MT_VERSION="2.0" - -# Setup module files for AMD Libraries -MODULE_FILES_DIRECTORY=/usr/share/modules/modulefiles/amd -mkdir -p ${MODULE_FILES_DIRECTORY} - -# fftw -cat << EOF >> ${MODULE_FILES_DIRECTORY}/fftw-${FFTW_VERSION} -#%Module 1.0 -# -# fftw -# -module load ${GCC_VERSION} -prepend-path LD_LIBRARY_PATH ${INSTALL_PREFIX}/fftw/lib -setenv AMD_FFTW_INCLUDE ${INSTALL_PREFIX}/fftw/include -EOF - -# libflame -cat << EOF >> ${MODULE_FILES_DIRECTORY}/libflame-${LIBFLAME_VERSION} -#%Module 1.0 -# -# libflame -# -module load ${GCC_VERSION} -prepend-path LD_LIBRARY_PATH ${INSTALL_PREFIX}/libflame/lib -setenv AMD_LIBFLAME_INCLUDE ${INSTALL_PREFIX}/libflame/include -EOF - -# blis -cat << EOF >> ${MODULE_FILES_DIRECTORY}/blis-${BLIS_VERSION} -#%Module 1.0 -# -# blis -# -module load ${GCC_VERSION} -prepend-path LD_LIBRARY_PATH ${INSTALL_PREFIX}/blis/lib -setenv AMD_BLIS_INCLUDE ${INSTALL_PREFIX}/blis/include -EOF - -# blis-mt -cat << EOF >> ${MODULE_FILES_DIRECTORY}/blis-mt-${BLIS_MT_VERSION} -#%Module 1.0 -# -# blis-mt -# -module load ${GCC_VERSION} -prepend-path LD_LIBRARY_PATH ${INSTALL_PREFIX}/blis-mt/lib -setenv AMD_BLIS_MT_INCLUDE ${INSTALL_PREFIX}/blis-mt/include -EOF - -# Create symlinks for modulefiles -ln -s ${MODULE_FILES_DIRECTORY}/fftw-${FFTW_VERSION} ${MODULE_FILES_DIRECTORY}/fftw -ln -s ${MODULE_FILES_DIRECTORY}/libflame-${LIBFLAME_VERSION} ${MODULE_FILES_DIRECTORY}/libflame -ln -s ${MODULE_FILES_DIRECTORY}/blis-${BLIS_VERSION} ${MODULE_FILES_DIRECTORY}/blis -ln -s ${MODULE_FILES_DIRECTORY}/blis-mt-${BLIS_MT_VERSION} ${MODULE_FILES_DIRECTORY}/blis-mt diff --git a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_mellanoxofed.sh b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_mellanoxofed.sh index 0abc48e8..24edcc60 100755 --- a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_mellanoxofed.sh +++ b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_mellanoxofed.sh @@ -1,13 +1,12 @@ #!/bin/bash set -ex -# Change versions (Ubuntu, OFED) here if needed -# See https://github.com/Azure/azhpc-extensions/blob/master/InfiniBand/Linux/resources.json -DRIVER_URL=http://content.mellanox.com/ofed/MLNX_OFED-5.0-1.0.0.0/MLNX_OFED_LINUX-5.0-1.0.0.0-ubuntu18.04-x86_64.tgz +MLNX_OFED_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/MLNX_OFED_LINUX-5.1-2.4.6.0-ubuntu18.04-x86_64.tgz +TARBALL=$(basename ${MLNX_OFED_DOWNLOAD_URL}) +MOFED_FOLDER=$(basename ${MLNX_OFED_DOWNLOAD_URL} .tgz) -$COMMON_DIR/download_and_verify.sh $DRIVER_URL "3a198e1114d22fe31338003ef6b8c0e7b082ce719a764b5165727eac63d2b5db" -DRIVER_FILE=$(basename $DRIVER_URL) # Extract filename of tarball -tar xzf $DRIVER_FILE # Extract tarball -DRIVER_ROOT=${DRIVER_FILE%.*} # Extract root without .tgz +$COMMON_DIR/download_and_verify.sh $MLNX_OFED_DOWNLOAD_URL "26fb818ed225e6a7eb0621fa0b28cc633e04db0b8ee2ea70a5d0152bee4bcbe4" +tar zxvf ${TARBALL} + +./${MOFED_FOLDER}/mlnxofedinstall --add-kernel-support --skip-unsupported-devices-check -./$DRIVER_ROOT/mlnxofedinstall --add-kernel-support diff --git a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_mpis.sh b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_mpis.sh index 01f040bb..130e41cf 100755 --- a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_mpis.sh +++ b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_mpis.sh @@ -10,15 +10,17 @@ set GCC=/opt/${GCC_VERSION}/bin/gcc INSTALL_PREFIX=/opt -# HPC-X v2.6.0 -HPCX_VERSION="v2.6.0" +# HPC-X v2.7.2 +HPCX_VERSION="v2.7.2" -HPCX_DOWNLOAD_URL=http://www.mellanox.com/downloads/hpc/hpc-x/v2.6/hpcx-v2.6.0-gcc-MLNX_OFED_LINUX-5.0-1.0.0.0-ubuntu18.04-x86_64.tbz -$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "15f27a3f14d6e90c5b08f5ffd43e836b570ebf78b9b240042c35c071d90c43c2" -tar -xvf hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5.0-1.0.0.0-ubuntu18.04-x86_64.tbz -mv hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5.0-1.0.0.0-ubuntu18.04-x86_64 ${INSTALL_PREFIX} +HPCX_DOWNLOAD_URL=https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.7.2-gcc-MLNX_OFED_LINUX-5.1-2.4.6.0-ubuntu18.04-x86_64.tbz +TARBALL=$(basename ${HPCX_DOWNLOAD_URL}) +HPCX_FOLDER=$(basename ${HPCX_DOWNLOAD_URL} .tbz) -HPCX_PATH=${INSTALL_PREFIX}/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5.0-1.0.0.0-ubuntu18.04-x86_64 +$COMMON_DIR/download_and_verify.sh $HPCX_DOWNLOAD_URL "3050ed693f002e3e976155a6b7258038fc23ef0a8f4921457a70592b97b90c43" +tar -xvf ${TARBALL} +mv ${HPCX_FOLDER} ${INSTALL_PREFIX} +HPCX_PATH=${INSTALL_PREFIX}/${HPCX_FOLDER} # MVAPICH2 2.3.4 MV2_VERSION="2.3.4" @@ -29,10 +31,10 @@ cd mvapich2-${MV2_VERSION} ./configure --prefix=${INSTALL_PREFIX}/mvapich2-${MV2_VERSION} --enable-g=none --enable-fast=yes && make -j$(nproc) && make install cd .. -# OpenMPI 4.0.4 -OMPI_VERSION="4.0.4" +# OpenMPI 4.0.5 +OMPI_VERSION="4.0.5" OMPI_DOWNLOAD_URL=https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-${OMPI_VERSION}.tar.gz -$COMMON_DIR/download_and_verify.sh $OMPI_DOWNLOAD_URL "dca264f420411f540a496bdd131bffd83e325fc9006286b39dd19b62d7368233" +$COMMON_DIR/download_and_verify.sh $OMPI_DOWNLOAD_URL "572e777441fd47d7f06f1b8a166e7f44b8ea01b8b2e79d1e299d509725d1bd05" tar -xvf openmpi-${OMPI_VERSION}.tar.gz cd openmpi-${OMPI_VERSION} ./configure --prefix=${INSTALL_PREFIX}/openmpi-${OMPI_VERSION} --with-ucx=${UCX_PATH} --with-hcoll=${HCOLL_PATH} --enable-mpirun-prefix-by-default --with-platform=contrib/platform/mellanox/optimized && make -j$(nproc) && make install diff --git a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_nvidiagpudriver.sh b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_nvidiagpudriver.sh index 88acf437..6115bef1 100755 --- a/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_nvidiagpudriver.sh +++ b/ubuntu/ubuntu-18.x/ubuntu-18.04-hpc/install_nvidiagpudriver.sh @@ -1,17 +1,22 @@ #!/bin/bash set -ex -CUDA_PIN_DOWNLOAD_URL=https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin -$COMMON_DIR/download_and_verify.sh $CUDA_PIN_DOWNLOAD_URL "dd00df91301f85f920a43641113793b3e8d6006e058e36fc69f44eadaebf648a" -mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600 +# Install Cuda +wget https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_450.51.06_linux.run +chmod +x cuda_11.0.3_450.51.06_linux.run +sudo ./cuda_11.0.3_450.51.06_linux.run --silent +echo 'export PATH=$PATH:/usr/local/cuda/bin' | sudo tee -a /etc/bash.bashrc +echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64' | sudo tee -a /etc/bash.bashrc -PUBKEY_URL=/var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub -CUDA_REPO_PKG=cuda-repo-ubuntu1804-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb +# Install DCGM +#DCGM_VERSION=2.0.10 +#wget --no-check-certificate https://developer.download.nvidia.com/compute/redist/dcgm/${DCGM_VERSION}/DEBS/datacenter-gpu-manager_${DCGM_VERSION}_amd64.deb +#wget https://developer.download.nvidia.com/compute/redist/dcgm/${DCGM_VERSION}/DEBS/datacenter-gpu-manager_${DCGM_VERSION}_amd64.deb +#sudo dpkg -i datacenter-gpu-manager_*.deb && \ +# sudo rm -f datacenter-gpu-manager_*.deb + +# Install NCCL +#wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb +#sudo dpkg -i nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb +#sudo apt install libnccl2 libnccl-dev -CUDA_REPO_DOWNLOAD_URL=http://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-ubuntu1804-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb -$COMMON_DIR/download_and_verify.sh $CUDA_REPO_DOWNLOAD_URL "a9a5ab0324291b25170245ad39817684487f9bceda1848f05be1b53acd55fafc" -dpkg -i ${CUDA_REPO_PKG} -apt-key add ${PUBKEY_URL} -apt-get update -apt-get install --no-install-recommends -y cuda-drivers -apt-get -y install cuda