forked from Azure/azurehpc-health-checks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinstall-nhc.sh
executable file
·131 lines (105 loc) · 3.64 KB
/
install-nhc.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
#
# Installs LBNL NHC plus the AzureHPC custom health checks.
#
# Usage: install-nhc.sh [INSTALL_DIR] [CUDA_DIR]
#   INSTALL_DIR  Where to install. Defaults to the directory containing this
#                script. When it differs from that directory, files go into
#                an "azurehpc-health-checks" sub-directory of INSTALL_DIR.
#   CUDA_DIR     CUDA location handed to custom-test-setup.sh.
#                Defaults to /usr/local/cuda.

# Print the final install location.
# Arguments:
#   $1 - directory containing this script (the source checkout)
#   $2 - install directory requested by the caller; empty means "use $1"
# Outputs:
#   The resolved install directory on stdout.
function resolve_install_dir(){
    local src_dir=$1
    local requested=${2:-$1}
    # not using src directory as install location: nest under a dedicated
    # sub-directory. Both sides are quoted so paths containing whitespace or
    # glob characters are compared literally.
    if [[ "$requested" != "$src_dir" ]]; then
        printf '%s\n' "$requested/azurehpc-health-checks"
    else
        printf '%s\n' "$requested"
    fi
}

SRC_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
CUDA_DIR=${2:-/usr/local/cuda}
INSTALL_DIR=$(resolve_install_dir "$SRC_DIR" "${1:-}")

export AZ_NHC_VERSION_LOG=$INSTALL_DIR/docs/version.log
# Download, build, test and install LBNL NHC from its GitHub release tarball.
# Globals:
#   SRC_DIR             (read) the build happens under $SRC_DIR/build
#   AZ_NHC_VERSION_LOG  (read) file the installed NHC version is appended to
# Returns:
#   0 when NHC was installed; non-zero when the build directory could not be
#   entered, the download/extraction failed, or "make install" failed.
#   The directory stack is left as it was found in every case.
function install_lbnl_nhc(){
    local nhc_version=1.4.3
    local tarball="nhc-${nhc_version}.tar.xz"
    local os_id libexecdir
    local status=0

    # Bail out before downloading anything if the build directory is missing;
    # otherwise the build would run in whatever directory we happen to be in.
    pushd "$SRC_DIR/build" || return 1

    if wget -O "$tarball" "https://github.com/mej/nhc/releases/download/${nhc_version}/lbnl-nhc-${nhc_version}.tar.xz" \
        && tar -xf "$tarball" \
        && pushd "lbnl-nhc-${nhc_version}"; then

        # Read the distro ID in a subshell so os-release variables do not
        # leak into this script's namespace.
        os_id=$(. /etc/os-release && printf '%s' "$ID")
        case "$os_id" in
            ubuntu)
                libexecdir=/usr/lib ;;
            *)
                libexecdir=/usr/libexec ;;
        esac

        ./configure --prefix=/usr --sysconfdir=/etc --libexecdir="$libexecdir"
        # The test result is informational only and does not block the install.
        sudo make test
        echo -e "\n"
        # Only record the version once it is actually installed.
        if sudo make install; then
            echo "NHC version: $nhc_version" >> "$AZ_NHC_VERSION_LOG"
        else
            status=1
        fi
        popd
    else
        echo "Failed to download or unpack lbnl-nhc ${nhc_version}" >&2
        status=1
    fi

    rm -f "$tarball"
    popd
    return "$status"
}
# Create the install layout and the scratch build directory.
# ("mkdir -p" on the sub-directories also creates $INSTALL_DIR itself.)
mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/docs"
mkdir -p "$SRC_DIR/build"

# create version log
# Query the source checkout explicitly so the tag is found even when the
# script is launched from a different working directory.
AZVER=$(git -C "$SRC_DIR" describe --tags --abbrev=0)
cat > "$AZ_NHC_VERSION_LOG" <<EOL
This file contains the version of AzureHPC Health Checks and submodules.
Azure-NHC: $AZVER
submodules:
EOL

# install lbnl nhc
install_lbnl_nhc
# Install NHC dependencies
# distro_check holds the ID_LIKE line of /etc/os-release, folded to lower case
# so the family names below match regardless of how the distro spells them.
# distro holds the raw value of the NAME field (including any quotes).
distro_check=$(grep -i 'ID_LIKE=' /etc/os-release | tr '[:upper:]' '[:lower:]')
distro=$(awk -F= '/^NAME/{print $2}' /etc/os-release)

if [[ $distro_check =~ debian ]]; then
    sudo DEBIAN_FRONTEND=noninteractive apt-get install -y libpci-dev hwloc build-essential libboost-program-options-dev libssl-dev plocate cmake
elif [[ $distro =~ AlmaLinux ]]; then
    sudo dnf install -y pciutils-devel hwloc openssl-devel boost-devel mlocate cmake
# this will attempt to install for the following os's: CentOS, RHEL, Fedora
elif [[ $distro_check =~ centos|rhel|fedora ]]; then
    sudo yum install -y pciutils-devel hwloc openssl-devel boost-devel mlocate cmake > /dev/null
    echo "$distro version is not officially supported, proceed w/ caution."
else
    echo "OS version $distro is not supported. Proceed w/ caution."
fi
# Install build tools
# nvbandwidth needs cmake >= 3.20. NHC_CMAKE is exported as the cmake binary
# to use: the system one when it is new enough, otherwise a private copy
# installed under $SRC_DIR/build/cmake.
export NHC_CMAKE=cmake

# Compare major/minor numerically. Concatenating the digits of the version
# string mis-orders releases (e.g. 4.0.0 -> 400) and yields an empty operand
# when cmake is not installed at all.
cmake_ok=0
cmake_banner=$(cmake --version 2>/dev/null | sed -n 1p)
if [[ $cmake_banner =~ ([0-9]+)\.([0-9]+) ]]; then
    cmake_major=${BASH_REMATCH[1]}
    cmake_minor=${BASH_REMATCH[2]}
    # 10# forces base 10 so a leading zero is never read as octal.
    if (( 10#$cmake_major > 3 || (10#$cmake_major == 3 && 10#$cmake_minor >= 20) )); then
        cmake_ok=1
    fi
fi

if (( cmake_ok == 0 )); then
    echo "Upgrade cmake version to 3.20 or above to build nvbandwidth"
    if pushd "$SRC_DIR/build"; then
        if wget -q -O cmake.sh https://github.com/Kitware/CMake/releases/download/v3.28.0/cmake-3.28.0-linux-x86_64.sh \
            && chmod +x cmake.sh \
            && mkdir -p cmake \
            && ./cmake.sh --skip-license --prefix=./cmake; then
            export NHC_CMAKE="$PWD/cmake/bin/cmake"
        else
            echo "Failed to install cmake 3.28.0 under $PWD/cmake" >&2
        fi
        rm -f cmake.sh
        popd
    fi
fi
# Copy over necessary files
# The custom .nhc checks always go into the system NHC scripts directory.
sudo cp "$SRC_DIR"/customTests/*.nhc /etc/nhc/scripts

# When installing outside the source checkout, bring along the configuration,
# the distributed runner, the docs and the entry-point script.
if [[ "$SRC_DIR" != "$INSTALL_DIR" ]]; then
    cp -r "$SRC_DIR/conf/" "$INSTALL_DIR"
    cp -r "$SRC_DIR/distributed_nhc/" "$INSTALL_DIR"
    cp "$SRC_DIR"/*.md "$INSTALL_DIR/docs/"
    cp "$SRC_DIR/LICENSE" "$INSTALL_DIR/docs/"
    cp "$SRC_DIR/run-health-checks.sh" "$INSTALL_DIR"
fi
cp -r "$SRC_DIR/customTests/topofiles/" "$INSTALL_DIR"

# Install NHC custom tests
# Enter the directory relative to the source checkout (not the caller's
# working directory) and only run the setup script if that succeeded.
if pushd "$SRC_DIR/customTests/"; then
    ./custom-test-setup.sh "$INSTALL_DIR" "$CUDA_DIR"
    popd
fi
# create env file
# Writes a small script that exports AZ_NHC_ROOT and defines the "aznhc"
# alias; the install directory is baked in at generation time.
env_file="${INSTALL_DIR}/aznhc_env_init.sh"
{
    printf '%s\n' \
        '#!/bin/bash' \
        '# This file is used to source the NHC environment variables' \
        '# It is recommended to source this file in your .bashrc or .bash_profile' \
        '# to make the NHC commands available in your shell.'
    printf 'export AZ_NHC_ROOT=%s\n' "$INSTALL_DIR"
    printf 'alias aznhc="sudo %s/run-health-checks.sh"\n' "$INSTALL_DIR"
} > "$env_file"
chmod +x "$env_file"

exit 0