-
Notifications
You must be signed in to change notification settings - Fork 0
/
spark_setup.sh
executable file
·118 lines (94 loc) · 3.44 KB
/
spark_setup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/bin/bash -l
CURDIR=`pwd`
WORKDIR=${HOME}
current_time=$(date +"%Y.%m.%d.%S")
if [ ! -d $CURDIR/logs ];
then
mkdir logs
fi
log=`pwd`/logs/spark_setup_$current_time.log
echo -e | tee -a $log
#MASTER=$1
MASTER=$(ssh $1 "hostname")
#SLAVES=`echo $2 | tr "," "%"`
SLAVES=`cat ${HADOOP_HOME}/etc/hadoop/slaves | tr "\n" "%"`
#Logic to create server list
echo $SLAVES | tr "%" "\n" | grep $MASTER
if [ $? -eq 0 ]
then
#if master is also used as data machine
SERVERS=$SLAVES
else
SERVERS=`echo ''$MASTER'%'$SLAVES''`
fi
##backing the hive-site.xml file
echo "Backing up the config files hive-site.xml and spark-defaults.conf"
cp ${SPARK_HOME}/conf/hive-site.xml ${WORKDIR}
cp ${SPARK_HOME}/conf/spark-defaults.conf ${WORKDIR}
SPARK_DIR=`ls -ltr ${WORKDIR}/spark-*-bin-hadoop-*.tgz | tail -1 | awk '{print $9}' | sed 's/.tgz//'`
SPARK_FILE=`ls -ltr ${WORKDIR}/spark-*-bin-hadoop-*.tgz | tail -1 | awk '{print $9}'`
if [ $? -ne 0 ];
then
echo "Spark tgz file does not exist. Please rerun the spark validation job to generate again." | tee -a $log
exit 1
echo "***********************************************"
fi
## Exporting SPARK_HOME to the PATH and Add scripts to the PATH
for i in `echo $SERVERS | tr "%" "\n"`
do
if [ $i != $MASTER ]
then
echo 'Deleting old spark file and copying new Spark setup file on '$i'' | tee -a $log
ssh $i "rm ${SPARK_FILE}"
scp ${SPARK_FILE} @$i:${WORKDIR} | tee -a $log
fi
ssh $i '[ -d '${SPARK_DIR}' ]'
if [ $? -eq 0 ]
then
echo 'Deleting existing spark folder '$SPARK_DIR' from '$i' '| tee -a $log
ssh $i "rm -rf ${SPARK_DIR}"
fi
echo 'Unzipping Spark setup file on '$i'' | tee -a $log
ssh $i "tar xf ${SPARK_FILE} --gzip" | tee -a $log
echo 'Updating .bashrc file on '$i' with Spark variables '
echo '#StartSparkEnv' >tmp_b
echo "export SPARK_HOME="${SPARK_DIR}"" >>tmp_b
echo "export PATH=\$SPARK_HOME/bin:\$PATH">>tmp_b
echo '#StopSparkEnv'>>tmp_b
scp tmp_b @$i:${WORKDIR}
ssh $i "grep -q "SPARK_HOME" ~/.bashrc"
if [ $? -ne 0 ];
then
ssh $i "cat tmp_b>>$HOME/.bashrc"
ssh $i "rm tmp_b"
else
ssh $i "sed -i '/#StartSparkEnv/,/#StopSparkEnv/ d' $HOME/.bashrc"
ssh $i "cat tmp_b>>$HOME/.bashrc"
ssh $i "rm tmp_b"
fi
ssh $i "source $HOME/.bashrc"
echo "---------------------------------------------" | tee -a $log
done
rm -rf tmp_b
##Exporting spark variables for current script session on master
export SPARK_HOME=${SPARK_DIR}
export PATH=$SPARK_HOME/bin:$PATH
## updating Slave file for Spark folder
source ${HOME}/.bashrc
echo 'Updating Slave file for Spark setup'| tee -a $log
cp ${SPARK_HOME}/conf/slaves.template ${SPARK_HOME}/conf/slaves
sed -i 's|localhost||g' ${SPARK_HOME}/conf/slaves
cat ${HADOOP_HOME}/etc/hadoop/slaves>>${SPARK_HOME}/conf/slaves
#setting spark log properties to display only errors
cp $SPARK_HOME/conf/log4j.properties.template $SPARK_HOME/conf/log4j.properties
sed -i 's/^log4j.rootCategory.*/log4j.rootCategory=ERROR, console/g' $SPARK_HOME/conf/log4j.properties
##Restoring hive-site.xml in conf dir under SPARK_HOME
echo "Restoring config files spark-defaults.conf and hive-site.xml in spark folder"
for i in `echo $SERVERS |tr "%" "\n"`
do
scp $SPARK_HOME/conf/log4j.properties @$i:$SPARK_HOME/conf | tee -a $log
scp $WORKDIR/spark-defaults.conf @$i:$SPARK_HOME/conf | tee -a $log
scp $WORKDIR/hive-site.xml @$i:$SPARK_HOME/conf | tee -a $log
done
echo -e "Spark installation done..!!\n" | tee -a $log
echo -e