-
Notifications
You must be signed in to change notification settings - Fork 0
/
reconfigure-hadoop.sample
executable file
·105 lines (76 loc) · 4.67 KB
/
reconfigure-hadoop.sample
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/bin/bash
#
# Regenerates the Hadoop configuration under ${HADOOP_HOME}/conf for the node
# this script is run on, which becomes the Hadoop master.
#
# The *.toset templates in the conf directory are rendered into
# core-site.xml, hadoop-env.sh, mapred-site.xml and hdfs-site.xml. The slaves
# file is built from ${HADOOP_HOME}/hosts (minus the master) and the masters
# file holds the master's short host name. Every run overwrites those files
# from the templates, so the script can be re-run with different settings.
#
# Usage:
#   reconfigure-hadoop MAX_MAP_TASKS MAX_REDUCE_TASKS \
#       MAPPER_THREADS_GPU MAPPER_THREADS_CPU \
#       REDUCER_THREADS_GPU REDUCER_THREADS_CPU \
#       HDFS_BLOCK_SIZE JAVA_HEAP_SIZE_GB
#
# Environment:
#   HADOOP_HOME  root of the Hadoop installation (required).
#
# Exit status:
#   0 on success; 1 if a precondition is not met or a file could not be
#   generated.
set -u

# Directory substituted for the PATHTOHDFS placeholder in the templates.
readonly HDFS_PATH='/state/partition1/hadoop-yiskylee'

# Prints every argument on its own line to stderr and aborts the script.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Prints a one-line warning to stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Prints the expected command line to stderr.
usage() {
  printf 'Usage: %s MAX_MAP_TASKS MAX_REDUCE_TASKS MAPPER_THREADS_GPU MAPPER_THREADS_CPU REDUCER_THREADS_GPU REDUCER_THREADS_CPU HDFS_BLOCK_SIZE JAVA_HEAP_SIZE_GB\n' \
    "${0##*/}" >&2
}

# Escapes $1 for use as the replacement text of a sed "s|...|...|" command:
# backslash, "&" and the "|" delimiter are the characters sed would interpret.
sed_escape() {
  printf '%s' "$1" | sed 's/[\\&|]/\\&/g'
}

# render TEMPLATE OUTPUT SED_ARGS...
# Runs sed with SED_ARGS over TEMPLATE and writes the result to OUTPUT.
# Returns non-zero (after reporting on stderr) if the file could not be made.
render() {
  local template=$1 output=$2
  shift 2
  if ! sed "$@" -- "${template}" > "${output}"; then
    warn "Failed to generate ${output} from ${template}"
    return 1
  fi
}

# Prints the separator used between the sections of the final report.
print_rule() {
  echo "-----------------------------------------------------"
}

main() {
  local master_fqdn master_short
  master_fqdn=$(hostname) || die "Cannot determine the host name"
  [[ -n "${master_fqdn}" ]] || die "Cannot determine the host name"
  master_short=${master_fqdn%%.*}

  if [[ "${master_fqdn}" == *login* ]]; then
    die "Run it from the hadoop master NOT from the login nodes" \
      "The env variables that you need are not yet set on this login node"
  fi

  # Validate everything before touching the existing configuration, so that a
  # usage mistake does not leave the cluster without a mapred-site.xml.
  if (( $# != 8 )); then
    warn "Expected exactly 8 arguments for the startup script, got \"$#\""
    usage
    exit 1
  fi
  [[ -n "${HADOOP_HOME:-}" ]] || die "HADOOP_HOME is not set"

  local cfg="${HADOOP_HOME}/conf"
  local status=0

  # NOTE: the two task limits are only reported below; no template
  # substitution currently consumes them.
  local max_map_tasks=$1
  local max_reduce_tasks=$2
  local mapper_threads_gpu=$3
  local mapper_threads_cpu=$4
  local reducer_threads_gpu=$5
  local reducer_threads_cpu=$6
  local hdfs_block_size=$7
  local java_heap_size=$8

  local master_repl hdfs_path_repl block_size_repl
  master_repl=$(sed_escape "${master_fqdn}")
  hdfs_path_repl=$(sed_escape "${HDFS_PATH}")
  block_size_repl=$(sed_escape "${hdfs_block_size}")

  rm -f -- "${cfg}/mapred-site.xml"

  echo "Setting path to ${HDFS_PATH}"
  render "${cfg}/core-site.toset" "${cfg}/core-site.xml" \
    -e "s|PATHTOHDFS|${hdfs_path_repl}|g" \
    -e "s|MASTER|${master_repl}|g" \
    || status=1
  render "${cfg}/hadoop-env.toset" "${cfg}/hadoop-env.sh" \
    -e "s|PATHTOHDFS|${hdfs_path_repl}|g" \
    || status=1

  printf '%s\n' "$*"
  printf 'Max. num tasks = %s mappers, %s reducers\n' \
    "${max_map_tasks}" "${max_reduce_tasks}"
  printf 'Mapper thread configs = GPU ( %s ), CPU ( %s )\n' \
    "${mapper_threads_gpu}" "${mapper_threads_cpu}"
  printf 'Reducer thread configs = GPU ( %s ), CPU ( %s )\n' \
    "${reducer_threads_gpu}" "${reducer_threads_cpu}"
  printf 'HDFS block size = %s\n' "${hdfs_block_size}"
  echo "GPU/CPU Mults shouldn't matter"

  # JVM options for the child tasks; they replace the -Xmx48G placeholder
  # value of mapred.child.java.opts in the template.
  local java_opts="-Xms2G -Xmx${java_heap_size}G"
  java_opts+=" -Dopencl.mapper.threadsPerGroup.gpu=${mapper_threads_gpu}"
  java_opts+=" -Dopencl.mapper.threadsPerGroup.cpu=${mapper_threads_cpu}"
  java_opts+=" -Dopencl.reducer.threadsPerGroup.gpu=${reducer_threads_gpu}"
  java_opts+=" -Dopencl.reducer.threadsPerGroup.cpu=${reducer_threads_cpu}"
  java_opts+=" -Dopencl.scheduler=BALANCED -Dopencl.vectorsToBuffer=32768"
  local java_opts_repl
  java_opts_repl=$(sed_escape "${java_opts}")

  render "${cfg}/mapred-site.toset" "${cfg}/mapred-site.xml" \
    -e "s|MASTER|${master_repl}|g" \
    -e "s|mapred.child.java.opts</name><value>-Xmx48G</value>|mapred.child.java.opts</name><value>${java_opts_repl}</value>|g" \
    || status=1

  # The hdfs-site template marks every tunable with a value of 0.
  render "${cfg}/hdfs-site.toset" "${cfg}/hdfs-site.xml" \
    -e 's|datanode.handler.count</name><value>0</value>|datanode.handler.count</name><value>3</value>|g' \
    -e 's|namenode.handler.count</name><value>0</value>|namenode.handler.count</name><value>10</value>|g' \
    -e 's|xcievers</name><value>0</value>|xcievers</name><value>256</value>|g' \
    -e 's|dfs.replication</name><value>0</value>|dfs.replication</name><value>3</value>|g' \
    -e "s|dfs.block.size</name><value>0</value>|dfs.block.size</name><value>${block_size_repl}</value>|g" \
    || status=1

  # Every distinct host except the master is a slave. Match the master's name
  # as a whole word and as a fixed string: a plain substring match would also
  # drop e.g. compute-0-10 when the master is compute-0-1.
  sort -u -- "${HADOOP_HOME}/hosts" \
    | grep -v -w -F -- "${master_short}" > "${cfg}/slaves"
  local -a pipe_rc=("${PIPESTATUS[@]}")
  # grep exits 1 when it selects no line (a master-only cluster); only a
  # status above 1 is a real error.
  if (( pipe_rc[0] != 0 || pipe_rc[1] > 1 )); then
    warn "Failed to generate ${cfg}/slaves from ${HADOOP_HOME}/hosts"
    status=1
  fi

  if ! printf '%s\n' "${master_short}" > "${cfg}/masters"; then
    warn "Failed to write ${cfg}/masters"
    status=1
  fi

  printf '\n\n\n'
  echo "Here are the results:"
  cat -- "${cfg}/hadoop-env.sh"
  print_rule
  local site
  for site in mapred-site hdfs-site core-site; do
    grep -E 'name>|value' -- "${cfg}/${site}.xml"
    print_rule
  done
  cat -- "${cfg}/slaves"
  print_rule
  cat -- "${cfg}/masters"
  print_rule

  return "${status}"
}

main "$@"