forked from drebatto/glite-condor-infoprovider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
glite-info-dynamic-condor
executable file
·219 lines (198 loc) · 7.76 KB
/
glite-info-dynamic-condor
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#!/bin/bash
#
# File: glite-info-dynamic-condor
# Author: David Rebatto ([email protected])
#
# Description:
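# Gather queue information from the condor collector and schedd daemons.
# In the 'queue_to_schedd' deployment there is one schedd per queue and
# queues are named after the schedds' names (name@host); in the
# 'queue_to_jobattribute' deployment the queues are taken from the
# configuration file and jobs are assigned to them through a job attribute.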
# If a schedd fails in providing information, the corresponding queue
# is marked as 'Closed'.
#
# Revision History:
#
# 10-Oct-2011: First release
# 04-Jun-2013: Added Glue2 info
# 19-Mar-2014: Removed obsolete attribute GLUE2EntityCreationTime
# 04-Feb-2015: added "queue to job attribute mapping" support
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Configuration file: taken from the first command line argument, falling
# back to the default location when no argument is given.
#
config_file=${1:-/etc/lrms/condor.conf}

# Give up immediately when the configuration cannot be read.
[[ -r $config_file ]] || {
  echo "$0: cannot read configuration file $config_file" >&2
  exit 1
}
# Source the config file (quoted, so that a path containing whitespace or
# glob characters reaches '.' as a single word)
#
. "$config_file"

# Command prefixes used to query the pool. They are plain strings that are
# expanded unquoted at the call sites, so that the optional "-pool <host>"
# part splits into separate arguments.
condorstatus="${condor_path:-/usr/bin}/condor_status ${condor_host:+-pool $condor_host}"
condorq="${condor_path:-/usr/bin}/condor_q ${condor_host:+-pool $condor_host}"

# Placeholder used for values that are not defined; output lines that still
# contain it are filtered out before being printed.
undefvalue="###DELETE_ME###"
# Remove invalid characters from variable names.
# Filter: reads text from stdin and writes it to stdout with every
# character that is not alphanumeric replaced by '_'.
# Note that this includes the newline appended by 'echo', so
# "echo a@b | sanitize_name" prints "a_b_" (with no final newline).
sanitize_name() {
  tr -c '[:alnum:]' '_'
}
# How is the CE deployed?
# Both branches define, for every queue, shell variables named
# "<sanitized queue name>_<Attribute>" that the output sections read later.
#
if [[ "$deployment" == "queue_to_schedd" ]]; then
  ## There is a schedd per queue (running on the central manager)
  ## The queue information is stored as attributes of the schedd's classad

  # Get the queue list from condor (schedd names, whitespace separated)
  #
  declare -a queues
  queues=( $($condorstatus -subsystem schedd -format "%s " Name) )
  if (( ${#queues[@]} == 0 )); then
    echo "$0: no queues defined in the collector" >&2
    exit 1
  fi

  # Retrieve information for each queue: condor_status is asked to print a
  # list of "<name>_<Attribute>=<value> " assignments, which is then
  # evaluated to define the variables.
  #
  for queue in "${queues[@]}"; do
    # 'echo' is used on purpose: sanitize_name also turns its trailing
    # newline into '_', and the variable names rely on that.
    queuename=$(echo "${queue}" | sanitize_name)
    vardef=$($condorstatus -schedd "$queue" \
      -format "${queuename}_WaitingJobs=%s " TotalIdleJobs \
      -format "${queuename}_RunningJobs=%s " TotalRunningJobs \
      -format "${queuename}_TotalJobs=%d " TotalRunningJobs+TotalIdleJobs \
      -format "${queuename}_MaxCPUTime=%s " MaxCPUTime \
      -format "${queuename}_MaxWallClockTime=%s " MaxWallclockTime \
      -format "${queuename}_GlueStatus=%s " GlueStatus \
      -format "${queuename}_MaxMainMemory=%s " MaxMainMemory \
      -format "${queuename}_MaxSlotsPerJob=%s " MaxSlotsPerJob \
      -format "${queuename}_MaxVirtualMemory=%s " MaxVirtualMemory)
    # NOTE(review): the schedd's answer is evaluated as shell code, so the
    # attribute values are trusted. Quoted, so that it is at least not
    # word-split and glob-expanded before evaluation.
    eval "$vardef"
  done

elif [[ "$deployment" == "queue_to_jobattribute" ]]; then
  ## There is no relation between schedds and queues
  ## The queue information is stored as variables in the configuration file

  # Start every configured queue with integer counters set to zero.
  # ${queues[@]} is deliberately left unquoted: the configuration file may
  # define 'queues' either as an array or as a whitespace separated string.
  for queue in ${queues[@]}; do
    queuename=$(echo "${queue}" | sanitize_name)
    declare -i "${queuename}_TotalJobs=0"
    declare -i "${queuename}_WaitingJobs=0"
    declare -i "${queuename}_RunningJobs=0"
  done

  # Count the idle (JobStatus 1) and running (JobStatus 2) jobs of each
  # queue; condor_q prints one "<queue> <status>" line per job.
  constr="(JobStatus == 1 || JobStatus == 2)"
  while read -r queue status; do
    # A job lacking the queue attribute produces a line that holds only the
    # status, which would end up in $queue and leave $status empty. Skip
    # such malformed lines instead of failing on the arithmetic below.
    [[ "$status" == [12] ]] || continue
    queuename=$(echo "${queue}" | sanitize_name)
    (( ${queuename}_TotalJobs += 1 ))
    if (( status == 1 )); then
      (( ${queuename}_WaitingJobs += 1 ))
    else
      (( ${queuename}_RunningJobs += 1 ))
    fi
  done < \
    <($condorq -global -constr "$constr" -format "%s " "$queue_attribute" -format "%s\n" JobStatus)

else
  echo "$0: wrong or missing 'deployment' value in $config_file" >&2
  exit 1
fi
# Store the common information for all queues: the collector is asked to
# print "CondorVersion='...' HostsTotal=... HostsUnclaimed=... ", which is
# then evaluated to define the three variables.
#
vardef=$($condorstatus -collector \
  -format "CondorVersion='%s' " CondorVersion \
  -format "HostsTotal=%s " HostsTotal \
  -format "HostsUnclaimed=%s " HostsUnclaimed)
# Quoted, so that the collector's answer is not word-split and glob-expanded
# before being evaluated.
eval "$vardef"

# extract the substring between the first and the second ' '
# (presumably the bare version number -- TODO confirm against the format of
# the CondorVersion attribute)
CondorVersion=${CondorVersion#* }
CondorVersion=${CondorVersion%% *}
# Properly handle slots and CPUs counting in a mix
# of static and partitionable slots.
# Dynamic slots are excluded by the constraint; every other slot yields one
# line "<total cpus> <free cpus> <total slots> <free slots>", and the four
# columns are summed over all slots.
# The counters start from zero, so that an empty pool is reported as 0
# instead of an empty value and no inherited value is added to the sums.
TotalCpus=0
FreeCpus=0
TotalSlots=0
FreeSlots=0
while read -r n1 n2 n3 n4; do
  (( TotalCpus += n1 ))
  (( FreeCpus += n2 ))
  (( TotalSlots += n3 ))
  (( FreeSlots += n4 ))
done < <(
  $condorstatus -constr 'SlotType != "Dynamic"' \
    -format "%d " TotalSlotCpus \
    -format "%d " 'ifThenElse(SlotType == "Partitionable", Cpus, ifThenElse(State == "Unclaimed", Cpus, 0))' \
    -format "%d " 'ifThenElse(SlotType == "Partitionable", TotalSlotCpus, 1)' \
    -format "%d\n" 'ifThenElse(SlotType == "Partitionable", Cpus, ifThenElse(State == "Unclaimed", 1, 0))'
)
# Current time in UTC, formatted as YYYY-MM-DDThh:mm:ssZ
timenow=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
# Print Glue1 information
# For every CE dn found in the static LDIF file, print the dn followed by
# the dynamic attributes of the corresponding queue.
#
if [[ "$outputformat" == "glue1" || "$outputformat" == "both" ]]; then
  if [[ ! -r "$glue1_ldif_file" ]]; then
    # Report the file that was actually tested
    echo "$0: cannot read static LDIF file $glue1_ldif_file" >&2
    exit 1
  fi
  grep -e 'dn:[ ]*GlueCEUniqueID=' "$glue1_ldif_file" |
  while read -r ldifline; do
    # The queue name is the part of the dn that follows "cream-condor-",
    # up to the next comma
    queue=${ldifline##*cream-condor-}
    queue=${queue%%,*}
    # 'echo' is used on purpose: sanitize_name also turns its trailing
    # newline into '_', and the variable names rely on that.
    queuename=$(echo "${queue}" | sanitize_name)
    # Template of the entry. Pool-wide values are expanded right away; the
    # escaped \${...} references to the per-queue variables are expanded by
    # the eval below and fall back to $undefvalue when the variable is not
    # set. The text must start at column 0: leading blanks would be read as
    # LDIF line continuations.
    glueline="\
$ldifline
GlueCEInfoLRMSType: condor
GlueCEInfoLRMSVersion: $CondorVersion
GlueCEPolicyMaxRunningJobs: $TotalSlots
GlueCEPolicyAssignedJobSlots: $TotalSlots
GlueCEInfoTotalCPUs: $TotalCpus
GlueCEStateFreeCPUs: $FreeCpus
GlueCEStateFreeJobSlots: $FreeSlots
GlueCEStateWaitingJobs: \${${queuename}_WaitingJobs:-${undefvalue}}
GlueCEStateRunningJobs: \${${queuename}_RunningJobs:-${undefvalue}}
GlueCEStateTotalJobs: \${${queuename}_TotalJobs:-${undefvalue}}
GlueCEPolicyMaxCPUTime: \${${queuename}_MaxCPUTime:-${undefvalue}}
GlueCEPolicyMaxWallClockTime: \${${queuename}_MaxWallClockTime:-${undefvalue}}
GlueCEPolicyMaxObtainableCPUTime: \${${queuename}_MaxCPUTime:-${undefvalue}}
GlueCEPolicyMaxObtainableWallClockTime: \${${queuename}_MaxWallClockTime:-${undefvalue}}
GlueCEStateStatus: \${${queuename}_GlueStatus:-${undefvalue}}
"
    # Expand the per-queue references and drop the attributes whose value
    # is undefined. The empty line closing the template ends the entry.
    # NOTE(review): the dn line and the variable values are evaluated as
    # shell text here, so they are trusted.
    eval "echo \"$glueline\" | grep -v \"$undefvalue\""
  done
fi
# Print Glue2 information
# For every manager / share dn found in the static LDIF files, print the dn
# followed by the corresponding dynamic attributes.
#
if [[ "$outputformat" == "glue2" || "$outputformat" == "both" ]]; then

  # Computing manager
  if [[ ! -r "$glue2_static_file_computing_manager" ]]; then
    echo "$0: cannot read static LDIF file $glue2_static_file_computing_manager" >&2
    exit 1
  fi
  grep -e 'dn:[ ]*GLUE2ManagerId=' "$glue2_static_file_computing_manager" |
  while read -r ldifline; do
    # Nothing is left to expand in this entry, so it is printed as is
    # (no eval needed). The empty line closing the text ends the entry.
    glueline="\
$ldifline
GLUE2ManagerProductVersion: $CondorVersion
"
    printf '%s\n' "$glueline"
  done

  # Computing share
  if [[ ! -r "$glue2_static_file_computing_share" ]]; then
    echo "$0: cannot read static LDIF file $glue2_static_file_computing_share" >&2
    exit 1
  fi
  grep -e 'dn:[ ]*GLUE2ShareID=' "$glue2_static_file_computing_share" |
  while read -r ldifline; do
    # The queue name is the part of the share id up to the first underscore
    queue=${ldifline##*GLUE2ShareID=}
    queue=${queue%%_*}
    # 'echo' is used on purpose: sanitize_name also turns its trailing
    # newline into '_', and the variable names rely on that.
    queuename=$(echo "${queue}" | sanitize_name)
    # Template of the entry. The escaped \${...} and \$(...) parts are
    # expanded by the eval below; undefined per-queue values fall back to
    # $undefvalue, while the serving state defaults to "production" and is
    # converted to lower case. The character classes passed to tr are
    # quoted so that they cannot match file names in the working directory.
    # The text must start at column 0: leading blanks would be read as
    # LDIF line continuations.
    glueline="\
$ldifline
GLUE2ComputingShareDefaultCPUTime: \${${queuename}_MaxCPUTime:-${undefvalue}}
GLUE2ComputingShareDefaultWallTime: \${${queuename}_MaxWallClockTime:-${undefvalue}}
GLUE2ComputingShareMaxCPUTime: \${${queuename}_MaxCPUTime:-${undefvalue}}
GLUE2ComputingShareMaxWallTime: \${${queuename}_MaxWallClockTime:-${undefvalue}}
GLUE2ComputingShareMaxMainMemory: \${${queuename}_MaxMainMemory:-${undefvalue}}
GLUE2ComputingShareMaxSlotsPerJob: \${${queuename}_MaxSlotsPerJob:-${undefvalue}}
GLUE2ComputingShareMaxVirtualMemory: \${${queuename}_MaxVirtualMemory:-${undefvalue}}
GLUE2ComputingShareServingState: \$(echo \${${queuename}_GlueStatus:-production} | tr '[:upper:]' '[:lower:]')
GLUE2ComputingShareMaxRunningJobs: $TotalSlots
GLUE2ComputingShareWaitingJobs: \${${queuename}_WaitingJobs:-${undefvalue}}
"
    # Expand the per-queue references and drop the attributes whose value
    # is undefined.
    # NOTE(review): the dn line and the variable values are evaluated as
    # shell text here, so they are trusted.
    eval "echo \"$glueline\" | grep -v \"$undefvalue\""
  done
fi