forked from columnflow/columnflow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_analysis.sh
executable file
·348 lines (275 loc) · 11.3 KB
/
create_analysis.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
#!/usr/bin/env bash
# Script that creates a minimal analysis project based on columnflow.
#
# Execute as (e.g.)
# > bash -c "$(curl -Ls https://raw.githubusercontent.com/columnflow/columnflow/master/create_analysis.sh)"
#
# A few variables are queried at the beginning of the project creation and inserted into a template
# analysis. For more details, check out the "analysis_templates" directory.
# Entry point: interactively creates a new columnflow-based analysis directory below the current
# working directory. The helpers, the queries, the template checkout, the placeholder replacement
# and the git setup are all defined or executed inside this function.
create_analysis() {
#
# locals
#
# "true" when ZSH_VERSION is non-empty, "false" otherwise; the value is executed as a command in
# the conditions below
local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
# path of this script, taken from the zsh prompt expansion %x or from bash's BASH_SOURCE[0]
local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
# absolute directory of this script; only read in debug mode (local template copy and symlink)
local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
# directory the function was started from; the analysis directory is created below it
local exec_dir="$( pwd )"
# branches used for the template tarball and for the git submodules
local fetch_cf_branch="master"
local fetch_cmsdb_branch="master"
# "true" or "false", taken from CF_CREATE_ANALYSIS_DEBUG (default "false"); executed as a command
# in conditions, so any other value would be run as a command name
local debug="${CF_CREATE_ANALYSIS_DEBUG:-false}"
# zsh options
if ${shell_is_zsh}; then
# -L keeps the emulation and option changes local to this function
# NOTE(review): the zsh manual lists csh, ksh, sh and zsh as emulation modes - confirm that
# "bash" has the intended effect
emulate -L bash
# let glob patterns match names with a leading dot
setopt globdots
fi
#
# helpers
#
# Prints the first argument converted to lower case.
#
# Arguments: $1 - string to convert
# Outputs:   the converted string followed by a newline on stdout
# Reads:     shell_is_zsh ("true"/"false") from the calling scope to pick the expansion syntax
#            understood by the running shell
str_lc() {
    # printf instead of echo, so that values such as "-n" or "-e" are printed rather than being
    # consumed as echo options
    if ${shell_is_zsh}; then
        # zsh: (L) expansion flag
        printf '%s\n' "${(L)1}"
    else
        # bash >= 4: ",," case modification
        printf '%s\n' "${1,,}"
    fi
}
# Prints the first argument converted to upper case.
#
# Arguments: $1 - string to convert
# Outputs:   the converted string followed by a newline on stdout
# Reads:     shell_is_zsh ("true"/"false") from the calling scope to pick the expansion syntax
#            understood by the running shell
str_uc() {
    # printf instead of echo, so that values such as "-e" (which becomes "-E") are printed rather
    # than being consumed as echo options
    if ${shell_is_zsh}; then
        # zsh: (U) expansion flag
        printf '%s\n' "${(U)1}"
    else
        # bash >= 4: "^^" case modification
        printf '%s\n' "${1^^}"
    fi
}
# Exports a variable after letting the shell expand its value once more.
#
# Arguments: $1 - name of the variable to export
#            $2 - value; it is passed through "eval echo", so variable references inside it are
#                 expanded and runs of whitespace collapse to single spaces
# NOTE(review): eval executes whatever the value contains; the only caller in this file passes
# answers that were restricted to [a-zA-Z0-9_] beforehand - keep it that way.
export_var() {
    local name="$1"
    local raw="$2"
    local expanded
    expanded="$( eval "echo ${raw}" )"
    export $name="${expanded}"
}
# Prints a message, optionally wrapped in an ANSI color sequence.
#
# Arguments: $1  - color name: red, green, yellow, cyan, magenta, bright or green_bright; any
#                  other value prints the message without color codes
#            $2+ - message parts, joined by single spaces
# Outputs:   the message followed by a newline on stdout; for known colors the line is written
#            with "echo -e", so backslash escapes inside the message are interpreted as well
echo_color() {
    local color="$1"
    local msg="${@:2}"

    # translate the color name into its SGR parameter list
    local sgr=""
    case "${color}" in
        red) sgr="0;49;31" ;;
        green) sgr="0;49;32" ;;
        yellow) sgr="0;49;33" ;;
        cyan) sgr="0;49;36" ;;
        magenta) sgr="0;49;35" ;;
        bright) sgr="1;49;39" ;;
        green_bright) sgr="1;49;32" ;;
    esac

    # unknown colors fall back to plain output
    if [ -z "${sgr}" ]; then
        echo "${msg}"
        return
    fi

    echo -e "\x1b[${sgr}m${msg}\x1b[0m"
}
# Interactively queries a value from stdin and exports it as an environment variable.
#
# Arguments:
#   $1 - name of the variable to export
#   $2 - question shown in the prompt
#   $3 - default used for empty answers; "-" marks the value as required
#   $4 - optional comma-separated list of allowed answers
# Outputs: the prompt and validation hints on stdout, a final error on stderr
# Returns: the status of export_var once a valid answer was given, 1 when stdin is exhausted
#          before a valid answer could be read
query_input() {
    local varname="$1"
    local text="$2"
    local default="$3"
    local choices="$4"
    local query_response=""
    local problem=""
    local input_open="true"

    # build the query text: "<text> (choices: '<choices>', default: '<default>'): "
    local details=""
    if [ -n "${choices}" ]; then
        details="choices: '${choices}'"
    fi
    if [ "${default}" != "-" ]; then
        details="${details:+${details}, }default: '${default}'"
    fi
    local input_line="${text}"
    if [ -n "${details}" ]; then
        input_line="${input_line} (${details})"
    fi
    input_line="${input_line}: "

    # query until a valid answer is given
    while true; do
        # print the prompt as data, never as a format string ("%" and "\" may be part of the text)
        printf '%s' "${input_line}"

        # remember when stdin is exhausted so that the loop below cannot spin forever
        query_response=""
        if ! read -r query_response; then
            input_open="false"
        fi

        # strip " and ' on both sides before validating, so that quoted answers are accepted
        query_response="${query_response%\"}"
        query_response="${query_response%\'}"
        query_response="${query_response#\"}"
        query_response="${query_response#\'}"

        problem=""

        # handle empty responses: use the default or complain when there is none
        if [ -z "${query_response}" ]; then
            if [ "${default}" = "-" ]; then
                problem="a value is required"
            else
                query_response="${default}"
            fi
        fi

        # compare to choices when given (literal match against the comma-delimited list)
        if [ -z "${problem}" ] && [ -n "${choices}" ]; then
            case ",${choices}," in
                *",${query_response},"*)
                    ;;
                *)
                    problem="invalid choice"
                    ;;
            esac
        fi

        # check characters
        if [ -z "${problem}" ] && [[ ! "${query_response}" =~ ^[a-zA-Z0-9_]*$ ]]; then
            problem="only alpha-numeric characters and underscores are allowed"
        fi

        if [ -z "${problem}" ]; then
            break
        fi

        echo_color yellow "${problem}"

        # asking again is pointless when nothing is left to read
        if [ "${input_open}" = "false" ]; then
            >&2 echo "no more input available to answer the query for '${varname}'"
            return "1"
        fi
    done

    export_var "${varname}" "${query_response}"
}
#
# queries
#
# Each query_input call exports the variable named in its first argument. The order matters:
# the default of a query is derived from the answer to the previous one.
echo_color bright "start creating columnflow-based analysis in local directory"
echo
# required, "-" means that there is no default
query_input "cf_analysis_name" "Name of the analysis" "-"
echo
# defaults to the lower-case analysis name
query_input "cf_module_name" "Name of the python module in the analysis directory" "$( str_lc "${cf_analysis_name}" )"
echo
# defaults to the module name
query_input "cf_short_name" "Short name for environment variables, pre- and suffixes" "${cf_module_name}"
echo
# "cms_minimal" is both the default and the only accepted choice
query_input "cf_analysis_flavor" "The flavor of the analysis to setup" "cms_minimal" "cms_minimal"
echo
query_input "cf_use_ssh" "Use ssh for git submodules" "True" "True,False"
echo
# changes
# drop one trailing underscore from the short name, then derive lower- and upper-case variants
export cf_short_name="${cf_short_name%_}"
export cf_short_name_lc="$( str_lc "${cf_short_name}" )"
export cf_short_name_uc="$( str_uc "${cf_short_name}" )"
# debug output
# printed only when debug is "true"
if ${debug}; then
echo "analysis name : ${cf_analysis_name}"
echo "module name : ${cf_module_name}"
echo "short name lc : ${cf_short_name_lc}"
echo "short name uc : ${cf_short_name_uc}"
echo "analysis flavor: ${cf_analysis_flavor}"
echo "use ssh : ${cf_use_ssh}"
echo
fi
#
# checkout the analysis template
#
# The template of the selected flavor becomes the new analysis directory and the working
# directory is changed into it. Existing directories are never touched.
local cf_analysis_base="${exec_dir}/${cf_analysis_name}"
if [ -d "${cf_analysis_base}" ]; then
    >&2 echo "directory '${cf_analysis_base}' already exists, please remove it and start again"
    return "1"
fi

echo_color cyan "checking out analysis tempate to ${cf_analysis_base}"
if ${debug}; then
    # debug mode: copy the template from the checkout that contains this script
    cp -r "${this_dir}/analysis_templates/${cf_analysis_flavor}" "${cf_analysis_base}" || return "$?"
    cd "${cf_analysis_base}" || return "$?"
else
    # default mode: download the repository tarball into a scratch directory and move the
    # template out of it
    local cf_setup_dir="${exec_dir}/.cf_analysis_setup"
    local fetch_status="0"

    rm -rf "${cf_setup_dir}"
    mkdir -p "${cf_setup_dir}" || return "$?"
    # never extract the tarball anywhere but inside the scratch directory
    cd "${cf_setup_dir}" || return "$?"

    # -f makes HTTP errors (e.g. an unknown branch) fail instead of piping an error page to tar
    # NOTE(review): -k disables TLS certificate verification for this download; drop it unless
    # there is a known reason to keep it
    curl -f -L -s -k "https://github.com/columnflow/columnflow/tarball/${fetch_cf_branch}" | tar -xz || fetch_status="$?"
    if [ "${fetch_status}" = "0" ]; then
        mv columnflow-columnflow-*/"analysis_templates/${cf_analysis_flavor}" "${cf_analysis_base}" || fetch_status="$?"
    fi

    # do not leave the scratch directory behind when the download or the move failed
    if [ "${fetch_status}" != "0" ]; then
        cd "${exec_dir}" && rm -rf "${cf_setup_dir}"
        return "${fetch_status}"
    fi

    cd "${cf_analysis_base}" || return "$?"
    rm -rf "${cf_setup_dir}"
fi
echo_color green "done"
echo
#
# insert variables
#
# Placeholders of the form __cf_<name>__ are replaced in file and directory names first and in
# file contents afterwards. PATH is limited to /usr/bin for these commands, so find, bash, mv and
# sed are taken from there. The replacement values are handed over as a positional argument or
# inside a quoted sed expression instead of being spliced unquoted into the command text.

# rename files
echo_color cyan "renaming files"
# -depth handles directory contents before the directory itself, so renaming a directory does not
# invalidate paths that are still to be visited
PATH="/usr/bin" find . -depth -name '*__cf_analysis_name__*' -execdir bash -c 'mv "$1" "${1//__cf_analysis_name__/$2}"' bash {} "${cf_analysis_name}" \;
PATH="/usr/bin" find . -depth -name '*__cf_module_name__*' -execdir bash -c 'mv "$1" "${1//__cf_module_name__/$2}"' bash {} "${cf_module_name}" \;
PATH="/usr/bin" find . -depth -name '*__cf_short_name_lc__*' -execdir bash -c 'mv -i "$1" "${1//__cf_short_name_lc__/$2}"' bash {} "${cf_short_name_lc}" \;
PATH="/usr/bin" find . -depth -name '*__cf_short_name_uc__*' -execdir bash -c 'mv -i "$1" "${1//__cf_short_name_uc__/$2}"' bash {} "${cf_short_name_uc}" \;
echo_color green "done"
echo

# update files
echo_color cyan "inserting placeholders"
# the values were restricted to [a-zA-Z0-9_] when they were queried, so they cannot contain
# characters that are special in a sed replacement
PATH="/usr/bin" find . -type f -execdir sed -i "s/__cf_analysis_name__/${cf_analysis_name}/g" {} \;
PATH="/usr/bin" find . -type f -execdir sed -i "s/__cf_module_name__/${cf_module_name}/g" {} \;
PATH="/usr/bin" find . -type f -execdir sed -i "s/__cf_short_name_lc__/${cf_short_name_lc}/g" {} \;
PATH="/usr/bin" find . -type f -execdir sed -i "s/__cf_short_name_uc__/${cf_short_name_uc}/g" {} \;
echo_color green "done"
#
# setup git and submodules
#
# Turns the analysis directory (the current working directory at this point) into a git
# repository with lfs enabled and registers the required submodules.
echo
echo_color cyan "setup git repository"
git init
echo_color green "done"
echo

echo_color cyan "enable lfs"
git lfs install
echo_color green "done"
echo

echo_color cyan "setup submodules"
# https urls by default, ssh urls when requested ("True"/"False" answer, compared in lower case)
local gh_prefix="https://github.com/"
if [ "$( str_lc "${cf_use_ssh}" )" = "true" ]; then
    gh_prefix="git@github.com:"
fi
mkdir -p modules
if ${debug}; then
    # debug mode: link the checkout that contains this script instead of cloning columnflow
    ln -s "${this_dir}" modules/columnflow
else
    git submodule add -b "${fetch_cf_branch}" "${gh_prefix}columnflow/columnflow.git" modules/columnflow
fi
if [ "${cf_analysis_flavor}" = "cms_minimal" ]; then
    git submodule add -b "${fetch_cmsdb_branch}" "${gh_prefix}uhh-cms/cmsdb.git" modules/cmsdb
fi
git submodule update --init --recursive
echo_color green "done"
#
# minimal setup instructions
#
# Everything below only prints text: the next steps for the user and a few example commands.
# It is printed unconditionally; the exit codes of the git commands above are not checked.
echo
echo_color green_bright "Setup successfull! The next steps are:"
echo
echo_color cyan "1. Setup the repository and install the environment."
echo_color bright " > cd ${cf_analysis_name}"
echo_color bright " > source setup.sh [recommended_yet_optional_setup_name]"
echo
echo_color cyan "2. Run local tests & linting checks to verify that the analysis is setup correctly."
echo_color bright " > ./tests/run_all"
echo
echo_color cyan "3. Create a GRID proxy if you intend to run tasks that need one"
# the cms flavor additionally requests the cms VO
if [ "${cf_analysis_flavor}" = "cms_minimal" ]; then
echo_color bright " > voms-proxy-init -voms cms -rfc -valid 196:00"
else
echo_color bright " > voms-proxy-init -rfc -valid 196:00"
fi
echo
echo_color cyan "4. Checkout the 'Getting started' guide to run your first tasks."
echo " https://columnflow.readthedocs.io/en/stable/start.html"
echo
echo " Suggestions for tasks to run:"
echo
echo " a) Run the 'calibration -> selection -> reduction' pipeline for the first file of the"
echo " default dataset using the default calibrator and default selector"
echo " (enter the command below and 'tab-tab' to see all arguments or add --help for help)"
echo_color bright " > law run cf.ReduceEvents --version dev1 --branch 0"
echo
echo " Verify what you just run by adding '--print-status -1' (-1 = fully recursive)"
echo_color bright " > law run cf.ReduceEvents --version dev1 --branch 0 --print-status -1"
echo
echo " b) Create the jet1_pt distribution for the single top datasets"
echo " (if you have an image/pdf viewer installed, add it via '--view-cmd <binary>')"
echo_color bright " > law run cf.PlotVariables1D --version dev1 --datasets 'st*' --variables jet1_pt"
echo
echo " Again, verify what you just ran, now with recursion depth 4"
echo_color bright " > law run cf.PlotVariables1D --version dev1 --datasets 'st*' --variables jet1_pt --print-status 4"
echo
echo " c) Include the ttbar dataset and also plot jet1_eta"
echo_color bright " > law run cf.PlotVariables1D --version dev1 --datasets 'tt*,st*' --variables jet1_pt,jet1_eta"
# the datacard example and the afs note are only shown for the cms flavor
if [ "${cf_analysis_flavor}" = "cms_minimal" ]; then
echo
echo " d) Create cms-style datacards for the example model in ${cf_module_name}/inference/example.py"
echo_color bright " > law run cf.CreateDatacards --version dev1 --inference-model example"
echo
# two differently colored parts on one line, hence the nested command substitutions
echo "$( echo_color magenta "Please note that the '${cf_analysis_flavor}' example needs access to a few files on" ) $( echo_color bright "/afs/cern.ch" )!"
fi
echo
}
# run the creation right away, forwarding all command line arguments
create_analysis "$@"