forked from columnflow/columnflow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
law.cfg
239 lines (178 loc) · 7.69 KB
/
law.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
[modules]
# columnflow task modules, one dotted module path per line, given as bare keys without values
# (presumably the modules that law imports to discover tasks - confirm against the law docs)
columnflow.tasks.framework
columnflow.tasks.external
columnflow.tasks.calibration
columnflow.tasks.selection
columnflow.tasks.reduction
columnflow.tasks.production
columnflow.tasks.ml
columnflow.tasks.union
columnflow.tasks.histograms
columnflow.tasks.plotting
columnflow.tasks.yields
columnflow.tasks.cutflow
columnflow.tasks.inspection
[analysis]
# analysis-wide defaults: default analysis / config / dataset, module locations per component,
# and default values for task, remote workflow and chunked I/O settings
default_analysis: columnflow.example_config.analysis_st.analysis_st
default_config: run2_pp_2018
default_dataset: st_tchannel_t
# module locations per component; the "pkg.{a,b}" notation presumably expands to "pkg.a, pkg.b"
# (TODO confirm against the code that reads these options)
production_modules: columnflow.production.{categories,processes,normalization}
calibration_modules: columnflow.calibration
selection_modules: columnflow.selection.{empty}
categorization_modules: columnflow.categorization
weight_production_modules: columnflow.weight.{empty,all_weights}
ml_modules: columnflow.ml
inference_modules: columnflow.inference
# namespace of all columnflow tasks
cf_task_namespace: cf
# default sandbox for main tasks with standard packages for columnar processing
default_columnar_sandbox: bash::$CF_BASE/sandboxes/venv_columnar.sh
# whether MergeReducedEvents should keep its inputs from ReduceEvents by default
# (otherwise they are removed after merging)
default_keep_reduced_events: True
# whether to shift values that end up exactly on the edge of the last bin during histogramming
# slightly to the left to avoid them being excluded from the last bin; None leads to automatic mode
default_histogram_last_edge_inclusive: None
# boolean flag that, if True, sets the *hists* output of cf.SelectEvents and cf.MergeSelectionStats to optional
default_selection_hists_optional: True
# whether or not the ensure_proxy decorator should be skipped, even if used by a task's run method
skip_ensure_proxy: False
# some remote workflow parameter defaults
htcondor_flavor: $CF_HTCONDOR_FLAVOR
htcondor_share_software: False
htcondor_disk: -1
slurm_flavor: $CF_SLURM_FLAVOR
slurm_partition: $CF_SLURM_PARTITION
# ChunkedIOHandler defaults
chunked_io_chunk_size: 100000
chunked_io_pool_size: 2
chunked_io_debug: False
# csv list of task families that inherit from ChunkedReaderMixin and whose output arrays should be
# checked (raising an exception) for non-finite values before saving them to disk
# supported tasks are: cf.CalibrateEvents, cf.SelectEvents, cf.ProduceColumns, cf.PrepareMLEvents,
# cf.MLEvaluation, cf.UniteColumns
check_finite_output: None
# how to treat inexistent selector steps passed to cf.CreateCutflowHistograms: throw an error,
# silently skip them, or add a dummy step to the output (allowed values: error, skip, dummy)
missing_selector_step_strategy: error
# csv list of task families that inherit from ChunkedReaderMixin and whose input columns should be
# checked (raising an exception) for overlaps between fields when creating a merged input array
# supported tasks are: cf.SelectEvents, cf.ReduceEvents, cf.ProduceColumns, cf.PrepareMLEvents,
# cf.MLEvaluation, cf.CreateHistograms, cf.UniteColumns
check_overlapping_inputs: None
# whether to log runtimes of array functions by default
log_array_function_runtime: False
[outputs]
# list of all used file systems
# NOTE(review): wlcg_fs_desy_store is referenced by lfn_sources below but is not part of this list,
# and wlcg_fs_fnal_redirector is defined in this file but listed in neither - confirm this is intended
wlcg_file_systems: wlcg_fs, wlcg_fs_infn_redirector, wlcg_fs_global_redirector
# list of file systems used by columnflow.tasks.external.GetDatasetLFNs.iter_nano_files to
# look for the correct fs per nano input file (in that order)
lfn_sources: wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlcg_fs_global_redirector
# output locations per task family
# the key can consist of multiple underscore-separated parts, that can each be patterns or regexes
# (the examples below join the parts with double underscores)
# these parts are used for the lookup from within tasks and can contain (e.g.) the analysis name,
# the config name, the task family, the dataset name, or the shift name
# (see AnalysisTask.get_config_lookup_keys() - and subclasses - for the exact order)
# values can have the following format:
# for local targets : "local[, LOCAL_FS_NAME or STORE_PATH][, store_parts_modifier]"
# for remote targets : "wlcg[, WLCG_FS_NAME][, store_parts_modifier]"
# for mirrored targets: "wlcg_mirrored, LOCAL_FS_NAME, WLCG_FS_NAME[, store_parts_modifier]"
# (when WLCG_FS_NAME is empty, the tasks' "default_wlcg_fs" attribute is used)
# the "store_parts_modifier" can be the name of a function in the "store_parts_modifiers" aux dict
# of the analysis instance, which is called with an output's store parts to modify them
# example:
; run3_2023__cf.CalibrateEvents__nomin*: local
; cf.CalibrateEvents: wlcg
[versions]
# default versions of specific tasks to pin
# the key can consist of multiple underscore-separated parts, that can each be patterns or regexes
# (the examples below join the parts with double underscores)
# these parts are used for the lookup from within tasks and can contain (e.g.) the analysis name,
# the config name, the task family, the dataset name, or the shift name
# (see AnalysisTask.get_config_lookup_keys() - and subclasses - for the exact order)
# note:
# this lookup is skipped if the lookup based on the config instance's auxiliary data succeeded
# example:
; run3_2023__cf.CalibrateEvents__nomin*: prod1
; cf.CalibrateEvents: prod2
[job]
# settings for remote job submission: location and handling of job files, crab storage options
# and the lcg setup files sourced in remote jobs
job_file_dir: $CF_JOB_BASE
job_file_dir_cleanup: False
# NOTE(review): "{{task_id}}" and "XXX" look like placeholders filled in when the directory is
# created - confirm against the law documentation
job_file_dir_mkdtemp: sub_{{task_id}}_XXX
# storage element (SE) and output directory on that SE for crab's internal output
# (crab might not even move files there, but it is strictly required for crab's job submission)
crab_storage_element: $CF_CRAB_STORAGE_ELEMENT
crab_base_directory: $CF_CRAB_BASE_DIRECTORY
# lcg setup file sourced in remote jobs to access gfal tools
remote_lcg_setup_el7: /cvmfs/grid.cern.ch/centos7-ui-200122/etc/profile.d/setup-c7-ui-python3-example.sh
remote_lcg_setup_el9: /cvmfs/grid.cern.ch/alma9-ui-test/etc/profile.d/setup-alma9-test.sh
# whether the loading of the remote lcg setup file is enforced
# otherwise this might be skipped in case gfal-ls, etc., are already available
remote_lcg_setup_force: True
[logging]
# log level per logger (key: presumably the logger name, value: the level to set)
law: INFO
luigi-interface: INFO
gfal2: WARNING
columnflow.columnar_util-perf: INFO
[local_fs]
# local file system whose base is the root directory
base: /
[wlcg_fs]
# remote file system pointing to a user store directory on the DESY dCache (via gsiftp);
# the user and store names as well as all cache settings are taken from environment variables
base: gsiftp://dcache-door-cms04.desy.de:2811/pnfs/desy.de/cms/tier2/store/user/$CF_CERN_USER/$CF_STORE_NAME
create_file_dir: True
use_cache: $CF_WLCG_USE_CACHE
cache_root: $CF_WLCG_CACHE_ROOT
cache_cleanup: $CF_WLCG_CACHE_CLEANUP
cache_max_size: 50GB
[wlcg_fs_desy_store]
# remote file system for the DESY tier2 store, with one base per protocol (xrootd, gsiftp, webdav)
base_xrootd: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2
base_gsiftp: gsiftp://dcache-door-cms04.desy.de:2811/pnfs/desy.de/cms/tier2
base_webdav: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2
# NOTE(review): "&::base_xrootd" presumably is a law option reference that resolves to the
# base_xrootd value of this section - confirm against the law configuration docs
base: &::base_xrootd
use_cache: $CF_WLCG_USE_CACHE
cache_root: $CF_WLCG_CACHE_ROOT
cache_cleanup: $CF_WLCG_CACHE_CLEANUP
cache_max_size: 15GB
cache_global_lock: True
cache_mtime_patience: -1
[wlcg_fs_infn_redirector]
# remote file system based on the xrootd endpoint xrootd-cms.infn.it;
# cache settings are identical to those of the other redirector sections in this file
base: root://xrootd-cms.infn.it/
use_cache: $CF_WLCG_USE_CACHE
cache_root: $CF_WLCG_CACHE_ROOT
cache_cleanup: $CF_WLCG_CACHE_CLEANUP
cache_max_size: 15GB
cache_global_lock: True
cache_mtime_patience: -1
[wlcg_fs_fnal_redirector]
# remote file system based on the xrootd endpoint cmsxrootd.fnal.gov;
# cache settings are identical to those of the other redirector sections in this file
# NOTE(review): this file system is not referenced by wlcg_file_systems or lfn_sources in
# [outputs] - confirm whether it is used elsewhere
base: root://cmsxrootd.fnal.gov/
use_cache: $CF_WLCG_USE_CACHE
cache_root: $CF_WLCG_CACHE_ROOT
cache_cleanup: $CF_WLCG_CACHE_CLEANUP
cache_max_size: 15GB
cache_global_lock: True
cache_mtime_patience: -1
[wlcg_fs_global_redirector]
# remote file system based on the xrootd endpoint cms-xrd-global.cern.ch;
# cache settings are identical to those of the other redirector sections in this file
base: root://cms-xrd-global.cern.ch/
use_cache: $CF_WLCG_USE_CACHE
cache_root: $CF_WLCG_CACHE_ROOT
cache_cleanup: $CF_WLCG_CACHE_CLEANUP
cache_max_size: 15GB
cache_global_lock: True
cache_mtime_patience: -1
[luigi_core]
# presumably forwarded to luigi's "core" configuration section (confirm against the law docs);
# scheduler choice, host and port are taken from environment variables
local_scheduler: $CF_LOCAL_SCHEDULER
scheduler_host: $CF_SCHEDULER_HOST
scheduler_port: $CF_SCHEDULER_PORT
parallel_scheduling: False
no_lock: True
log_level: INFO
[luigi_scheduler]
# presumably forwarded to luigi's "scheduler" configuration section (confirm against the law docs)
# NOTE(review): the delay values are presumably given in seconds (86400 would then be one day) -
# verify against the luigi configuration docs
record_task_history: False
remove_delay: 86400
retry_delay: 30
worker_disconnect_delay: 30
[luigi_worker]
# presumably forwarded to luigi's "worker" configuration section (confirm against the law docs);
# keep_alive is taken from an environment variable
ping_interval: 20
wait_interval: 20
check_unfulfilled_deps: False
cache_task_completion: True
keep_alive: $CF_WORKER_KEEP_ALIVE
force_multiprocessing: False