-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_asli_pipeline.sh
58 lines (50 loc) · 2.05 KB
/
run_asli_pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
# Entry point for the ASLI pipeline.
# NOTE(review): ENVS is sourced by bare name, so this is presumably meant to be
# started from the directory that holds ENVS - confirm.
set -e

# Read in config file
source ENVS

# Activate virtual environment
# Quoted so that an activation path containing spaces stays a single word.
source "${ASLI_VENV}"

# Data should already have been fetched with 00_download_era5.sh
# [[ ]] with a quoted operand also catches an unset or empty DATA_DIR: the
# unquoted form `[ ! -d $DATA_DIR ]` collapses to `[ ! -d ]`, which tests
# whether the string "-d" is empty, evaluates false, and skips this guard.
if [[ ! -d "${DATA_DIR}" ]]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "There is no data directory. Do you need to run src/00_download_era5.sh first?" >&2
  exit 1
fi
# Run calculations, writes an output file in $OUTPUT_DIR
bash src/01_run_asli_calculations.sh

# Script which carries out quality control checks on the calculated data; the
# thresholds passed below can be configured in ENVS.
# Failure of checks will stop execution.
# NOTE(review): the threshold arguments are unquoted, so an unset or empty
# value is dropped and the later arguments shift position - confirm that all
# four are always set in ENVS.
Rscript src/02_quality_checks.R \
  "${OUTPUT_DIR}/asli_calculation_${FILE_IDENTIFIER}.csv" \
  ${SD_FROM_MEAN} \
  ${ACTCENPRES_BOUNDS_MIN} \
  ${ACTCENPRES_BOUNDS_MAX} \
  ${MEDIAN_ABSOLUTE_DEVIATIONS}
# Exports files to destination, either object storage or classic file system.
# This also determines the file export format.

#######################################
# Run checks on whether new data matches previously exported data.
# Only runs if it is not the first run, ie there is a file to compare against.
# Globals:   FIRST_RUN, OUTPUT_DIR, FILE_IDENTIFIER (read)
# Arguments: $1 - location (bucket or directory) holding the previous export
# Returns:   exit status of 03_verify_no_past_changes.R, or 0 when skipped
#######################################
verify_no_past_changes() {
  local previous_location=$1
  if [[ "${FIRST_RUN}" != true ]]; then
    # Provide new file first, then the old file to compare against.
    Rscript src/03_verify_no_past_changes.R \
      "${OUTPUT_DIR}/asli_calculation_${FILE_IDENTIFIER}.csv" \
      "${previous_location}/asli_calculation_${FILE_IDENTIFIER}.csv"
  fi
}

case "${FILE_DESTINATION}" in
  OBJECT_STORAGE)
    verify_no_past_changes "${S3_BUCKET}"
    bash src/04_export_to_object_store.sh
    ;;
  BOTH)
    verify_no_past_changes "${S3_BUCKET}"
    bash src/04_export_to_object_store.sh
    # Deliberate fallthrough (`;&`): when BOTH is matched, the FILE_SYSTEM
    # arm below runs as well.
    ;&
  FILE_SYSTEM)
    verify_no_past_changes "${RSYNC_LOCATION}"
    bash src/05_export_to_file_system.sh
    ;;
  *)
    # [*] joins the array into one word inside the quoted message; [@] here
    # would split the message into several arguments (ShellCheck SC2145).
    echo "ERROR: ${FILE_DESTINATION} is not a valid destination, choose from: ${VALID_DESTINATIONS[*]}" >&2
    exit 1
    ;;
esac
# Clean up the data dir, but retain output.
# $PIPELINE_DIRECTORY/data is removed rather than $DATA_DIR, because using
# $DATA_DIR here would only remove /monthly.
# ${PIPELINE_DIRECTORY:?...} aborts the script when the variable is unset or
# empty, so the path cannot degrade to `rm -r /data`. The quotes and `--` keep
# an unusual path from being word-split or parsed as options.
rm -r -- "${PIPELINE_DIRECTORY:?PIPELINE_DIRECTORY must be set}/data"