Skip to content

Commit

Permalink
Merge branch 'master' into cert
Browse files Browse the repository at this point in the history
  • Loading branch information
covesturtevant committed Dec 14, 2024
2 parents 5f5a96e + eec451b commit d42a306
Showing 1 changed file with 41 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,47 +53,47 @@ transform:
"DirSubCopy=science_review_flags|group|location"
# # Export Level 1 data to bucket
# export OUT_PATH=/pfs/out
# linkdir=$(mktemp -d)
# shopt -s globstar
# out_parquet_glob="${OUT_PATH}/**/*.parquet"
# # Example: /2024/01/18/par-quantum-line_UKFS001000/data/par-quantum-line_UKFS001000_2024-01-18_PARQL_1min_001.parquet
# echo "Linking output files to ${linkdir}"
# #set -x # Echo commands to output for debugging
# fname=""
# for f in $out_parquet_glob; do
# if [[ -f "$f" ]]; then
# # Parse the path
# [[ "$f" =~ ^$OUT_PATH/([0-9]+)/([0-9]+)/([0-9]+)/(${GROUP_PREFIX}_[A-Za-z0-9]+)/data/(.*)$ ]]
# fyear="${BASH_REMATCH[1]}"
# fmonth="${BASH_REMATCH[2]}"
# fday="${BASH_REMATCH[3]}"
# fgroup="${BASH_REMATCH[4]}"
# fname="${BASH_REMATCH[5]}"
# # Now get the timing index from the file name
# [[ "$fname" =~ ^${GROUP_PREFIX}_[A-Za-z0-9]+_${fyear}-${fmonth}-${fday}_[A-Za-z0-9]+_([A-Za-z0-9]+)_([A-Za-z0-9]+).parquet ]]
# avg_int="${BASH_REMATCH[1]}"
# #Form the output path and link
# outdir="${linkdir}/v2/${GROUP_PREFIX}/${avg_int}/group=${fgroup}/ms=${fyear}-${fmonth}"
# mkdir -p "${outdir}"
# ln -s "${f}" "${outdir}/${fname}"
# fi
# done
# #set +x
# if [[ "${fname}" ]]; then
# echo "Syncing files to bucket"
# rclone \
# --no-check-dest \
# --copy-links \
# --gcs-bucket-policy-only \
# --gcs-no-check-bucket \
# copy \
# "${linkdir}" \
# ":gcs://${BUCKET_NAME}"
# echo "Removing temporary files"
# rm -rf $linkdir
# fi
# Export Level 1 data to bucket.
#
# Every parquet file found under OUT_PATH is symlinked into a temporary staging
# tree laid out as
#   v2/<GROUP_PREFIX>/<avg_int>/group=<group>/ms=<YYYY>-<MM>/<file name>
# and that tree is then copied to the bucket with rclone. The --copy-links flag
# makes rclone upload the link targets rather than the links themselves.
# Reads GROUP_PREFIX and BUCKET_NAME, which are set outside this section.
export OUT_PATH=/pfs/out
linkdir=$(mktemp -d) || {
  echo "ERROR: could not create temporary link directory" >&2
  exit 1
}
shopt -s globstar # Let ** match at any directory depth
# Example: /pfs/out/2024/01/18/par-quantum-line_UKFS001000/data/par-quantum-line_UKFS001000_2024-01-18_PARQL_1min_001.parquet
echo "Linking output files to ${linkdir}"
#set -x # Echo commands to output for debugging
fname=""
linked_count=0 # Number of files actually staged; decides whether to sync
for f in "${OUT_PATH}"/**/*.parquet; do
  # An unmatched glob is left as the literal pattern; -f filters that out too
  [[ -f "$f" ]] || continue

  # Parse the path. Captures: 1=year 2=month 3=day 4=group 5=file name
  path_regex="^${OUT_PATH}/([0-9]+)/([0-9]+)/([0-9]+)/(${GROUP_PREFIX}_[A-Za-z0-9]+)/data/(.*)\$"
  if ! [[ "$f" =~ $path_regex ]]; then
    # Without this check the stale/empty captures would build a bogus path
    echo "WARNING: skipping ${f}: path does not match the expected layout" >&2
    continue
  fi
  fyear="${BASH_REMATCH[1]}"
  fmonth="${BASH_REMATCH[2]}"
  fday="${BASH_REMATCH[3]}"
  fgroup="${BASH_REMATCH[4]}"
  fname="${BASH_REMATCH[5]}"

  # Now get the timing index from the file name (first capture).
  # The dot is escaped and the pattern anchored so only a real ".parquet"
  # suffix is accepted.
  name_regex="^${GROUP_PREFIX}_[A-Za-z0-9]+_${fyear}-${fmonth}-${fday}_[A-Za-z0-9]+_([A-Za-z0-9]+)_([A-Za-z0-9]+)\.parquet\$"
  if ! [[ "$fname" =~ $name_regex ]]; then
    echo "WARNING: skipping ${f}: file name does not match the expected pattern" >&2
    continue
  fi
  avg_int="${BASH_REMATCH[1]}"

  # Form the output path and link
  outdir="${linkdir}/v2/${GROUP_PREFIX}/${avg_int}/group=${fgroup}/ms=${fyear}-${fmonth}"
  mkdir -p "${outdir}"
  ln -s "${f}" "${outdir}/${fname}"
  linked_count=$((linked_count + 1))
done
#set +x

# Sync only when something was staged. Counting links (rather than testing the
# last file name seen) keeps one trailing unparseable file from cancelling the
# upload of everything that was linked before it.
sync_status=0
if ((linked_count > 0)); then
  echo "Syncing files to bucket"
  rclone \
    --no-check-dest \
    --copy-links \
    --gcs-bucket-policy-only \
    --gcs-no-check-bucket \
    copy \
    "${linkdir}" \
    ":gcs://${BUCKET_NAME}" || sync_status=$?
fi

# Always remove the staging tree, including when nothing was synced or the
# sync failed. The :? guard refuses to run rm -rf on an empty path.
echo "Removing temporary files"
rm -rf -- "${linkdir:?}"

# Surface an upload failure instead of letting the script end successfully
if ((sync_status != 0)); then
  echo "ERROR: rclone exited with status ${sync_status}" >&2
  exit "${sync_status}"
fi
EOF
env:
Expand Down

0 comments on commit d42a306

Please sign in to comment.