-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path10.cirrus.sh
28 lines (23 loc) · 1.01 KB
/
10.cirrus.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Host-specific settings: everything below is applied only when the machine's
# hostname contains the substring "cirrus"; on any other host this is a no-op.
if [[ "$(hostname)" == *cirrus* ]]; then
  # Stub: bicleaner models are not available here, so any call reports the
  # problem on stderr and terminates the calling shell with status 1.
  bicleaner_model() {
    echo "bicleaner models not set up" >&2
    exit 1
  }

  # Language-identification model file.
  # Source noted for it: https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.189.bin
  export FASTTEXT_LANGID=/beegfs/paracrawl/lid.189.bin

  # Collection name -> directory for that collection.
  # Note: may need to split this into separate locations for warcs and for
  # processed data, to avoid filling the commoncrawl folders with processing
  # output.
  declare -A COLLECTIONS=()
  COLLECTIONS["wide00015"]="/beegfs/paracrawl/data/ia/wide00015"
  COLLECTIONS["cc-2016-30"]="/beegfs/paracrawl/data/common_crawl/CC-MAIN-2016-30"
  COLLECTIONS["cc-2017-30"]="/beegfs/paracrawl/data/common_crawl/CC-MAIN-2017-30"
  COLLECTIONS["cc-2018-30"]="/beegfs/paracrawl/data/common_crawl/CC-MAIN-2018-30"
  COLLECTIONS["cc-2019-18"]="/beegfs/paracrawl/data/common_crawl/CC-MAIN-2019-18"
  COLLECTIONS["cc-2019-35"]="/beegfs/paracrawl/data/common_crawl/CC-MAIN-2019-35"

  # Directory for Slurm job logs, under the user's home.
  export SLURM_LOGS="$HOME/logs"

  # Where jobs should be executed (account / partition / QOS).
  # Values used in functions.sh/schedule.
  export SBATCH_ACCOUNT=ec166-guest \
         SBATCH_PARTITION=standard \
         SBATCH_QOS=standard
fi