-
Notifications
You must be signed in to change notification settings - Fork 0
/
RUN8
122 lines (111 loc) · 3.16 KB
/
RUN8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/bin/bash
# Stage PT1: launch mainSingle.py through deepspeed with --train_stage PT1.
# Checkpoints, data output and the training log are all written under
# $OUTPUT_PATH. The script aborts if this stage fails, because the following
# stage passes this stage's output directory as --pretrain_model.
MODE=PT1
OUTPUT_PATH="./Exp/E-SwinV1-8/$MODE"
mkdir -p -- "$OUTPUT_PATH" || {
  printf 'cannot create output directory %s\n' "$OUTPUT_PATH" >&2
  exit 1
}
# 16 128 768 6*6 target mask rate ((128/16)^2)/((768/16)^2) = 64/2304 ~= 0.0278
# NOTE(review): this note previously read 0.278, which does not match its own
# formula; neither value equals the --pretrain_mask_rate used below -- confirm.
# Alternative launch commands kept for reference:
#   deepspeed mainSingle.py ...
#   deepspeed --hostfile=host mainSingle.py ...
train_args=(
  --data_sample_input_path ./DATA/Merge/
  --data_padmask_input_path ./DATA/PadMask/
  --val_rate 0.1
  --pretrain_mask_rate 0.5
  --data_info ./data/DataInfo
  --target_num_patches 4096
  --patch_per_var_side 64
  --model SwinV1
  --init_model unicornEarth-SwinV1-base
  --train_stage PT1
  --ckpt_output_dir "$OUTPUT_PATH"
  --data_output_path "$OUTPUT_PATH"
  --seed 1017
  --per_device_train_batch_size 1
  --per_device_eval_batch_size 1
  --do_eval
  --learning_rate 5e-4
  --weight_decay 0.1
  --num_train_epochs 512
  --gradient_accumulation_steps 1
  --lr_scheduler_type cosine
  --num_warmup_steps 0
  --loss_l1_rate 1.0
  --loss_ms_ssim_rate 2.0
  --stats_path ./data/Stats/
  --target_var TCWV
)
# Both stdout and stderr of the run are captured in train.log, so a failure is
# reported separately on the script's own stderr before exiting.
deepspeed --hostfile=h4 mainSingle.py "${train_args[@]}" \
  &> "$OUTPUT_PATH/train.log" || {
  status=$?
  printf 'stage %s failed (exit %d); see %s/train.log\n' \
    "$MODE" "$status" "$OUTPUT_PATH" >&2
  exit "$status"
}
##
# Pause before the next stage is launched.
# NOTE(review): presumably gives the previous launch time to shut down -- confirm.
sleep 10
##
# Stage PT2: launch mainSingle.py through deepspeed with --train_stage PT2,
# passing the PT1 output directory as --pretrain_model. Checkpoints, data
# output and the training log are written under $OUTPUT_PATH. The script
# aborts if this stage fails, because the following stage passes this stage's
# output directory as --pretrain_model.
MODE=PT2
OUTPUT_PATH="./Exp/E-SwinV1-8/$MODE"
mkdir -p -- "$OUTPUT_PATH" || {
  printf 'cannot create output directory %s\n' "$OUTPUT_PATH" >&2
  exit 1
}
# 16 128 768 6*6 target mask rate ((128/16)^2)/((768/16)^2) = 64/2304 ~= 0.0278
# NOTE(review): this note previously read 0.278, which does not match its own
# formula; neither value equals the --pretrain_mask_rate used below -- confirm.
# Alternative launch commands kept for reference:
#   deepspeed mainSingle.py ...
#   deepspeed --hostfile=host mainSingle.py ...
train_args=(
  --data_sample_input_path ./DATA/Merge/
  --data_padmask_input_path ./DATA/PadMask/
  --val_rate 0.1
  --pretrain_mask_rate 0.15
  --data_info ./data/DataInfo
  --target_num_patches 4096
  --patch_per_var_side 64
  --model SwinV1
  --pretrain_model ./Exp/E-SwinV1-8/PT1
  --train_stage PT2
  --ckpt_output_dir "$OUTPUT_PATH"
  --data_output_path "$OUTPUT_PATH"
  --seed 1017
  --per_device_train_batch_size 1
  --per_device_eval_batch_size 1
  --do_eval
  --learning_rate 1e-4
  --weight_decay 0.1
  --num_train_epochs 256
  --gradient_accumulation_steps 1
  --lr_scheduler_type cosine
  --num_warmup_steps 0
  --loss_l1_rate 1.0
  --loss_ms_ssim_rate 2.0
  --stats_path ./data/Stats/
  --target_var TCWV
)
# Both stdout and stderr of the run are captured in train.log, so a failure is
# reported separately on the script's own stderr before exiting.
deepspeed --hostfile=h4 mainSingle.py "${train_args[@]}" \
  &> "$OUTPUT_PATH/train.log" || {
  status=$?
  printf 'stage %s failed (exit %d); see %s/train.log\n' \
    "$MODE" "$status" "$OUTPUT_PATH" >&2
  exit "$status"
}
##
# Pause before the next stage is launched.
# NOTE(review): presumably gives the previous launch time to shut down -- confirm.
sleep 10
##
# Stage FT: launch mainSingle.py through deepspeed with --train_stage FT,
# passing the PT2 output directory as --pretrain_model. Checkpoints, data
# output and the training log are written under $OUTPUT_PATH.
NAME=FT
OUTPUT_PATH="./Exp/E-SwinV1-8/$NAME"
mkdir -p -- "$OUTPUT_PATH" || {
  printf 'cannot create output directory %s\n' "$OUTPUT_PATH" >&2
  exit 1
}
# 16 128 768 6*6 target mask rate ((128/16)^2)/((768/16)^2) = 64/2304 ~= 0.0278
# NOTE(review): this note previously read 0.278, which does not match its own
# formula; neither value equals the --pretrain_mask_rate used below -- confirm.
# Alternative launch commands kept for reference:
#   deepspeed mainSingle.py ...
#   deepspeed --hostfile=host mainSingle.py ...
train_args=(
  --data_sample_input_path ./DATA/Merge/
  --data_padmask_input_path ./DATA/PadMask/
  --val_rate 0.1
  --pretrain_mask_rate 0.15
  --data_info ./data/DataInfo
  --target_num_patches 4096
  --patch_per_var_side 64
  --model SwinV1
  --pretrain_model ./Exp/E-SwinV1-8/PT2
  --train_stage FT
  --ckpt_output_dir "$OUTPUT_PATH"
  --data_output_path "$OUTPUT_PATH"
  --seed 1017
  --per_device_train_batch_size 1
  --per_device_eval_batch_size 1
  --do_eval
  --learning_rate 1e-4
  --weight_decay 0.1
  --num_train_epochs 256
  --gradient_accumulation_steps 1
  --lr_scheduler_type cosine
  --num_warmup_steps 0
  --loss_l1_rate 1.0
  --loss_ms_ssim_rate 2.0
  --stats_path ./data/Stats/
  --target_var TCWV
)
# Both stdout and stderr of the run are captured in train.log, so a failure is
# reported separately on the script's own stderr; the exit status is preserved.
deepspeed --hostfile=h4 mainSingle.py "${train_args[@]}" \
  &> "$OUTPUT_PATH/train.log" || {
  status=$?
  printf 'stage %s failed (exit %d); see %s/train.log\n' \
    "$NAME" "$status" "$OUTPUT_PATH" >&2
  exit "$status"
}