forked from modelscope/data-juicer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
2_multi_op_pipline.yaml
58 lines (52 loc) · 2.15 KB
/
2_multi_op_pipline.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Sandbox config example

# global parameters
project_name: 'demo-bench'
experiment_name: 'single_op_language_score'  # for wandb tracer name
work_dir: './outputs/demo-bench'  # the default output dir for meta logging

# configs for each job, the jobs will be executed according to the order in the list
# NOTE(review): keys left empty (probe_job_configs, refine_recipe_job_configs,
# meta_name, extra_configs, dj_configs) parse as YAML null — presumably the
# sandbox runner treats null jobs/fields as "skip"; confirm against the hook loader.
probe_job_configs:

refine_recipe_job_configs:

execution_job_configs:
  # step 1: filter the raw video dataset with two chained ops, keeping stats
  - hook: 'ProcessDataHook'
    meta_name:
    dj_configs:
      project_name: 'demo-bench'
      dataset_path: './demos/data/demo-dataset-videos.jsonl'  # path to your dataset directory or file
      export_path: './outputs/demo-bench/demo-dataset-with-multi-op-stats.jsonl'
      export_original_dataset: true  # must be true to keep statistics values with dataset
      process:
        # select samples with high language score
        - language_id_score_filter:
            lang:
            min_score: 0.7206037306785583  # this value can be observed in the analysis result of the probe job in one op experiments
        # select samples with middle video duration
        - video_duration_filter:
            min_duration: 19.315000  # this value can be observed in the analysis result of the probe job in one op experiments
            max_duration: 32.045000  # this value can be observed in the analysis result of the probe job in one op experiments
    extra_configs:
  # step 2: randomly select a small training subset from the filtered dataset
  - hook: 'ProcessDataHook'
    meta_name:
    dj_configs:
      project_name: 'demo-bench'
      dataset_path: './outputs/demo-bench/demo-dataset-with-multi-op-stats.jsonl'
      export_path: './outputs/demo-bench/demo-dataset-for-train.jsonl'
      process:
        - random_selector:
            select_num: 16
    extra_configs:
  # train model
  - hook: 'TrainModelHook'
    meta_name:
    dj_configs:
    extra_configs: './configs/demo/bench/model_train.yaml'
  # infer model
  - hook: 'InferModelHook'
    meta_name:
    dj_configs:
    extra_configs: './configs/demo/bench/model_infer.yaml'

evaluation_job_configs:
  # vbench evaluation
  - hook: 'EvaluateDataHook'
    meta_name: 'vbench_eval'
    dj_configs:
    extra_configs: './configs/demo/bench/vbench_eval.yaml'