forked from a-r-r-o-w/finetrainers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_dataset.sh
executable file
·48 lines (41 loc) · 1.46 KB
/
prepare_dataset.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
# ---------------------------------------------------------------------------
# Configuration for the dataset-preparation run. Edit the values below; they
# are forwarded as command-line options to training/prepare_dataset.py.
# For more details on the expected data format, please refer to the README.
# ---------------------------------------------------------------------------

# Model identifier (--model_id) and number of processes torchrun starts per
# node (--nproc_per_node).
NUM_GPUS='8'
MODEL_ID='THUDM/CogVideoX-2b'

# Dataset location.
# DATA_ROOT needs to be the path to the base directory where your videos are
# located.
DATA_ROOT='/path/to/my/datasets/video-dataset'
OUTPUT_DIR='/path/to/my/datasets/preprocessed-dataset'
VIDEO_COLUMN='videos.txt'
CAPTION_COLUMN='prompt.txt'

# Resolution / frame-count buckets. Each value is a space-separated list;
# every word is passed as a separate value of the corresponding option.
FRAME_BUCKETS='49'
WIDTH_BUCKETS='720 960'
HEIGHT_BUCKETS='480 720'

# Remaining numeric limits and processing options.
MAX_NUM_FRAMES='49'
MAX_SEQUENCE_LENGTH='226'
TARGET_FPS='8'
BATCH_SIZE='1'
DTYPE='fp32'
# Build and launch the dataset-preparation job (training/prepare_dataset.py)
# through torchrun.
#
# The command is assembled as an argv array and executed directly instead of
# being concatenated into a string and passed to `eval`: every value reaches
# torchrun as exactly one argument, so paths containing spaces or shell
# metacharacters are neither re-split nor re-interpreted by the shell.

# The bucket settings are whitespace-separated lists; split each one into
# words so that every bucket value becomes its own argument.
read -r -a HEIGHT_BUCKET_ARGS <<< "$HEIGHT_BUCKETS"
read -r -a WIDTH_BUCKET_ARGS <<< "$WIDTH_BUCKETS"
read -r -a FRAME_BUCKET_ARGS <<< "$FRAME_BUCKETS"

# To create a folder-style dataset structure without pre-encoding videos and captions
# For Image-to-Video finetuning, make sure to pass `--save_image_latents`
CMD_WITHOUT_PRE_ENCODING=(
  torchrun "--nproc_per_node=$NUM_GPUS"
  training/prepare_dataset.py
  --model_id "$MODEL_ID"
  --data_root "$DATA_ROOT"
  --caption_column "$CAPTION_COLUMN"
  --video_column "$VIDEO_COLUMN"
  --output_dir "$OUTPUT_DIR"
  --height_buckets "${HEIGHT_BUCKET_ARGS[@]}"
  --width_buckets "${WIDTH_BUCKET_ARGS[@]}"
  --frame_buckets "${FRAME_BUCKET_ARGS[@]}"
  --max_num_frames "$MAX_NUM_FRAMES"
  --max_sequence_length "$MAX_SEQUENCE_LENGTH"
  --target_fps "$TARGET_FPS"
  --batch_size "$BATCH_SIZE"
  --dtype "$DTYPE"
)

# Same command with the extra flag that requests pre-encoded output.
CMD_WITH_PRE_ENCODING=("${CMD_WITHOUT_PRE_ENCODING[@]}" --save_latents_and_embeddings)

# Select which you'd like to run
CMD=("${CMD_WITH_PRE_ENCODING[@]}")

# Log the command in shell-quoted form so it can be copied and re-run verbatim.
printf -v cmd_display '%q ' "${CMD[@]}"
printf '===== Running `%s` =====\n' "${cmd_display% }"

"${CMD[@]}"
status=$?

# Propagate a failure instead of reporting success unconditionally, so callers
# (CI jobs, wrapper scripts) can detect that preprocessing did not complete.
if (( status != 0 )); then
  printf '===== Script failed with exit code %d =====\n' "$status" >&2
  exit "$status"
fi

printf '===== Finished running script =====\n'