diff --git a/egs2/README.md b/egs2/README.md
index 3044916214c..28b95808dc4 100644
--- a/egs2/README.md
+++ b/egs2/README.md
@@ -78,6 +78,7 @@ See: https://espnet.github.io/espnet/espnet2_tutorial.html#recipes-using-espnet2
 | iwslt21_low_resource    | ALFFA, IARPA Babel, Gamayun, IWSLT 2021                                                                                          | ASR                     | SWA                   | http://www.openslr.org/25/ https://catalog.ldc.upenn.edu/LDC2017S05 https://gamayun.translatorswb.org/data/ https://iwslt.org/2021/low-resource |              |
 | iwslt22_dialect         | IWSLT2022 dialectal speech translation shared task                                                                               | ASR/ST                  | ARA->Tunisian ARA     | https://github.com/kevinduh/iwslt22-dialect.git                                                              |              |
 | iwslt22_low_resource | IWSLT2022 Low-resource speech translation track task                                                                               | ST                  | Tamasheq->FrenchPermalink     | https://github.com/mzboito/IWSLT2022_Tamasheq_data.git |
+| iwslt24_indic           | IWSLT2024 Indic speech translation track | ST | ENG -> HIN, BEN, TAM | https://iwslt.org/2024/indic | |
 | jdcinal                 | Japanese Dialogue Corpus of Information Navigation and Attentive Listening Annotated with Extended ISO-24617-2 Dialogue Act Tags | SLU                     | JPN                   | http://www.lrec-conf.org/proceedings/lrec2018/pdf/464.pdf http://tts.speech.cs.cmu.edu/awb/infomation_navigation_and_attentive_listening_0.2.zip |              |
 | jkac                    | J-KAC: Japanese Kamishibai and audiobook corpus                                                                                  | TTS                     | JPN                  | https://sites.google.com/site/shinnosuketakamichi/research-topics/j-kac_corpus                               |              |
 | jmd                     | JMD: Japanese multi-dialect corpus for speech synthesis                                                                          | TTS                     | JPN                  | https://sites.google.com/site/shinnosuketakamichi/research-topics/jmd_corpus                                 |              |
diff --git a/egs2/TEMPLATE/asr1/db.sh b/egs2/TEMPLATE/asr1/db.sh
index 5f4417b642e..6d778cf84e1 100755
--- a/egs2/TEMPLATE/asr1/db.sh
+++ b/egs2/TEMPLATE/asr1/db.sh
@@ -169,6 +169,7 @@ CMU_INDIC=downloads
 INDIC_SPEECH=downloads
 IWSLT22_DIALECT=
 IWSLT22_LOW_RESOURCE=downloads
+IWSLT24_INDIC=
 JKAC=
 MUCS_SUBTASK1=downloads
 MUCS_SUBTASK2=downloads
diff --git a/egs2/iwslt24_indic/st1/RESULTS.md b/egs2/iwslt24_indic/st1/RESULTS.md
new file mode 100644
index 00000000000..f592efef06e
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/RESULTS.md
@@ -0,0 +1,67 @@
+# RESULTS
+
+## En-Hi
+
+### Environments
+- date: `Thu Apr 18 01:34:53 JST 2024`
+- python version: `3.10.14 (main, Mar 21 2024, 16:24:04) [GCC 11.2.0]`
+- espnet version: `espnet 202402`
+- pytorch version: `pytorch 2.1.0`
+- Git hash: `83c179ab842987cf01642df2db372aaae260df55`
+  - Commit date: `Wed Apr 17 00:28:29 2024 +0900`
+
+### Model config
+
+- training: [./conf/tuning/train_st_conformer.yaml](./conf/tuning/train_st_conformer.yaml)
+- decoding: [./conf/tuning/decode_st_conformer.yaml](./conf/tuning/decode_st_conformer.yaml)
+- model url: [https://huggingface.co/espnet/iwslt24_indic_en_hi_bpe_tc4000](https://huggingface.co/espnet/iwslt24_indic_en_hi_bpe_tc4000)
+
+### BLEU
+
+|dataset|score|verbose_score|
+|---|---|---|
+|decode_st_conformer_st_model_valid.acc.ave/dev.en-hi|37.1|64.8/44.9/34.2/26.2 (BP = 0.924 ratio = 0.927 hyp_len = 195297 ref_len = 210636)|
+
+## En-Bn
+
+### Environments
+- date: `Wed Apr 17 02:51:38 JST 2024`
+- python version: `3.10.14 (main, Mar 21 2024, 16:24:04) [GCC 11.2.0]`
+- espnet version: `espnet 202402`
+- pytorch version: `pytorch 2.1.0`
+- Git hash: `83c179ab842987cf01642df2db372aaae260df55`
+  - Commit date: `Wed Apr 17 00:28:29 2024 +0900`
+
+### Model config
+
+- training: [./conf/tuning/train_st_conformer.yaml](./conf/tuning/train_st_conformer.yaml)
+- decoding: [./conf/tuning/decode_st_conformer.yaml](./conf/tuning/decode_st_conformer.yaml)
+- model url: [https://huggingface.co/espnet/iwslt24_indic_en_bn_bpe_tc4000](https://huggingface.co/espnet/iwslt24_indic_en_bn_bpe_tc4000)
+
+### BLEU
+
+|dataset|score|verbose_score|
+|---|---|---|
+|decode_st_conformer_st_model_valid.acc.ave/dev.en-bn|2.1|19.7/3.6/1.0/0.3 (BP = 1.000 ratio = 1.185 hyp_len = 46094 ref_len = 38883)|
+
+## En-Ta
+
+### Environments
+- date: `Thu Apr 18 01:03:59 JST 2024`
+- python version: `3.10.14 (main, Mar 21 2024, 16:24:04) [GCC 11.2.0]`
+- espnet version: `espnet 202402`
+- pytorch version: `pytorch 2.1.0`
+- Git hash: `83c179ab842987cf01642df2db372aaae260df55`
+  - Commit date: `Wed Apr 17 00:28:29 2024 +0900`
+
+### Model config
+
+- training: [./conf/tuning/train_st_conformer.yaml](./conf/tuning/train_st_conformer.yaml)
+- decoding: [./conf/tuning/decode_st_conformer.yaml](./conf/tuning/decode_st_conformer.yaml)
+- model url: [https://huggingface.co/espnet/iwslt24_indic_en_ta_bpe_tc4000](https://huggingface.co/espnet/iwslt24_indic_en_ta_bpe_tc4000)
+
+### BLEU
+
+|dataset|score|verbose_score|
+|---|---|---|
+|decode_st_conformer_st_model_valid.acc.ave/dev.en-ta|6.3|46.5/9.4/4.7/1.9 (BP = 0.798 ratio = 0.816 hyp_len = 66168 ref_len = 81059)|
diff --git a/egs2/iwslt24_indic/st1/cmd.sh b/egs2/iwslt24_indic/st1/cmd.sh
new file mode 100644
index 00000000000..2aae6919fef
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/cmd.sh
@@ -0,0 +1,110 @@
+# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
+# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
+# e.g.
+#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
+#
+# Options:
+#   --time <time>: Limit the maximum time to execute.
+#   --mem <mem>: Limit the maximum memory usage.
+#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
+#   --num-threads <nthreads>: Specify the number of CPU cores.
+#   --gpu <ngpu>: Specify the number of GPU devices.
+#   --config: Change the configuration file from default.
+#
+# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
+# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
+# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
+# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
+#
+# run.pl, queue.pl, slurm.pl, and ssh.pl have a unified interface, independent of the backend.
+# These options are mapped to specific options for each backend and
+# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
+# If jobs failed, your configuration might be wrong for your environment.
+#
+#
+# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
+#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
+# =========================================================~
+
+
+# Select the backend used by run.sh from "local", "stdout", "sge", "pbs", "slurm", "ssh", or "jhu"
+cmd_backend='local'
+
+# Local machine, without any Job scheduling system
+if [ "${cmd_backend}" = local ]; then
+
+    # The other usage
+    export train_cmd="run.pl"
+    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
+    export cuda_cmd="run.pl"
+    # Used for "*_recog.py"
+    export decode_cmd="run.pl"
+
+# Local machine logging to stdout and log file, without any Job scheduling system
+elif [ "${cmd_backend}" = stdout ]; then
+
+    # The other usage
+    export train_cmd="stdout.pl"
+    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
+    export cuda_cmd="stdout.pl"
+    # Used for "*_recog.py"
+    export decode_cmd="stdout.pl"
+
+
+# "qsub" (Sun Grid Engine, or derivation of it)
+elif [ "${cmd_backend}" = sge ]; then
+    # The default setting is written in conf/queue.conf.
+    # You must change "-q g.q" for the "queue" for your environment.
+    # To know the "queue" names, type "qhost -q"
+    # Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.
+
+    export train_cmd="queue.pl"
+    export cuda_cmd="queue.pl"
+    export decode_cmd="queue.pl"
+
+
+# "qsub" (Torque/PBS.)
+elif [ "${cmd_backend}" = pbs ]; then
+    # The default setting is written in conf/pbs.conf.
+
+    export train_cmd="pbs.pl"
+    export cuda_cmd="pbs.pl"
+    export decode_cmd="pbs.pl"
+
+
+# "sbatch" (Slurm)
+elif [ "${cmd_backend}" = slurm ]; then
+    # The default setting is written in conf/slurm.conf.
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
+    # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
+    # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
+
+    export train_cmd="slurm.pl"
+    export cuda_cmd="slurm.pl"
+    export decode_cmd="slurm.pl"
+
+elif [ "${cmd_backend}" = ssh ]; then
+    # You have to create ".queue/machines" to specify the host to execute jobs.
+    # e.g. .queue/machines
+    #   host1
+    #   host2
+    #   host3
+    # Assuming you can login them without any password, i.e. You have to set ssh keys.
+
+    export train_cmd="ssh.pl"
+    export cuda_cmd="ssh.pl"
+    export decode_cmd="ssh.pl"
+
+# This is an example of specifying several unique options in the JHU CLSP cluster setup.
+# Users can modify/add their own command options according to their cluster environments.
+elif [ "${cmd_backend}" = jhu ]; then
+
+    export train_cmd="queue.pl --mem 2G"
+    export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/queue.conf"
+    export decode_cmd="queue.pl --mem 4G"
+
+else
+    echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
+    return 1
+fi
diff --git a/egs2/iwslt24_indic/st1/conf/fbank.conf b/egs2/iwslt24_indic/st1/conf/fbank.conf
new file mode 100644
index 00000000000..75232358639
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/conf/fbank.conf
@@ -0,0 +1,2 @@
+--sample-frequency=16000
+--num-mel-bins=80
diff --git a/egs2/iwslt24_indic/st1/conf/pbs.conf b/egs2/iwslt24_indic/st1/conf/pbs.conf
new file mode 100644
index 00000000000..119509938ce
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/conf/pbs.conf
@@ -0,0 +1,11 @@
+# Default configuration
+command qsub -V -v PATH -S /bin/bash
+option name=* -N $0
+option mem=* -l mem=$0
+option mem=0          # Do not add anything to qsub_opts
+option num_threads=* -l ncpus=$0
+option num_threads=1  # Do not add anything to qsub_opts
+option num_nodes=* -l nodes=$0:ppn=1
+default gpu=0
+option gpu=0
+option gpu=* -l ngpus=$0
diff --git a/egs2/iwslt24_indic/st1/conf/pitch.conf b/egs2/iwslt24_indic/st1/conf/pitch.conf
new file mode 100644
index 00000000000..e959a19d5b8
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs2/iwslt24_indic/st1/conf/queue.conf b/egs2/iwslt24_indic/st1/conf/queue.conf
new file mode 100644
index 00000000000..500582fab31
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/conf/queue.conf
@@ -0,0 +1,12 @@
+# Default configuration
+command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
+option name=* -N $0
+option mem=* -l mem_free=$0,ram_free=$0
+option mem=0          # Do not add anything to qsub_opts
+option num_threads=* -pe smp $0
+option num_threads=1  # Do not add anything to qsub_opts
+option max_jobs_run=* -tc $0
+option num_nodes=* -pe mpi $0  # You must set this PE as allocation_rule=1
+default gpu=0
+option gpu=0
+option gpu=* -l gpu=$0 -q g.q
diff --git a/egs2/iwslt24_indic/st1/conf/slurm.conf b/egs2/iwslt24_indic/st1/conf/slurm.conf
new file mode 100644
index 00000000000..3b229673638
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/conf/slurm.conf
@@ -0,0 +1,14 @@
+# Default configuration
+command sbatch --export=PATH
+option name=* --job-name $0
+option time=* --time $0
+option mem=* --mem-per-cpu $0
+option mem=0
+option num_threads=* --cpus-per-task $0
+option num_threads=1 --cpus-per-task 1
+option num_nodes=* --nodes $0
+default gpu=0
+option gpu=0 -p cpu
+option gpu=* -p gpu --gres=gpu:$0 -c $0  # Recommend allocating more CPU than, or equal to the number of GPU
+# note: the --max-jobs-run option is supported as a special case
+# by slurm.pl and you don't have to handle it in the config file.
diff --git a/egs2/iwslt24_indic/st1/conf/tuning/decode_st_conformer.yaml b/egs2/iwslt24_indic/st1/conf/tuning/decode_st_conformer.yaml
new file mode 100644
index 00000000000..7be5256e877
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/conf/tuning/decode_st_conformer.yaml
@@ -0,0 +1,6 @@
+batch_size: 1
+beam_size: 10
+penalty: 0.0
+maxlenratio: 0.0
+minlenratio: 0.0
+lm_weight: 0.0
diff --git a/egs2/iwslt24_indic/st1/conf/tuning/train_st_conformer.yaml b/egs2/iwslt24_indic/st1/conf/tuning/train_st_conformer.yaml
new file mode 100644
index 00000000000..eaa443f974b
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/conf/tuning/train_st_conformer.yaml
@@ -0,0 +1,89 @@
+batch_type: numel
+batch_bins: 25000000
+accum_grad: 2
+max_epoch: 80
+patience: none
+init: none
+best_model_criterion:
+-   - valid
+    - acc
+    - max
+keep_nbest_models: 10
+
+encoder: conformer
+encoder_conf:
+    output_size: 256
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 12
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: conv2d
+    normalize_before: true
+    macaron_style: true
+    rel_pos_type: latest
+    pos_enc_layer_type: rel_pos
+    selfattention_layer_type: rel_selfattn
+    activation_type: swish
+    use_cnn_module: true
+    cnn_module_kernel: 31
+
+decoder: transformer
+decoder_conf:
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.1
+    src_attention_dropout_rate: 0.1
+
+extra_asr_decoder: transformer
+extra_asr_decoder_conf:
+    input_layer: embed
+    num_blocks: 6
+    linear_units: 2048
+    dropout_rate: 0.1
+
+extra_mt_decoder: transformer
+extra_mt_decoder_conf:
+    input_layer: embed
+    num_blocks: 2
+    linear_units: 2048
+    dropout_rate: 0.1
+
+model_conf:
+    asr_weight: 0.3
+    mt_weight: 0.0
+    mtlalpha: 0.3
+    lsm_weight: 0.1
+    length_normalized_loss: false
+
+optim: adam
+optim_conf:
+    lr: 0.002
+    weight_decay: 0.000001
+scheduler: warmuplr
+scheduler_conf:
+    warmup_steps: 25000
+
+frontend_conf:
+  n_fft: 400
+  hop_length: 160
+
+specaug: specaug
+specaug_conf:
+    apply_time_warp: true
+    time_warp_window: 5
+    time_warp_mode: bicubic
+    apply_freq_mask: true
+    freq_mask_width_range:
+    - 0
+    - 27
+    num_freq_mask: 2
+    apply_time_mask: true
+    time_mask_width_ratio_range:
+    - 0.
+    - 0.05
+    num_time_mask: 5
diff --git a/egs2/iwslt24_indic/st1/db.sh b/egs2/iwslt24_indic/st1/db.sh
new file mode 120000
index 00000000000..50d86130898
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/db.sh
@@ -0,0 +1 @@
+../../TEMPLATE/asr1/db.sh
\ No newline at end of file
diff --git a/egs2/iwslt24_indic/st1/local/data.sh b/egs2/iwslt24_indic/st1/local/data.sh
new file mode 100755
index 00000000000..8835729d603
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/local/data.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+# Set bash to 'debug' mode, it will exit on :
+# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
+set -e
+set -u
+set -o pipefail
+
+. ./db.sh || exit 1;
+
+log() {
+    local fname=${BASH_SOURCE[1]##*/}
+    echo -e "$(date '+%Y-%m-%dT%H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+SECONDS=0
+
+tgt_lang=$1  # one of hi (Hindi), bn (Bengali), or ta (Tamil)
+remove_archive=false
+
+log "$0 $*"
+. utils/parse_options.sh
+
+if [ -z "${IWSLT24_INDIC}" ]; then
+    log "Please fill the value of 'IWSLT24_INDIC' of db.sh to indicate where the dataset zip files are downloaded."
+    exit 1
+fi
+
+# check if moses is installed
+if ! command -v tokenizer.perl > /dev/null; then
+    log "Error: The moses tool is not installed. Please install moses as follows: cd ${MAIN_ROOT}/tools && make moses.done"
+    exit 1
+fi
+
+if [ $# -ne 1 ]; then
+    log "Usage: $0 <tgt_lang>"
+    log "e.g.: $0 hi"
+    exit 1
+fi
+
+# check tgt_lang
+tgt_langs="hi_bn_ta"
+if ! echo "${tgt_langs}" | grep -q "${tgt_lang}"; then
+    log "Error: ${tgt_lang} is not supported. It must be one of hi, bn, or ta."
+    exit 1;
+fi
+
+log "Checking download and unpacking the dataset..."
+mkdir -p ${IWSLT24_INDIC}
+local/download_and_unpack.sh ${IWSLT24_INDIC} ${tgt_lang} ${remove_archive}
+
+# ensure new line at the end of file
+for split in train dev; do
+    for ext in en ${tgt_lang} yaml; do
+        filename=${IWSLT24_INDIC}/en-${tgt_lang}/data/${split}/txt/${split}.${ext}
+        # shellcheck disable=SC1003
+        sed -i -e '$a\' "${filename}"
+    done
+done
+# shellcheck disable=SC1003
+sed -i -e '$a\' "${IWSLT24_INDIC}/en-${tgt_lang}/data/tst-COMMON/txt/tst-COMMON.yaml"
+
+log "Preparing data in ESPnet format..."
+local/data_prep.sh ${IWSLT24_INDIC} ${tgt_lang}
+
+log "Successfully finished data preparation. [elapsed=${SECONDS}s]"
diff --git a/egs2/iwslt24_indic/st1/local/data_prep.sh b/egs2/iwslt24_indic/st1/local/data_prep.sh
new file mode 100755
index 00000000000..c4147944a74
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/local/data_prep.sh
@@ -0,0 +1,262 @@
+#!/usr/bin/env bash
+
+# Copyright 2019-2024 Kyoto University (Hirofumi Inaguma, Shuichiro Shimizu)
+# Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+# This script converts the train, dev and tst-COMMON splits of one en-<tgt_lang> pair of the IWSLT 2024 Indic track dataset into the format of ESPnet recipe.
+# For ESPnet data format, see https://github.com/espnet/data_example/blob/main/README.md
+
+export LC_ALL=C  # C locale for every tool invoked by this script (e.g. sort, uniq)
+
+# Print a timestamped message tagged with the caller's file name, line number and function name.
+log() {
+    local fname=${BASH_SOURCE[1]##*/}
+    echo -e "$(date '+%Y-%m-%dT%H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+. utils/parse_options.sh || exit 1;
+
+# Exactly two positional arguments are required: the dataset root and the target language code.
+if [ "$#" -ne 2 ]; then
+    log "Usage: $0 <data_dir> <tgt_lang>"
+    log "e.g.: $0 /path/to/indic/data hi"
+    exit 1;
+fi
+
+data_dir=$1
+tgt_lang=$2
+
+for split in train dev; do  # splits that come with transcriptions (en) and translations (tgt_lang)
+    src=${data_dir}/en-${tgt_lang}/data/${split}
+    dst=data/local/en-${tgt_lang}/${split}
+
+    [ ! -d ${src} ] && log "$0: no such directory ${src}" && exit 1;
+
+    wav_dir=${src}/wav
+    txt_dir=${src}/txt
+    yaml=${txt_dir}/${split}.yaml
+    en=${txt_dir}/${split}.en
+    tgt=${txt_dir}/${split}.${tgt_lang}
+
+    mkdir -p ${dst} || exit 1;
+
+    [ ! -d ${wav_dir} ] && log "$0: no such directory ${wav_dir}" && exit 1;
+    [ ! -d ${txt_dir} ] && log "$0: no such directory ${txt_dir}" && exit 1;
+    [ ! -f ${yaml} ] && log "$0: expected file ${yaml} to exist" && exit 1;
+    [ ! -f ${en} ] && log "$0: expected file ${en} to exist" && exit 1;
+    [ ! -f ${tgt} ] && log "$0: expected file ${tgt} to exist" && exit 1;
+
+    wav_scp=${dst}/wav.scp; [[ -f "${wav_scp}" ]] && rm ${wav_scp}
+    trans_en=${dst}/text.en; [[ -f "${trans_en}" ]] && rm ${trans_en}
+    trans_tgt=${dst}/text.${tgt_lang}; [[ -f "${trans_tgt}" ]] && rm ${trans_tgt}
+    utt2spk=${dst}/utt2spk; [[ -f "${utt2spk}" ]] && rm ${utt2spk}
+    spk2utt=${dst}/spk2utt; [[ -f "${spk2utt}" ]] && rm ${spk2utt}
+    segments=${dst}/segments; [[ -f "${segments}" ]] && rm ${segments}
+
+    # error check: one yaml entry (a line containing "duration") per source/target sentence
+    n=$(grep -c duration ${yaml})
+    n_en=$(wc -l < ${en})
+    n_tgt=$(wc -l < ${tgt})
+    [ ${n} -ne ${n_en} ] && log "Error: expected ${n} data entries, found ${n_en}" && exit 1;
+    [ ${n} -ne ${n_tgt} ] && log "Error: expected ${n} data entries, found ${n_tgt}" && exit 1;
+
+    # copy files to ${dst}, removing empty lines
+    cp ${yaml} ${dst}/.yaml0
+    cp ${en} ${dst}/en.org
+    cp ${tgt} ${dst}/${tgt_lang}.org
+
+    # line numbers that are empty in any of the three files, turned into a sed script such as "12d;40d;"
+    empty_lines=$(grep -n '^$' "${yaml}" "${en}" "${tgt}" | cut -d ':' -f 2 | sort -nu | tr '\n' ',')
+    sed_commands=$(echo "${empty_lines}" | sed 's/,/d;/g')
+    if [ -z "$sed_commands" ]; then
+        log "No empty lines found in ${src}"
+    else
+        # the same line numbers are deleted from all three copies so that they stay aligned
+        sed -i -e "${sed_commands}" "${dst}/.yaml0" "${dst}/en.org" "${dst}/${tgt_lang}.org"
+        log "Found empty lines at line ${empty_lines} in ${src}, removing them for further processing. The original files are kept in ${src}"
+    fi
+
+    # transcriptions and translations text file preparation
+    grep duration ${dst}/.yaml0 > ${dst}/.yaml1
+
+    # make utt_id from yaml
+    # e.g., - {duration: 3.079999, offset: 7.28, speaker_id: spk.4, wav: bn4.wav} -> ted_00004_0007280_0010360
+    # NOTE: Extend the lengths of short utterances (< 0.2s) rather than exclude them
+    awk '{
+        duration=$3; offset=$5; spkid=$7;
+        gsub(",","",duration);
+        gsub(",","",offset);
+        gsub(",","",spkid);
+        gsub("spk.","",spkid);
+        duration=sprintf("%.7f", duration);
+        if ( duration < 0.2 ) extendt=sprintf("%.7f", (0.2-duration)/2);
+        else extendt=0;
+        offset=sprintf("%.7f", offset);
+        startt=offset-extendt;
+        endt=offset+duration+extendt;
+        printf("ted_%05d_%07.0f_%07.0f\n", spkid, int(1000*startt+0.5), int(1000*endt+0.5));
+    }' ${dst}/.yaml1 > ${dst}/.yaml2
+
+    # text normalization
+    for lang in en ${tgt_lang}; do
+        # normalize punctuation
+        normalize-punctuation.perl -l ${lang} < ${dst}/${lang}.org > ${dst}/${lang}.norm
+
+        # lowercasing
+        lowercase.perl < ${dst}/${lang}.norm > ${dst}/${lang}.norm.lc
+        cp ${dst}/${lang}.norm ${dst}/${lang}.norm.tc
+
+        # remove punctuation
+        ../../../utils/remove_punctuation.pl < ${dst}/${lang}.norm.lc > ${dst}/${lang}.norm.lc.rm
+
+        # tokenization
+        tokenizer.perl -l ${lang} -q < ${dst}/${lang}.norm.tc > ${dst}/${lang}.norm.tc.tok
+        tokenizer.perl -l ${lang} -q < ${dst}/${lang}.norm.lc > ${dst}/${lang}.norm.lc.tok
+        tokenizer.perl -l ${lang} -q < ${dst}/${lang}.norm.lc.rm > ${dst}/${lang}.norm.lc.rm.tok
+
+        paste -d " " ${dst}/.yaml2 ${dst}/${lang}.norm.tc.tok | sort > ${dst}/text.tc.${lang}
+        paste -d " " ${dst}/.yaml2 ${dst}/${lang}.norm.lc.tok | sort > ${dst}/text.lc.${lang}
+        paste -d " " ${dst}/.yaml2 ${dst}/${lang}.norm.lc.rm.tok | sort > ${dst}/text.lc.rm.${lang}
+
+        # save original and cleaned punctuation
+        lowercase.perl < ${dst}/${lang}.org | ../../../utils/text2token.py -s 0 -n 1 | tr " " "\n" \
+            | sort | uniq | grep -v -e '^\s*$' | awk '{print $0 " " NR+1}' > ${dst}/punctuation.${lang}
+        lowercase.perl < ${dst}/${lang}.norm.tc | ../../../utils/text2token.py -s 0 -n 1 | tr " " "\n" \
+            | sort | uniq | grep -v -e '^\s*$' | awk '{print $0 " " NR+1}' > ${dst}/punctuation.clean.${lang}
+    done
+
+    # error check
+    n=$(wc -l < ${dst}/.yaml2)
+    n_en=$(wc -l < ${dst}/en.norm.tc.tok)
+    n_tgt=$(wc -l < ${dst}/${tgt_lang}.norm.tc.tok)
+    [ ${n} -ne ${n_en} ] && log "Error: expected ${n} data entries, found ${n_en}" && exit 1;
+    [ ${n} -ne ${n_tgt} ] && log "Error: expected ${n} data entries, found ${n_tgt}" && exit 1;
+
+    # segments file preparation
+    awk '{
+        segment=$1; split(segment,S,"[_]");
+        spkid=S[1] "_" S[2]; startf=S[3]; endf=S[4];
+        printf("%s %s %.2f %.2f\n", segment, spkid, startf/1000, endf/1000);
+    }' < ${dst}/text.tc.${tgt_lang} | uniq | sort > ${dst}/segments
+
+    # wav.scp file preparation
+    awk '{
+        segment=$1; split(segment,S,"[_]");
+        spkid=S[1] "_" S[2];
+        printf("%s cat '${wav_dir}'/'${tgt_lang}'%d.wav |\n", spkid, S[2]);
+    }' < ${dst}/text.tc.${tgt_lang} | uniq | sort > ${dst}/wav.scp
+
+    # utt2spk file preparation
+    awk '{
+        segment=$1; split(segment,S,"[_]");
+        spkid=S[1] "_" S[2]; print $1 " " spkid
+    }' ${dst}/segments | uniq | sort > ${dst}/utt2spk
+
+    # spk2utt file preparation
+    utils/utt2spk_to_spk2utt.pl < ${dst}/utt2spk | sort > ${dst}/spk2utt
+
+    # error check
+    n_en=$(wc -l < ${dst}/text.tc.en)
+    n_tgt=$(wc -l < ${dst}/text.tc.${tgt_lang})
+    [ ${n_en} -ne ${n_tgt} ] && log "Error: expected ${n_en} data entries, found ${n_tgt}" && exit 1;
+
+    # copy files into its final locations
+    mkdir -p data/${split}.en-${tgt_lang}
+
+    # remove duplicated utterances (i.e. utterances with the same offset)
+    log "removing duplicate lines..."
+    # compare the count field numerically: a textual match on "1 ted" would also hit counts such as 11 or 21
+    cut -d ' ' -f 1 ${dst}/text.tc.en | sort | uniq -c | sort -n -k1 -r \
+        | awk '$1 != 1 {print $1 " " $2}' > ${dst}/duplicate_lines
+    cut -d ' ' -f 1 ${dst}/text.tc.en | sort | uniq -c | sort -n -k1 -r | awk '$1 == 1 {print $2}' > ${dst}/reclist
+    ../../../utils/reduce_data_dir.sh ${dst} ${dst}/reclist data/${split}.en-${tgt_lang}
+    for l in en ${tgt_lang}; do
+        for case in tc lc lc.rm; do
+            cp ${dst}/text.${case}.${l} data/${split}.en-${tgt_lang}/text.${case}.${l}
+        done
+    done
+    utils/fix_data_dir.sh --utt_extra_files \
+        "text.tc.en text.lc.en text.lc.rm.en text.tc.${tgt_lang} text.lc.${tgt_lang} text.lc.rm.${tgt_lang}" \
+        data/${split}.en-${tgt_lang}
+
+    # error check
+    n_seg=$(wc -l < data/${split}.en-${tgt_lang}/segments)
+    n_text=$(wc -l < data/${split}.en-${tgt_lang}/text.tc.${tgt_lang})
+    [ ${n_seg} -ne ${n_text} ] && log "Error: expected ${n_seg} data entries, found ${n_text}" && exit 1;
+done
+
+for split in tst-COMMON; do  # test split: audio and segmentation only, no reference text
+    src=${data_dir}/en-${tgt_lang}/data/${split}
+    dst=data/local/en-${tgt_lang}/${split}
+
+    [ ! -d ${src} ] && log "$0: no such directory ${src}" && exit 1;
+
+    wav_dir=${src}/wav
+    txt_dir=${src}/txt
+    yaml=${txt_dir}/${split}.yaml
+
+    mkdir -p ${dst} || exit 1;
+
+    [ ! -d ${wav_dir} ] && log "$0: no such directory ${wav_dir}" && exit 1;
+    [ ! -d ${txt_dir} ] && log "$0: no such directory ${txt_dir}" && exit 1;
+    [ ! -f ${yaml} ] && log "$0: expected file ${yaml} to exist" && exit 1;
+
+    wav_scp=${dst}/wav.scp; [[ -f "${wav_scp}" ]] && rm ${wav_scp}
+    utt2spk=${dst}/utt2spk; [[ -f "${utt2spk}" ]] && rm ${utt2spk}
+    spk2utt=${dst}/spk2utt; [[ -f "${spk2utt}" ]] && rm ${spk2utt}
+    segments=${dst}/segments; [[ -f "${segments}" ]] && rm ${segments}
+
+    cp ${yaml} ${dst}/.yaml0
+
+    # make utt_id from the yaml entry lines only (blank or other lines would yield a bogus ted_00000 id)
+    # e.g., - {duration: 3.079999, offset: 7.28, speaker_id: spk.4, wav: bn4.wav} -> ted_00004_0007280_0010360
+    awk '/duration/ {
+        duration=$3; offset=$5; spkid=$7;
+        gsub(",","",duration);
+        gsub(",","",offset);
+        gsub(",","",spkid);
+        gsub("spk.","",spkid);
+        duration=sprintf("%.7f", duration);
+        offset=sprintf("%.7f", offset);
+        startt=offset;
+        endt=offset+duration;
+        printf("ted_%05d_%07.0f_%07.0f\n", spkid, int(1000*startt+0.5), int(1000*endt+0.5));
+    }' ${dst}/.yaml0 > ${dst}/.yaml2
+
+    # segments file preparation
+    awk '{
+        segment=$1; split(segment,S,"[_]");
+        spkid=S[1] "_" S[2]; startf=S[3]; endf=S[4];
+        printf("%s %s %.2f %.2f\n", segment, spkid, startf/1000, endf/1000);
+    }' < ${dst}/.yaml2 | uniq | sort > ${dst}/segments
+
+    # wav.scp file preparation
+    awk '{
+        segment=$1; split(segment,S,"[_]");
+        spkid=S[1] "_" S[2];
+        printf("%s cat '${wav_dir}'/'${tgt_lang}'%d.wav |\n", spkid, S[2]);
+    }' < ${dst}/.yaml2 | uniq | sort > ${dst}/wav.scp
+
+    # utt2spk file preparation
+    awk '{
+        segment=$1; split(segment,S,"[_]");
+        spkid=S[1] "_" S[2]; print $1 " " spkid
+    }' ${dst}/segments | uniq | sort > ${dst}/utt2spk
+
+    # spk2utt file preparation
+    utils/utt2spk_to_spk2utt.pl < ${dst}/utt2spk | sort > ${dst}/spk2utt
+
+    # copy files into its final locations
+    final_dst=data/${split}.en-${tgt_lang}
+    mkdir -p ${final_dst}
+    cp ${dst}/segments ${final_dst}/segments
+    cp ${dst}/wav.scp ${final_dst}/wav.scp
+    cp ${dst}/utt2spk ${final_dst}/utt2spk
+    cp ${dst}/spk2utt ${final_dst}/spk2utt
+
+    # check if the files in the data directory are in correct format
+    utils/fix_data_dir.sh ${final_dst}
+
+    # error check: every generated utterance id must still be present in the final segments file
+    n=$(wc -l < ${dst}/.yaml2)
+    n_seg=$(wc -l < ${final_dst}/segments)
+    [ ${n} -ne ${n_seg} ] && log "Error: expected ${n} data entries, found ${n_seg}" && exit 1;
+done
+
+log "$0: successfully prepared data in ${dst}"
diff --git a/egs2/iwslt24_indic/st1/local/download_and_unpack.sh b/egs2/iwslt24_indic/st1/local/download_and_unpack.sh
new file mode 100755
index 00000000000..71c985f888f
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/local/download_and_unpack.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+
+set -e
+set -u
+set -o pipefail
+
+log() {
+    local fname=${BASH_SOURCE[1]##*/}
+    echo -e "$(date '+%Y-%m-%dT%H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+log "$0 $*"
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+    echo "Usage: $0 <dataset dir> <tgt_lang> <remove_archive>"
+    echo "e.g.: $0 /path/to/indic/data hi true"
+fi
+
+data_dir=$1
+tgt_lang=$2
+remove_archive=$3
+
+# check tgt_lang
+if [ "$tgt_lang" == "hi" ]; then
+    target_language="Hindi"
+elif [ "$tgt_lang" == "bn" ]; then
+    target_language="Bengali"
+elif [ "$tgt_lang" == "ta" ]; then
+    target_language="Tamil"
+else
+    log "Error: ${tgt_lang} is not supported. It must be one of hi, bn, or ta."
+    exit 1;
+fi
+
+# check if the dataset is already unpacked
+if [ -f ${data_dir}/.unpacked_en-${tgt_lang} ]; then
+    log "$0: Data has already been extracted successfully. Skipping this stage."
+    exit 0
+fi
+
+# check if zip files are present
+if [ ! -f "${data_dir}/${target_language}.zip" ]; then
+    log "Please contact IWSLT 2024 Indic track organizers (See project website: https://iwslt.org/2024/indic) to download the training and development data, and place the zip files inside ${data_dir}."
+fi
+if [ ! -f "${data_dir}/${target_language}-Test.zip" ]; then
+    log "Please contact IWSLT 2024 Indic track organizers (See project website: https://iwslt.org/2024/indic) to download the test data, and place the zip files inside ${data_dir}."
+fi
+
+# unzip files
+if [ ! -d "${data_dir}/${target_language}" ]; then
+    UNZIP_DISABLE_ZIPBOMB_DETECTION=TRUE unzip "${data_dir}/${target_language}.zip" -d ${data_dir}
+fi
+if [ ! -d "${data_dir}/${target_language}-Test" ]; then
+    unzip "${data_dir}/${target_language}-Test.zip" -d ${data_dir}
+fi
+
+# reorganize directories
+mv ${data_dir}/${target_language}/en-${tgt_lang} ${data_dir}/
+mv ${data_dir}/${target_language}-Test/tst-COMMON ${data_dir}/en-${tgt_lang}/data/
+rmdir ${data_dir}/${target_language}
+rmdir ${data_dir}/${target_language}-Test
+
+# remove zip files if necessary
+if ${remove_archive}; then
+    log "Removing zip files..."
+    rm ${data_dir}/${target_language}.zip
+    rm ${data_dir}/${target_language}-Test.zip
+fi
+
+touch ${data_dir}/.unpacked_en-${tgt_lang}
+log "$0: Successfully downloaded and unpacked en-${tgt_lang}"
diff --git a/egs2/iwslt24_indic/st1/local/path.sh b/egs2/iwslt24_indic/st1/local/path.sh
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/egs2/iwslt24_indic/st1/path.sh b/egs2/iwslt24_indic/st1/path.sh
new file mode 120000
index 00000000000..8e43dca7d4d
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/path.sh
@@ -0,0 +1 @@
+../../TEMPLATE/st1/path.sh
\ No newline at end of file
diff --git a/egs2/iwslt24_indic/st1/pyscripts b/egs2/iwslt24_indic/st1/pyscripts
new file mode 120000
index 00000000000..ac68ad75b60
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/pyscripts
@@ -0,0 +1 @@
+../../TEMPLATE/asr1/pyscripts
\ No newline at end of file
diff --git a/egs2/iwslt24_indic/st1/run.sh b/egs2/iwslt24_indic/st1/run.sh
new file mode 100644
index 00000000000..584811187dd
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/run.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Set bash to 'debug' mode, it will exit on :
+# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
+set -e
+set -u
+set -o pipefail
+
+src_lang=en
+tgt_lang=hi  # one of hi (Hindi), bn (Bengali), or ta (Tamil)
+
+train_set=train.en-${tgt_lang}
+train_dev=dev.en-${tgt_lang}
+test_set=tst-COMMON.en-${tgt_lang}
+
+st_config=conf/tuning/train_st_conformer.yaml
+inference_config=conf/tuning/decode_st_conformer.yaml
+
+./st.sh \
+    --src_lang ${src_lang} \
+    --tgt_lang ${tgt_lang} \
+    --local_data_opts "${tgt_lang}" \
+    --train_set "${train_set}" \
+    --valid_set "${train_dev}" \
+    --test_sets "${test_set}" \
+    --feats_type raw \
+    --audio_format "flac.ark" \
+    --src_token_type "bpe" \
+    --src_nbpe 4000 \
+    --tgt_token_type "bpe" \
+    --tgt_nbpe 4000 \
+    --feats_normalize "utterance_mvn" \
+    --st_config "${st_config}" \
+    --inference_config "${inference_config}" \
+    --gpu_inference true \
+    "$@"
diff --git a/egs2/iwslt24_indic/st1/scripts b/egs2/iwslt24_indic/st1/scripts
new file mode 120000
index 00000000000..b25829705dc
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/scripts
@@ -0,0 +1 @@
+../../TEMPLATE/asr1/scripts
\ No newline at end of file
diff --git a/egs2/iwslt24_indic/st1/st.sh b/egs2/iwslt24_indic/st1/st.sh
new file mode 120000
index 00000000000..5c7465739e3
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/st.sh
@@ -0,0 +1 @@
+../../TEMPLATE/st1/st.sh
\ No newline at end of file
diff --git a/egs2/iwslt24_indic/st1/steps b/egs2/iwslt24_indic/st1/steps
new file mode 120000
index 00000000000..69ab7056139
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/steps
@@ -0,0 +1 @@
+../../TEMPLATE/asr1/steps
\ No newline at end of file
diff --git a/egs2/iwslt24_indic/st1/utils b/egs2/iwslt24_indic/st1/utils
new file mode 120000
index 00000000000..e18ae14b549
--- /dev/null
+++ b/egs2/iwslt24_indic/st1/utils
@@ -0,0 +1 @@
+../../TEMPLATE/asr1/utils
\ No newline at end of file