diff --git a/.github/workflows/self-comment-slow-ci.yml b/.github/workflows/self-comment-slow-ci.yml new file mode 100644 index 00000000000000..061f8e05e7633b --- /dev/null +++ b/.github/workflows/self-comment-slow-ci.yml @@ -0,0 +1,282 @@ +name: PR comment github action tests + +on: + issue_comment: + types: + - created + branches-ignore: + - main + pull_request_review_comment: + types: [created, deleted] + # pull_request: +concurrency: + # But this will cancel the current job no ? Because they have the same PR number ... (any comment even not the target one) + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.event.issue.number }}-${{ contains(github.event.comment.body, 'run-slow') }} + cancel-in-progress: ${{ (github.event.issue.pull_request != null || github.event.pull_request != null) }} + +jobs: + get-pr-number: + runs-on: ubuntu-22.04 + name: Get PR number + outputs: + PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }} + steps: + - name: Get PR number + shell: bash + run: | + echo "${{ github.event.pull_request.number }}" + echo "${{ github.event.issue.number }}" + echo "${{ github.event.issue.pull_request }}" + if [ "${{ github.event.pull_request.number }}" != "" ]; then + echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV + elif [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then + echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV + else + echo "PR_NUMBER=" >> $GITHUB_ENV + fi + + - name: Check PR number + shell: bash + run: | + echo "${{ env.PR_NUMBER }}" + + - name: Set PR number + id: set_pr_number + run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT" + + # Get commit sha of `refs/pull/PR_NUMBER/merge` and `refs/pull/PR_NUMBER/head` + get-sha: + runs-on: ubuntu-latest + needs: get-pr-number + if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}} + outputs: + PR_MERGE_COMMIT_SHA: ${{ steps.get_sha.outputs.PR_MERGE_COMMIT_SHA 
}} + PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: "0" + ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge" + + - name: Get SHA + id: get_sha + env: + PR_NUMBER: ${{needs.get-pr-number.outputs.PR_NUMBER}} + run: | + echo "PR_MERGE_COMMIT_SHA: $(git log -1 --format=%H)" + echo "PR_MERGE_COMMIT_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT" + git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head + git checkout refs/remotes/pull/$PR_NUMBER/head + echo "PR_HEAD_SHA: $(git log -1 --format=%H)" + echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT" + + # use a python script to handle this complex logic + # case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model) + # case 2: `run-slow model_1, model_2` + # case 3: comment is `pytest ...` + get-tests: + runs-on: ubuntu-latest + needs: get-pr-number + if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}} + permissions: write-all + outputs: + models: ${{ steps.models_to_run.outputs.models }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: "0" + ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge" + + - name: Get models to test + run: | + python -m pip install GitPython + python utils/pr_slow_ci_models.py --message "${{ github.event.comment.body }}" | tee output.txt + echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV + + - name: Show models to test + id: models_to_run + run: | + echo "${{ env.models }}" + echo "models=${{ env.models }}" >> $GITHUB_OUTPUT + + # TODO: update + - name: Reply to the comment + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \ + -f "body=This comment contains run-slow, running the specified job..." 
+ + create_run: + name: Create run + if: ${{ needs.get-tests.outputs.models != '[]' }} + needs: [get-sha, get-tests] + permissions: write-all + runs-on: ubuntu-latest + steps: + - name: Create Run + id: create_run + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \ + -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests" + + # (To be removed: only for playing with this PR on `transformers_ci_bot` where no self-hosted runner is available) + run_models_gpu_dummy: + name: Run all tests for the model (dummy) + # Triggered only `find_models_to_run` is triggered (label `run-slow` is added) which gives the models to run + # (either a new model PR or via a commit message) + if: ${{ needs.get-tests.outputs.models != '[]' }} + needs: [get-pr-number, get-tests, create_run] + strategy: + fail-fast: false + matrix: + folders: ${{ fromJson(needs.get-tests.outputs.models) }} + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: ubuntu-latest + steps: + - name: Echo input and matrix info + shell: bash + run: | + echo "${{ matrix.folders }}" + sleep 10s + + run_models_gpu: + name: Run all tests for the model + # Triggered only `find_models_to_run` is triggered (label `run-slow` is added) which gives the models to run + # (either a new model PR or via a commit message) + if: ${{ needs.get-tests.outputs.models != '[]' }} + needs: [get-pr-number, get-tests, create_run] + strategy: + fail-fast: false + matrix: + folders: ${{ fromJson(needs.get-tests.outputs.models) }} + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' + container: + image: 
huggingface/transformers-all-latest-gpu + options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: Echo input and matrix info + shell: bash + run: | + echo "${{ matrix.folders }}" + + - name: Echo folder ${{ matrix.folders }} + shell: bash + # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to + # set the artifact folder names (because the character `/` is not allowed). + run: | + echo "${{ matrix.folders }}" + matrix_folders=${{ matrix.folders }} + matrix_folders=${matrix_folders/'models/'/'models_'} + echo "$matrix_folders" + echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV + + - name: Checkout to PR merge commit + working-directory: /transformers + run: | + git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge + git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge + git log -1 --format=%H + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
+ + - name: NVIDIA-SMI + run: | + nvidia-smi + + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + + - name: Environment + working-directory: /transformers + run: | + python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Run all tests on GPU + working-directory: /transformers + run: | + export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})" + echo $CUDA_VISIBLE_DEVICES + python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} + + - name: Failure short reports + if: ${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + + - name: Make sure report directory exists + shell: bash + run: | + mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt + echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports" + + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ 
env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + + update_run_status: + name: Update Check Run Status + needs: [get-sha, create_run, run_models_gpu] + permissions: write-all + if: ${{ always() && needs.create_run.result == 'success' }} + runs-on: ubuntu-latest + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + steps: + - name: Get `run_models_gpu` job status + run: | + echo "${{ needs.run_models_gpu.result }}" + if [ "${{ needs.run_models_gpu.result }}" = "cancelled" ]; then + echo "STATUS=failure" >> $GITHUB_ENV + elif [ "${{ needs.run_models_gpu.result }}" = "skipped" ]; then + echo "STATUS=success" >> $GITHUB_ENV + else + echo "STATUS=${{ needs.run_models_gpu.result }}" >> $GITHUB_ENV + fi + + - name: Update PR commit statuses + run: | + echo "${{ needs.run_models_gpu.result }}" + echo "${{ env.STATUS }}" + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \ + -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests" diff --git a/src/transformers/modeling_gguf_pytorch_utils.py b/src/transformers/modeling_gguf_pytorch_utils.py index 0696413ef76030..3630f6a05f1999 100644 --- a/src/transformers/modeling_gguf_pytorch_utils.py +++ b/src/transformers/modeling_gguf_pytorch_utils.py @@ -15,7 +15,7 @@ # limitations under the License. 
 import re
-from typing import Optional
+from typing import Any, Dict, Optional
 
 import numpy as np
 from tqdm import tqdm
@@ -242,7 +242,7 @@ def reverse_reshape_bias(weights: np.ndarray, n_head: int, n_embed: int):
 
 
 def split_moe_expert_tensor(
-    weights: np.ndarray, parsed_parameters: dict[str, dict], name: str, tensor_key_mapping: dict
+    weights: np.ndarray, parsed_parameters: Dict[str, Any], name: str, tensor_key_mapping: dict
 ):
     # Original merge implementation
     # https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py#L1994-L2022
@@ -262,4 +262,4 @@ def split_moe_expert_tensor(
     for i in range(0, w_counter):
         temp_name = name.replace(".weight", f".{i}.{exp_name}.weight")
         exp_weight = weights[i]
-        parsed_parameters["tensors"][temp_name] = torch.from_numpy(np.copy(exp_weight))
+        parsed_parameters["tensors"][temp_name] = torch.from_numpy(np.copy(exp_weight))
\ No newline at end of file
diff --git a/utils/pr_slow_ci_models.py b/utils/pr_slow_ci_models.py
index 391e99fc2276f8..f1bde9e179c66d 100644
--- a/utils/pr_slow_ci_models.py
+++ b/utils/pr_slow_ci_models.py
@@ -101,45 +101,44 @@ def get_new_model():
     return new_model
 
 
-def parse_commit_message(commit_message: str) -> str:
+def parse_message(message: str) -> str:
     """
     Parses the commit message to find the models specified in it to run slow CI.
 
     Args:
-        commit_message (`str`): The commit message of the current commit.
+        message (`str`): The message to parse, e.g. a PR comment body or a commit message.
 
     Returns:
-        `str`: The substring in `commit_message` after `[run-slow]`, [run_slow]` or [run slow]`. If no such prefix is
+        `str`: The substring in `message` after `[run-slow]`, `[run_slow]` or `[run slow]`. If no such prefix is
         found, the empty string is returned.
     """
-    if commit_message is None:
+    if message is None:
         return ""
 
-    command_search = re.search(r"\[([^\]]*)\](.*)", commit_message)
-    if command_search is None:
-        return ""
+    message = message.strip().lower()
 
-    command = command_search.groups()[0]
-    command = command.lower().replace("-", " ").replace("_", " ")
-    run_slow = command == "run slow"
-    if run_slow:
-        models = command_search.groups()[1].strip()
-        return models
-    else:
+    # run-slow: model_1, model_2
+    if not message.startswith(("run-slow", "run_slow", "run slow")):
         return ""
+    message = message[len("run slow"):]
+    # remove leading `:`
+    while message.strip().startswith(":"):
+        message = message.strip()[1:]
+
+    return message
 
 
-def get_models(commit_message: str):
-    models = parse_commit_message(commit_message)
+def get_models(message: str):
+    models = parse_message(message)
     return [f"models/{x}" for x in models.replace(",", " ").split()]
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--commit_message", type=str, default="", help="The commit message.")
+    parser.add_argument("--message", type=str, default="", help="The commit message.")
     args = parser.parse_args()
 
     new_model = get_new_model()
-    specified_models = get_models(args.commit_message)
+    specified_models = get_models(args.message)
     models = ([] if new_model == "" else [new_model]) + specified_models
     print(sorted(set(models)))