Add Weekly job for model analysis

chandrasekaranpradeep committed Dec 3, 2024
1 parent d156919 commit febad94

Showing 63 changed files with 393 additions and 97 deletions.
51 changes: 51 additions & 0 deletions .github/workflows/build-image.yml
@@ -0,0 +1,51 @@
name: Build Docker Image

on:
  workflow_dispatch:
  workflow_call:
    outputs:
      docker-image:
        description: "Built docker image name"
        value: ${{ jobs.build-image.outputs.docker-image }}

jobs:

  build-image:
    runs-on: builder
    outputs:
      docker-image: ${{ steps.build.outputs.docker-image }}
    steps:
      - name: Fix permissions
        shell: bash
        run: sudo chown ubuntu:ubuntu -R $(pwd)

      - uses: actions/checkout@v4
        with:
          submodules: recursive
          fetch-depth: 0 # Fetch all history and tags

      # Clean everything from submodules (needed to avoid issues
      # with cmake generated files leftover from previous builds)
      - name: Cleanup submodules
        run: |
          git submodule foreach --recursive git clean -ffdx
          git submodule foreach --recursive git reset --hard

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build Docker images and output the image name
        id: build
        shell: bash
        run: |
          # Fail the step if the build script fails, not just the tee
          set -o pipefail
          .github/build-docker-images.sh | tee docker.log
          DOCKER_CI_IMAGE=$(tail -n 1 docker.log)
          echo "DOCKER_CI_IMAGE $DOCKER_CI_IMAGE"
          echo "docker-image=$DOCKER_CI_IMAGE" >> "$GITHUB_OUTPUT"
114 changes: 114 additions & 0 deletions .github/workflows/model-analysis-weekly.yml
@@ -0,0 +1,114 @@
name: Model Analysis Weekly

on:
  workflow_dispatch:
  # schedule:
  #   - cron: '0 23 * * 5' # 11:00 PM UTC Friday (12:00 AM Saturday Serbia)
  push:
    branches: ["pchandrasekaran/model_analysis_weekly_job"]

jobs:

  docker-build:
    uses: ./.github/workflows/build-image.yml
    secrets: inherit

  model-analysis:
    needs: docker-build
    runs-on: runner

    container:
      image: ${{ needs.docker-build.outputs.docker-image }}
      options: --device /dev/tenstorrent/0
      volumes:
        - /dev/hugepages:/dev/hugepages
        - /dev/hugepages-1G:/dev/hugepages-1G
        - /etc/udev/rules.d:/etc/udev/rules.d
        - /lib/modules:/lib/modules
        - /opt/tt_metal_infra/provisioning/provisioning_env:/opt/tt_metal_infra/provisioning/provisioning_env

    steps:

      - name: Set reusable strings
        id: strings
        shell: bash
        run: |
          echo "work-dir=$(pwd)" >> "$GITHUB_OUTPUT"
          echo "build-output-dir=$(pwd)/build" >> "$GITHUB_OUTPUT"

      - name: Git safe dir
        run: git config --global --add safe.directory ${{ steps.strings.outputs.work-dir }}

      - uses: actions/checkout@v4
        with:
          submodules: recursive
          fetch-depth: 0 # Fetch all history and tags

      # Clean everything from submodules (needed to avoid issues
      # with cmake generated files leftover from previous builds)
      - name: Cleanup submodules
        run: |
          git submodule foreach --recursive git clean -ffdx
          git submodule foreach --recursive git reset --hard

      - name: ccache
        uses: hendrikmuhs/[email protected]
        with:
          create-symlink: true
          key: model-analysis-${{ runner.os }}

      - name: Build
        shell: bash
        run: |
          source env/activate
          cmake -G Ninja \
            -B ${{ steps.strings.outputs.build-output-dir }} \
            -DCMAKE_BUILD_TYPE=Release \
            -DCMAKE_C_COMPILER=clang \
            -DCMAKE_CXX_COMPILER=clang++ \
            -DCMAKE_C_COMPILER_LAUNCHER=ccache \
            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
          cmake --build ${{ steps.strings.outputs.build-output-dir }}

      - name: Run Model Analysis Script
        shell: bash
        run: |
          source env/activate
          apt install -y libgl1-mesa-glx
          python scripts/model_analysis.py \
            --test_directory_or_file_path forge/test/models/pytorch \
            --dump_failure_logs \
            --markdown_directory_path ./model_analysis_docs \
            --unique_ops_output_directory_path ./models_unique_ops_output \
            2>&1 | tee model_analysis.log

      - name: Upload Model Analysis Script Logs
        uses: actions/upload-artifact@v4
        if: success() || failure()
        with:
          name: model-analysis-outputs
          path: model_analysis.log

      - name: Upload Models Unique Ops test Failure Logs
        uses: actions/upload-artifact@v4
        if: success() || failure()
        with:
          name: unique-ops-logs
          path: ./models_unique_ops_output

      # - name: Create Pull Request
      #   uses: peter-evans/create-pull-request@v7
      #   with:
      #     branch: model_analysis
      #     committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
      #     author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
      #     base: main
      #     commit-message: "Update model analysis docs"
      #     title: "Update model analysis docs"
      #     body: "This PR will update model analysis docs"
      #     labels: model_analysis
      #     delete-branch: true
      #     token: ${{ secrets.GH_TOKEN }}
      #     add-paths: |
      #       model_analysis_docs/
      #     draft: true # Need to remove
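With the schedule trigger still commented out, the job runs via workflow_dispatch or on pushes to the feature branch. A sketch of dispatching it and fetching the uploaded artifacts with the GitHub CLI, assuming an authenticated `gh` in a clone of this repository:

# Trigger the weekly analysis on demand
gh workflow run model-analysis-weekly.yml --ref main

# Watch the run, then download the artifacts it uploaded
gh run watch
gh run download --name model-analysis-outputs
gh run download --name unique-ops-logs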
10 changes: 5 additions & 5 deletions .github/workflows/on-pr.yml
@@ -12,8 +12,8 @@ jobs:
   pre-commit:
     uses: ./.github/workflows/pre-commit.yml
     secrets: inherit
-  docker-build:
-    uses: ./.github/workflows/build-and-test.yml
-    secrets: inherit
-    with:
-      test_mark: 'push'
+  # docker-build:
+  #   uses: ./.github/workflows/build-and-test.yml
+  #   secrets: inherit
+  #   with:
+  #     test_mark: 'push'
21 changes: 21 additions & 0 deletions forge/test/conftest.py
@@ -448,3 +448,24 @@ def pytest_runtest_logreport(report):
    for key, default_value in environ_before_test.items():
        if os.environ.get(key, "") != default_value:
            os.environ[key] = default_value


def pytest_collection_modifyitems(config, items):

    marker = config.getoption("-m")  # Get the marker expression from the -m option

    if marker == "model_analysis_new":  # If the model analysis marker is specified
        filtered_items = [item for item in items if marker in item.keywords]
        print("Automatic Model Analysis Collected tests: ")
        for item in filtered_items:
            test_file_path = item.location[0]
            test_name = item.location[2]
            print(f"{test_file_path}::{test_name}")
        print(f"Automatic Model Analysis Collected test count: {len(filtered_items)}")
        if not filtered_items:  # Warn if no tests match the marker
            print(f"Warning: No tests found with marker '{marker}'.")
    else:
        # Fall back to printing the full collection
        print(items)
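For reference, a collection-only run exercises this hook without executing any tests; a sketch, assuming the marker is registered in the project's pytest configuration (note the hook matches the literal string "model_analysis_new", while the test files below use "model_analysis"):

# List the tests the hook reports, without running them
pytest -m model_analysis_new --collect-only -q forge/test/models/pytorch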
1 change: 1 addition & 0 deletions forge/test/models/pytorch/audio/whisper/test_whisper_0.py
@@ -99,6 +99,7 @@ def forward(self, decoder_input_ids, encoder_hidden_states):


@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.parametrize("variant", variants, ids=variants)
def test_whisper(test_device, variant):

1 change: 1 addition & 0 deletions forge/test/models/pytorch/multimodal/clip/test_clip.py
@@ -17,6 +17,7 @@


@pytest.mark.nightly
@pytest.mark.model_analysis
def test_clip_pytorch(test_device):

    # Set Forge configuration parameters
6 changes: 4 additions & 2 deletions forge/test/models/pytorch/multimodal/vilt/test_vilt.py
@@ -50,8 +50,9 @@ def generate_model_vilt_question_answering_hf_pytorch(test_device, variant):
variants = ["dandelin/vilt-b32-finetuned-vqa"]


@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.parametrize("variant", variants, ids=variants)
def test_vilt_question_answering_hf_pytorch(variant, test_device):
model, inputs, _ = generate_model_vilt_question_answering_hf_pytorch(
test_device,
@@ -93,9 +94,10 @@ def generate_model_vilt_maskedlm_hf_pytorch(test_device, variant):
variants = ["dandelin/vilt-b32-mlm"]


@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.xfail(reason="pcc=0.9498278562793674")
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.nightly
def test_vilt_maskedlm_hf_pytorch(variant, test_device):
model, inputs, _ = generate_model_vilt_maskedlm_hf_pytorch(
test_device,
6 changes: 4 additions & 2 deletions forge/test/models/pytorch/text/albert/test_albert.py
@@ -12,10 +12,11 @@
variants = ["v1", "v2"]


@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.xfail(reason="TT_FATAL(weights.get_dtype() == DataType::BFLOAT16) in embedding op")
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("size", sizes, ids=sizes)
@pytest.mark.nightly
def test_albert_masked_lm_pytorch(size, variant, test_device):
model_ckpt = f"albert-{size}-{variant}"

@@ -55,10 +56,11 @@ def test_albert_masked_lm_pytorch(size, variant, test_device):
variants = ["v1", "v2"]


@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.xfail(reason="TT_FATAL(weights.get_dtype() == DataType::BFLOAT16) in embedding op")
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("size", sizes, ids=sizes)
@pytest.mark.nightly
def test_albert_token_classification_pytorch(size, variant, test_device):

compiler_cfg = forge.config._get_global_compiler_config()
1 change: 1 addition & 0 deletions forge/test/models/pytorch/text/bart/test_bart.py
@@ -23,6 +23,7 @@ def forward(self, input_ids, attention_mask, decoder_input_ids):


@pytest.mark.nightly
@pytest.mark.model_analysis
def test_pt_bart_classifier(test_device):
    compiler_cfg = _get_global_compiler_config()
    compiler_cfg.compile_depth = CompileDepth.SPLIT_GRAPH
4 changes: 4 additions & 0 deletions forge/test/models/pytorch/text/bert/test_bert.py
@@ -40,6 +40,7 @@ def generate_model_bert_maskedlm_hf_pytorch(variant):

@pytest.mark.xfail(reason="TT_FATAL(weights.get_dtype() == DataType::BFLOAT16) in embedding op")
@pytest.mark.nightly
@pytest.mark.model_analysis
def test_bert_masked_lm_pytorch(test_device):
    model, inputs, _ = generate_model_bert_maskedlm_hf_pytorch("bert-base-uncased")

@@ -89,6 +90,7 @@ def generate_model_bert_qa_hf_pytorch(variant):

@pytest.mark.xfail(reason="TT_FATAL(weights.get_dtype() == DataType::BFLOAT16) in embedding op")
@pytest.mark.nightly
@pytest.mark.model_analysis
def test_bert_question_answering_pytorch(test_device):
    model, inputs, _ = generate_model_bert_qa_hf_pytorch("bert-large-cased-whole-word-masking-finetuned-squad")

@@ -128,6 +130,7 @@ def generate_model_bert_seqcls_hf_pytorch(variant):


@pytest.mark.nightly
@pytest.mark.model_analysis
def test_bert_sequence_classification_pytorch(test_device):
    model, inputs, _ = generate_model_bert_seqcls_hf_pytorch(
        "textattack/bert-base-uncased-SST-2",
@@ -169,6 +172,7 @@ def generate_model_bert_tkcls_hf_pytorch(variant):

@pytest.mark.xfail(reason="TT_FATAL(weights.get_dtype() == DataType::BFLOAT16) in embedding op")
@pytest.mark.nightly
@pytest.mark.model_analysis
def test_bert_token_classification_pytorch(test_device):
    model, inputs, _ = generate_model_bert_tkcls_hf_pytorch("dbmdz/bert-large-cased-finetuned-conll03-english")

1 change: 1 addition & 0 deletions forge/test/models/pytorch/text/codegen/test_codegen.py
@@ -21,6 +21,7 @@

@pytest.mark.xfail(reason="RuntimeError: Tensor 41 - data type mismatch: expected Float32, got BFloat16")
@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.parametrize("variant", variants, ids=variants)
def test_codegen(test_device, variant):
    # Configurations
4 changes: 4 additions & 0 deletions forge/test/models/pytorch/text/distilbert/test_distilbert.py
@@ -16,6 +16,7 @@


@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.parametrize("variant", variants, ids=variants)
def test_distilbert_masked_lm_pytorch(variant, test_device):
    # Load DistilBert tokenizer and model from HuggingFace
@@ -46,6 +47,7 @@ def test_distilbert_masked_lm_pytorch(variant, test_device):


@pytest.mark.nightly
@pytest.mark.model_analysis
def test_distilbert_question_answering_pytorch(test_device):
    # Load Bert tokenizer and model from HuggingFace
    model_ckpt = "distilbert-base-cased-distilled-squad"
@@ -82,6 +84,7 @@ def test_distilbert_question_answering_pytorch(test_device):


@pytest.mark.nightly
@pytest.mark.model_analysis
def test_distilbert_sequence_classification_pytorch(test_device):

    # Load DistilBert tokenizer and model from HuggingFace
@@ -109,6 +112,7 @@ def test_distilbert_sequence_classification_pytorch(test_device):


@pytest.mark.nightly
@pytest.mark.model_analysis
def test_distilbert_token_classification_pytorch(test_device):
    # Load DistilBERT tokenizer and model from HuggingFace
    model_ckpt = "Davlan/distilbert-base-multilingual-cased-ner-hrl"
3 changes: 3 additions & 0 deletions forge/test/models/pytorch/text/dpr/test_dpr.py
@@ -20,6 +20,7 @@

@pytest.mark.xfail(reason="TT_FATAL(weights.get_dtype() == DataType::BFLOAT16) in embedding op")
@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.parametrize("variant", variants, ids=variants)
def test_dpr_context_encoder_pytorch(variant, test_device):

@@ -61,6 +62,7 @@ def test_dpr_context_encoder_pytorch(variant, test_device):

@pytest.mark.xfail(reason="TT_FATAL(weights.get_dtype() == DataType::BFLOAT16) in embedding op")
@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.parametrize("variant", variants, ids=variants)
def test_dpr_question_encoder_pytorch(variant, test_device):
    # Load Bert tokenizer and model from HuggingFace
@@ -101,6 +103,7 @@ def test_dpr_question_encoder_pytorch(variant, test_device):

@pytest.mark.xfail(reason="TT_FATAL(weights.get_dtype() == DataType::BFLOAT16) in embedding op")
@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.parametrize("variant", variants, ids=variants)
def test_dpr_reader_pytorch(variant, test_device):
    # Load Bert tokenizer and model from HuggingFace
1 change: 1 addition & 0 deletions forge/test/models/pytorch/text/falcon/test_falcon.py
@@ -8,6 +8,7 @@


@pytest.mark.nightly
@pytest.mark.model_analysis
def test_falcon(test_device):

    compiler_cfg = forge.config._get_global_compiler_config()
1 change: 1 addition & 0 deletions forge/test/models/pytorch/text/fuyu/test_fuyu_8b.py
@@ -30,6 +30,7 @@


@pytest.mark.nightly
@pytest.mark.model_analysis
def test_fuyu8b(test_device):
    # Set Forge configuration parameters
    compiler_cfg = forge.config._get_global_compiler_config()
1 change: 1 addition & 0 deletions forge/test/models/pytorch/text/gemma/test_gemma_2b.py
@@ -254,6 +254,7 @@ def forward(self, hidden_states, attn_mask, pos_ids):


@pytest.mark.nightly
@pytest.mark.model_analysis
@pytest.mark.parametrize("variant", variants, ids=variants)
def test_gemma_2b(test_device, variant):
    # Random seed for reproducibility
(Diff truncated; the remaining changed files are not shown in this view.)