Skip to content

Commit

Permalink
Merge branch 'main' into feat/llama
Browse files Browse the repository at this point in the history
  • Loading branch information
abuccts authored Nov 27, 2024
2 parents b08f9e3 + 96f5cce commit 54c3e85
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 6 deletions.
1 change: 1 addition & 0 deletions .azure-pipelines/ansible-integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ trigger:

pool:
name: SuperBench CI
demands: ansible-agent
vmImage: ubuntu-latest

container:
Expand Down
10 changes: 7 additions & 3 deletions .azure-pipelines/cuda-unit-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,26 @@ trigger:

pool:
name: SuperBench CI
demands: cuda-agent
vmImage: ubuntu-latest

container:
image: nvcr.io/nvidia/pytorch:24.03-py3
options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/:/usr/lib/sudo/'
options: '--name cuda-ci -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker:ro'

steps:
- script: |
echo "##vso[task.prependpath]$HOME/.local/bin"
displayName: Export path
- script: |
docker exec -t -u root -e DEBIAN_FRONTEND=noninteractive cuda-ci bash -c \
"apt-get update -y -q && \
yes '' | apt-get install -y -q sudo && \
apt-get install -y -q \
ffmpeg libavcodec-dev libavformat-dev libavutil-dev libboost-program-options-dev libswresample-dev"
python3 -m pip install --upgrade pip setuptools==65.7
python3 -m pip install .[test,nvworker]
make postinstall
sudo DEBIAN_FRONTEND=noninteractive apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev
displayName: Install dependencies
- script: |
python3 setup.py lint
Expand Down
4 changes: 4 additions & 0 deletions .codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ coverage:
threshold: 1%
flags:
- cpu-python3.7-unit-test
- cpu-python3.8-unit-test
- cpu-python3.10-unit-test
- cuda-unit-test
- directx-unit-test
patch:
Expand All @@ -23,5 +25,7 @@ coverage:
threshold: 1%
flags:
- cpu-python3.7-unit-test
- cpu-python3.8-unit-test
- cpu-python3.10-unit-test
- cuda-unit-test
- directx-unit-test
2 changes: 2 additions & 0 deletions superbench/benchmarks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def __init__(self, name, parameters=''):
allow_abbrev=False,
formatter_class=SortedMetavarTypeHelpFormatter,
)
# Fix optionals title in Python 3.10
self._parser._optionals.title = 'optional arguments'
self._args = None
self._curr_run_index = 0
self._result = None
Expand Down
25 changes: 22 additions & 3 deletions tests/benchmarks/model_benchmarks/test_pytorch_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,16 +250,35 @@ def test_pytorch_empty_cache():
# Register mnist benchmark.
BenchmarkRegistry.register_benchmark('pytorch-mnist', PytorchMNIST)

# Get initial memory reserved
init_res_memory = torch.cuda.memory_reserved()

# Test cache empty by manually calling torch.cuda.empty_cache().
parameters = '--batch_size 32 --num_warmup 8 --num_steps 64 --model_action train'
benchmark = PytorchMNIST('pytorch-mnist', parameters=parameters)

assert (benchmark)
assert (benchmark._preprocess())
assert (benchmark._benchmark())
del benchmark
assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] > 0)

# Get current reserved memory after benchmark
post_bm_res_memory = torch.cuda.memory_reserved()

# Assert that memory is increased after benchmark
assert (post_bm_res_memory >= init_res_memory)

# Manually empty cache and get reserved memory
# Calling empty_cache() releases all unused cached memory from PyTorch so that it can be used by
# other GPU applications. However, the GPU memory occupied by tensors will not be freed, so it cannot
# increase the amount of GPU memory available for PyTorch.
# https://pytorch.org/docs/stable/notes/cuda.html#cuda-memory-management
torch.cuda.empty_cache()
assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
post_empty_cache_res_memory = torch.cuda.memory_reserved()

# Assert that some memory is released after manually emptying the cache. The reserved memory is not
# guaranteed to drop back to init_res_memory, because some tensors may not have been released yet.
assert (post_empty_cache_res_memory <= post_bm_res_memory)

# Test automatic cache empty.
context = BenchmarkRegistry.create_benchmark_context(
Expand All @@ -268,4 +287,4 @@ def test_pytorch_empty_cache():

benchmark = BenchmarkRegistry.launch_benchmark(context)
assert (benchmark)
assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
assert (torch.cuda.memory_reserved() == post_empty_cache_res_memory)

0 comments on commit 54c3e85

Please sign in to comment.