Migrate from FX tracer to _export_to_torch_ir (#873)

## Changes

- Use `torch._export._export_to_torch_ir` as the new tracer (replacing FX symbolic tracing)
- Remove `pippy.fx` and use `torch.fx` instead
- Remove `PipelineDriver` (RPC backend)

Old APIs such as `PipelineDriver` are no longer supported, and the signature of the `from_tracing` API has changed to be in line with that of export.

## Major API usage

```
pipe = Pipe.from_tracing(
    mod,
    args.chunks,
    example_args=(x, y),
)

stage = PipelineStage(
    pipe,
    args.rank,
    device=args.device,
)

# Run
if args.rank == 0:
    stage(x)
elif args.rank == args.world_size - 1:
    out = stage(y)
else:
    stage()
```
pytorch · Nov 29, 2023 · 619d535 · 619d535
1 parent 83a2308
commit 619d535
Show file tree

Hide file tree

Showing 127 changed files with 628 additions and 39,116 deletions.
diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml
@@ -23,10 +23,8 @@ jobs:
         pip install --upgrade pip
         pip install -r docs/requirements.txt
         pip install types-docutils types-setuptools tqdm types-tabulate
-        if [ -f requirements.txt ]; then pip install -r requirements.txt --index-url https://download.pytorch.org/whl/cpu; fi
-        pip install torchvision --index-url https://download.pytorch.org/whl/cpu
-        pip install git+https://github.com/pbelevich/transformers.git@compatible_with_pt_master
-        pip install "black<23" pylint==v3.0.0a5 mypy==v0.960 flake8==3.8.2 pyre-check==0.9.15 ufmt==2.1.0
+        if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
+        pip install "black<23" pylint==v3.0.0a5 mypy==v0.981 flake8==3.8.2 pyre-check==0.9.15 ufmt==2.1.0
     - name: Static Analysis Checks
       if: always()
-      run: ./check.sh --keep-going
+      run: ./check.sh
diff --git a/.github/workflows/pippy_gpu_tests.sh b/.github/workflows/pippy_gpu_tests.sh
diff --git a/.github/workflows/pippy_tests.yaml b/.github/workflows/pippy_tests.yaml
@@ -21,28 +21,26 @@ concurrency:
 
 jobs:
 
-  pytest_tests:
-    runs-on: linux.4xlarge
-    strategy:
-      matrix:
-        python-version: ["3.8", "3.9"]
-    container:
-      image: python:${{ matrix.python-version }}
+  # pytest_tests:
+  #   runs-on: linux.4xlarge
+  #   strategy:
+  #     matrix:
+  #       python-version: ["3.8", "3.9"]
+  #   container:
+  #     image: python:${{ matrix.python-version }}
 
-    steps:
-      - uses: actions/checkout@v2
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install flake8 pytest pytest-cov pytest-xdist numpy
-          if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
-      - name: Install pavel's huggingface fork
-        run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
-      - name: Install pippy
-        run: "python setup.py install"
-      - name: Test with pytest
-        run: |
-          pytest --cov=pippy --ignore=test/hf_test.py --ignore=test/test_fx.py --ignore=test/test_fx_experimental.py --ignore=test/fx test/
+  #   steps:
+  #     - uses: actions/checkout@v2
+  #     - name: Install dependencies
+  #       run: |
+  #         python -m pip install --upgrade pip
+  #         pip install flake8 pytest pytest-cov pytest-xdist numpy
+  #         if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
+  #     - name: Install pippy
+  #       run: "python setup.py install"
+  #     - name: Test with pytest
+  #       run: |
+  #         pytest --cov=pippy test/
 
   # hf_model_tests:
   #   runs-on: linux.12xlarge
@@ -76,10 +74,8 @@ jobs:
     runs-on: linux.4xlarge
     strategy:
       matrix:
-        python-version: ["3.8", "3.9"]
-        replicate: ["0", "1"]
-        schedule: ["FillDrain", "1F1B"]
-        checkpoint: [ "0", "1" ]
+        python-version: ["3.9"]
+        schedule: ["FillDrain"]
     env:
       OMP_NUM_THREADS: "1"
     container:
@@ -92,30 +88,26 @@ jobs:
           python -m pip install --upgrade pip
           pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
           if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
-      - name: Install pavel's huggingface fork
-        run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
       - name: Install pippy
         run: "python setup.py install"
+      - name: Test forward pipe generation
+        run: python test/test_pipe.py
+      - name: Test backward pipe generation
+        run: python test/test_pipe_bwd.py
       - name: Run forward-only integration test
-        run: python test/local_test_forward.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
-      - name: Run forward-only-auto-parallel integration test
-        run: python test/local_test_forward_auto_parallel.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
+        run: torchrun --nproc-per-node 4 test/test_fwd.py
       - name: Run forward-loss-backward integration test
-        run: python test/local_test_forward_backward.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
-      - name: Run null_coalesce_accumulate integration test
-        run: python test/local_test_null_coalesce_accumulate.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }}
-      - name: Run PP + DDP test
-        run: python test/local_test_ddp.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
+        run: torchrun --nproc-per-node 4 test/test_bwd.py --schedule ${{ matrix.schedule }}
+      # - name: Run null_coalesce_accumulate integration test
+      #   run: python test/local_test_null_coalesce_accumulate.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }}
+      # - name: Run PP + DDP test
+      #   run: python test/local_test_ddp.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
       #- name: Run HF BERT forward-only integration test
-      #  run: python test/local_test_forward_hf_bert.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
-      - name: Run HF GPT2 forward-only integration test
-        run: python test/local_test_forward_hf_gpt2.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
-      - name: Run visualizer test
-        run: python test/local_test_visualizer.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }}
-      - name: Run auto-split test
-        run: python test/local_test_autosplit.py --replicate ${{ matrix.replicate }} -s ${{ matrix.schedule }}
-      - name: Run compile test
-        run: python test/local_test_compile.py -s ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
+      #  run: python test/local_test_forward_hf_bert.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
+      # - name: Run HF GPT2 forward-only integration test
+      #   run: python test/local_test_forward_hf_gpt2.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
+      # - name: Run auto-split test
+      #   run: python test/local_test_autosplit.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }}
 
   # hf_examples_set1:
   #   runs-on: linux.12xlarge
@@ -145,11 +137,11 @@ jobs:
   #         git submodule update --init test/minGPT
   #         python test/min_gpt_tracing.py
   #     - name: Run GPT2 example
-  #       run: python examples/hf/gpt2/pippy_gpt2.py -s ${{ matrix.schedule }}
+  #       run: python examples/hf/gpt2/pippy_gpt2.py --schedule ${{ matrix.schedule }}
   #     - name: Run BERT example
-  #       run: python examples/hf/bert/pippy_bert.py -s ${{ matrix.schedule }}
+  #       run: python examples/hf/bert/pippy_bert.py --schedule ${{ matrix.schedule }}
   #     - name: Run T5 example
-  #       run: python examples/hf/t5/pippy_t5.py -s ${{ matrix.schedule }}
+  #       run: python examples/hf/t5/pippy_t5.py --schedule ${{ matrix.schedule }}
   #     - name: "HF Translation: fine-tune T5 model translation English to Romanian"
   #       run: >
   #         python examples/hf/translation/run_translation.py --model_name_or_path t5-small --do_train --source_lang en --target_lang ro --source_prefix "translate English to Romanian: " --dataset_name wmt16 --dataset_config_name ro-en --output_dir /tmp/tst-translation --per_device_train_batch_size=8 --per_device_eval_batch_size=8 --overwrite_output_dir --predict_with_generate --max_steps=10  --dp_group_size=1 --pp_group_size=8
@@ -186,84 +178,6 @@ jobs:
   #     - name: "HF Text classification: fine-tune BERT on the GLUE benchmark"
   #       run: python examples/hf/text-classification/run_glue.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path bert-base-cased --task_name mrpc --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --num_train_epochs 3 --output_dir /tmp/mrpc/ --max_steps=3 --overwrite_output_dir
 
-  integration_test_gpu:
-    runs-on: linux.16xlarge.nvidia.gpu
-    strategy:
-      matrix:
-        python-version: ["3.8"]
-        replicate: ["0", "1"]
-        schedule: ["FillDrain", "1F1B"]
-    env:
-      DOCKER_IMAGE: qts8n/cuda-python:devel
-      PIPPY_ROOT: /PiPPy
-      OMP_NUM_THREADS: "1"
-      REPLICATE: ${{ matrix.replicate }}
-      SCHEDULE: ${{ matrix.schedule }}
-
-    steps:
-      - name: Clean working directory
-        shell: bash
-        run: |
-          sudo rm -rf /home/ec2-user/actions-runner/_work/PiPPy/PiPPy/* || true
-      - uses: actions/checkout@v2
-      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
-      - name: Pull Docker image
-        run: |
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
-          }
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Test docker run
-        run: |
-          set -x
-          # shellcheck disable=SC2086,SC2090
-          container_name=$(docker run \
-            --gpus all \
-            --shm-size=1g --ulimit memlock=-1 \
-            -e OMP_NUM_THREADS \
-            -e REPLICATE \
-            -e SCHEDULE \
-            --tty \
-            --detach \
-            -v "$(pwd):${PIPPY_ROOT}" \
-            -w "${PIPPY_ROOT}" \
-            "${DOCKER_IMAGE}"
-          )
-          # Run GPU tests and return error signal from docker
-          docker exec -t -w "${PIPPY_ROOT}" "${container_name}" bash -c "bash .github/workflows/pippy_gpu_tests.sh; exit \$?"
-      - name: Chown workspace
-        if: always()
-        run: |
-          # Ensure the working directory gets chowned back to the current user
-          docker run --rm -v "$(pwd):${PIPPY_ROOT}" -w "${PIPPY_ROOT}" "${DOCKER_IMAGE}" chown -R "$(id -u):$(id -g)" .
-      - name: Kill containers, clean up images
-        if: always()
-        run: |
-          # ignore expansion of "docker ps -q" since it could be empty
-          # shellcheck disable=SC2046
-          docker stop $(docker ps -q) || true
-          # Prune all of the docker images
-          docker system prune -af
-
-  programming_model_tests:
-    runs-on: linux.4xlarge
-    strategy:
-      matrix:
-        python-version: ["3.9"]
-    container:
-      image: python:${{ matrix.python-version }}
-
-    steps:
-      - uses: actions/checkout@v2
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install numpy datasets evaluate scikit-learn sacrebleu
-          if [ -f requirements.txt ]; then pip install --pre -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
-      - name: Install pippy
-        run: "python setup.py install"
-      - name: Test PiPPy + Dynamo example
-        run: python examples/TorchDynamo/pippy_dynamo.py
-      - name: Run PiPPy in GSPMD style
-        run: python examples/gspmd/pippy_gspmd.py
+  # TODO:
+  # Update GPU test to use template in:
+  # https://github.com/pytorch/test-infra/wiki/Writing-generic-CI-jobs
diff --git a/check.sh b/check.sh
@@ -4,7 +4,7 @@ function usage() {
   echo 2>&1 <<EOF
 USAGE: ./check [--keep-going] [--pyre]
 
-  --keep-going (default: 0)
+  --keep-going (default: 1)
   Continue processing even when errors are ecountered.
 
   --pyre
@@ -17,7 +17,7 @@ EOF
 
 SKIP_FORMAT=0
 SKIP_PYRE=1
-KEEP_GOING=0
+KEEP_GOING=1
 for x in "$@"; do
   case "$x" in
     '--keep-going')
@@ -64,12 +64,11 @@ flake8 pippy
 (( RETVAL |= $? ))
 
 echo; echo "Running mypy ..."
-mypy pippy
+mypy --follow-imports=skip pippy
 (( RETVAL |= $? ))
 
 echo; echo "Running pylint ..."
 pylint --disable=all --enable=unused-import $(git ls-files '*.py')
 (( RETVAL |= $? ))
 
 exit $RETVAL
-
diff --git a/examples/hf/gpt2/pippy_gpt2.py b/examples/hf/gpt2/pippy_gpt2.py
@@ -66,8 +66,10 @@ def run_gspmd(pp_ranks, args):
     finish = time.time()
     print(f"GPT-2 model instantiation finished in {(finish - start) / 60:1.2f} minutes")
     gpt2.eval()
-    print(gpt2.config)
-    print(f"GPT-2 total number of params = {get_number_of_params(gpt2) // 10 ** 6}M")
+    if args.rank == 0:
+        print(gpt2.config)
+        print(f"GPT-2 total number of params = {get_number_of_params(gpt2) // 10 ** 6}M")
+        print(gpt2)
 
     emb_head = 2  # embeddings + head
     master_emb_head = 1 + emb_head  # master + embeddings + head
@@ -97,7 +99,8 @@ def run_gspmd(pp_ranks, args):
     sm_cnt = add_split_points(gpt2, decoders_per_rank)
     assert sm_cnt == len(all_worker_ranks), f"sm_cnt = {sm_cnt} all_worker_ranks = {all_worker_ranks}"
 
-    # print(gpt2)
+    if args.rank == 0:
+        print(gpt2)
 
     input_names = gpt2_input_dict.keys()
     sig = inspect.signature(gpt2.forward)
@@ -108,6 +111,9 @@ def run_gspmd(pp_ranks, args):
                               'past_key_values': [[False for _ in range(2)] for _ in range(12)]}
     gpt2_pipe = Pipe.from_tracing(gpt2, MULTI_USE_PARAM_CONFIG, tracer=PiPPyHFTracer(), concrete_args=concrete_args,
                                   output_loss_value_spec=output_loss_value_spec, deep_copy_module=False)
+    if args.rank == 0:
+        print(gpt2_pipe.split_gm)
+
     assert sm_cnt == len(list(gpt2_pipe.split_gm.children()))
 
     # Materialize model differently depending on run mode