Skip to content

Commit

Permalink
torchbench CI (#7162)
Browse files Browse the repository at this point in the history
  • Loading branch information
zpcore committed Jun 10, 2024
1 parent 28c5e14 commit 291764f
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 29 deletions.
60 changes: 32 additions & 28 deletions .github/scripts/run_tests.sh
Original file line number Diff line number Diff line change
@@ -1,34 +1,32 @@
set -ex

function run_torch_xla_python_tests() {
PYTORCH_DIR=$1
XLA_DIR=$2
USE_COVERAGE="${3:-0}"
XLA_DIR=$1
USE_COVERAGE="${2:-0}"

pushd $XLA_DIR
echo "Running Python Tests"
if [ "$USE_COVERAGE" != "0" ]; then
pip install coverage==6.5.0 --upgrade
pip install coverage-lcov
pip install toml
./test/run_tests.sh
coverage combine
mkdir lcov && cp .coverage lcov/
coverage-lcov --data_file_path lcov/.coverage
coverage html
cp lcov.info htmlcov/
mv htmlcov ~/
chmod -R 755 ~/htmlcov
else
./test/run_tests.sh
fi
echo "Running Python Tests"
if [ "$USE_COVERAGE" != "0" ]; then
pip install coverage==6.5.0 --upgrade
pip install coverage-lcov
pip install toml
./test/run_tests.sh
coverage combine
mkdir lcov && cp .coverage lcov/
coverage-lcov --data_file_path lcov/.coverage
coverage html
cp lcov.info htmlcov/
mv htmlcov ~/
chmod -R 755 ~/htmlcov
else
./test/run_tests.sh
fi
popd
}

function run_torch_xla_cpp_tests() {
PYTORCH_DIR=$1
XLA_DIR=$2
USE_COVERAGE="${3:-0}"
XLA_DIR=$1
USE_COVERAGE="${2:-0}"

TORCH_DIR=$(python -c "import pkgutil; import os; print(os.path.dirname(pkgutil.get_loader('torch').get_filename()))")
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${TORCH_DIR}/lib
Expand Down Expand Up @@ -73,9 +71,15 @@ function run_torch_xla_cpp_tests() {

function run_torch_xla_benchmark_tests() {
XLA_DIR=$1
TORCHBENCH_MODELS=(BERT_pytorch dcgan)
pushd $XLA_DIR
echo "Running Benchmark Tests"
test/benchmarks/run_tests.sh -L""
echo "Running Benchmark Tests"
test/benchmarks/run_tests.sh -L""
popd
pushd $XLA_DIR
echo "Running Torchbench Tests"
test/benchmarks/run_torchbench_tests.sh "${TORCHBENCH_MODELS[@]}"
popd
}

PYTORCH_DIR=$1
Expand All @@ -91,16 +95,16 @@ export PYTORCH_TESTING_DEVICE_ONLY_FOR="xla"
export CXX_ABI=$(python -c "import torch;print(int(torch._C._GLIBCXX_USE_CXX11_ABI))")

if [[ -z "$RUN_BENCHMARK_TESTS" && -z "$RUN_CPP_TESTS1" && -z "$RUN_CPP_TESTS2" && -z "$RUN_PYTHON_TESTS" ]]; then
run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
run_torch_xla_python_tests $XLA_DIR $USE_COVERAGE
run_torch_xla_cpp_tests $XLA_DIR $USE_COVERAGE
run_torch_xla_benchmark_tests $XLA_DIR
else
# run tests separately.
if [[ "$RUN_PYTHON_TESTS" == "python_tests" ]]; then
run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
run_torch_xla_python_tests $XLA_DIR $USE_COVERAGE
elif [[ "$RUN_BENCHMARK_TESTS" == "benchmark_tests" ]]; then
run_torch_xla_benchmark_tests $XLA_DIR
else
run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
run_torch_xla_cpp_tests $XLA_DIR $USE_COVERAGE
fi
fi
2 changes: 1 addition & 1 deletion benchmarks/benchmark_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _is_available(self, experiment_config):
return True

def load_experiment(self, experiment_config):
accelerator = experiment_config["accelerator"]
accelerator = experiment_config["accelerator"].lower()
xla = experiment_config["xla"]
xla_flags = experiment_config["xla_flags"]
dynamo = experiment_config["dynamo"]
Expand Down
107 changes: 107 additions & 0 deletions test/benchmarks/run_torchbench_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/bin/bash
# Torchbench CI driver: installs pinned torch domain packages plus the
# requested torchbench models, then runs each model through the XLA
# experiment runner. Model names are taken from the command line:
#   run_torchbench_tests.sh MODEL [MODEL...]
set -ex
# Absolute directory of this script (resolves symlinked cwd via pwd -P).
CDIR="$(cd "$(dirname "$0")" ; pwd -P)"
echo $CDIR

# Models to install and test, passed in by the caller (e.g. run_tests.sh).
TORCHBENCH_MODELS=("$@")
# construct the absolute path
# Layout assumption: this script lives at <xla>/test/benchmarks/, with the
# pytorch checkout (and its sibling vision/audio/text/benchmark clones)
# one level above the xla tree — TODO confirm against CI workspace layout.
XLA_DIR=$CDIR/../../
PYTORCH_DIR=$XLA_DIR/../
TORCHVISION_DIR=$PYTORCH_DIR/vision
TORCHAUDIO_DIR=$PYTORCH_DIR/audio
TORCHTEXT_DIR=$PYTORCH_DIR/text
TORCHBENCH_DIR=$PYTORCH_DIR/benchmark

# Note [Keep Going]
#
# Set the `CONTINUE_ON_ERROR` env var to `1` to make the CI tests continue on error.
# This will allow you to see all the failures on your PR, not stopping with the first
# test failure like the default behavior.
CONTINUE_ON_ERROR="${CONTINUE_ON_ERROR:-0}"
if [[ "$CONTINUE_ON_ERROR" == "1" ]]; then
set +e
fi


#######################################
# Clone and install the torchvision, torchaudio and torchtext packages at
# the commits pinned by the pytorch checkout.
# Globals:
#   CDIR, PYTORCH_DIR, TORCHVISION_DIR, TORCHAUDIO_DIR, TORCHTEXT_DIR (read)
# Outputs: echoes each pinned commit hash; build logs go to /dev/null.
# Returns: non-zero (aborting under `set -e`) if any clone/checkout/build fails.
#######################################
function install_package() {
  pushd "$CDIR"

  # Commit-hash variables are local so they don't leak into global scope;
  # all path expansions are quoted so paths with spaces don't word-split.
  local torchvision_commit_hash
  torchvision_commit_hash=$(cat "$PYTORCH_DIR/.github/ci_commit_pins/vision.txt")
  echo torchvision_commit_hash: "$torchvision_commit_hash"
  git clone --quiet https://github.com/pytorch/vision.git "$TORCHVISION_DIR"
  cd "$TORCHVISION_DIR"
  git checkout "$torchvision_commit_hash"
  python setup.py install 1>/dev/null

  local torchaudio_commit_hash
  torchaudio_commit_hash=$(cat "$PYTORCH_DIR/.github/ci_commit_pins/audio.txt")
  echo torchaudio_commit_hash: "$torchaudio_commit_hash"
  git clone --quiet https://github.com/pytorch/audio.git "$TORCHAUDIO_DIR"
  cd "$TORCHAUDIO_DIR"
  git checkout "$torchaudio_commit_hash"
  python setup.py install 1>/dev/null

  local torchtext_commit_hash
  torchtext_commit_hash=$(cat "$PYTORCH_DIR/.github/ci_commit_pins/text.txt")
  echo torchtext_commit_hash: "$torchtext_commit_hash"
  git clone --quiet https://github.com/pytorch/text.git "$TORCHTEXT_DIR"
  cd "$TORCHTEXT_DIR"
  git checkout "$torchtext_commit_hash"
  git submodule update --init --recursive
  # torchtext builds from submodules, so clean before installing.
  python setup.py clean install 1>/dev/null

  popd
}

#######################################
# Clone pytorch/benchmark and install only the requested torchbench models.
# Globals:
#   CDIR, TORCHBENCH_DIR, TORCHBENCH_MODELS (read)
# Outputs: progress to stdout, errors to stderr.
# Returns: exits 1 if any model fails to install.
#######################################
function install_torchbench_models() {
  pushd "$CDIR"

  git clone --quiet https://github.com/pytorch/benchmark.git "$TORCHBENCH_DIR"
  cd "$TORCHBENCH_DIR"
  local model
  for model in "${TORCHBENCH_MODELS[@]}"; do
    echo "Installing model: $model"
    # Use `if ! cmd` rather than testing $? afterwards: under `set -ex` a
    # failing install would abort the script before the $? check ever ran,
    # skipping the diagnostic message below.
    if ! python install.py models "$model"; then
      echo "ERROR: Failed to install $model. Exiting." >&2
      exit 1
    fi
  done
  popd
}

# Number of models that passed; reported in the end-of-run summary.
success_count=0

#######################################
# Run the torchbench experiment runner once per requested model.
# Globals:
#   TORCHBENCH_MODELS (read), success_count (written)
# Returns: 0 if every model passed, 1 if any model failed.
#######################################
function run_tests {
  local overall_status=0
  local pjrt_device="CPU"
  # TODO(piz): Uncomment the following if we decide to run on GPU.
  # if [ -x "$(command -v nvidia-smi)" ]; then
  #   num_devices=$(nvidia-smi --list-gpus | wc -l)
  #   echo "Found $num_devices GPU devices..."
  #   export GPU_NUM_DEVICES=$num_devices
  #   pjrt_device="CUDA"
  # fi
  local model
  for model in "${TORCHBENCH_MODELS[@]}"; do
    echo "testing model: $model"
    # `if ! cmd` is required for the keep-going behavior: with `set -e`, a
    # plain command followed by an `if [ $? -ne 0 ]` check would terminate
    # the script on the first failing model instead of recording it.
    if ! PJRT_DEVICE=$pjrt_device python -u benchmarks/experiment_runner.py \
        --suite-name=torchbench \
        --experiment-config='{"accelerator":"'"$pjrt_device"'","xla":"PJRT","dynamo":"openxla","test":"eval","torch_xla2":null,"xla_flags":null,"keep_model_data_on_cuda":false}' \
        --model-config='{"model_name":"'"$model"'"}'; then
      echo "ERROR: Failed to test $model. Exiting with failure." >&2
      overall_status=1
    else
      success_count=$((success_count + 1))
    fi
  done
  return $overall_status
}

# Install dependencies and models, then run the suite and report the outcome.
install_package
install_torchbench_models
# `if ! run_tests` instead of checking $? afterwards: under `set -e` a
# non-zero return from run_tests would exit the script before the check,
# so the failure message (and a deliberate exit code) would never appear.
if ! run_tests; then
  echo "Torchbench test suite failed."
  exit 1
else
  echo "All torchbench tests passed successfully."
fi
# Summary is only reached on full success (failure exits above).
total_models=${#TORCHBENCH_MODELS[@]}
echo "Successful tests: $success_count out of $total_models models"

0 comments on commit 291764f

Please sign in to comment.