Skip to content

Commit

Permalink
torchbench CI (#7162)
Browse files Browse the repository at this point in the history
  • Loading branch information
zpcore committed Jun 10, 2024
1 parent 28c5e14 commit 291764f
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 29 deletions.
60 changes: 32 additions & 28 deletions .github/scripts/run_tests.sh
Original file line number Diff line number Diff line change
@@ -1,34 +1,32 @@
set -ex

function run_torch_xla_python_tests() {
PYTORCH_DIR=$1
XLA_DIR=$2
USE_COVERAGE="${3:-0}"
XLA_DIR=$1
USE_COVERAGE="${2:-0}"

pushd $XLA_DIR
echo "Running Python Tests"
if [ "$USE_COVERAGE" != "0" ]; then
pip install coverage==6.5.0 --upgrade
pip install coverage-lcov
pip install toml
./test/run_tests.sh
coverage combine
mkdir lcov && cp .coverage lcov/
coverage-lcov --data_file_path lcov/.coverage
coverage html
cp lcov.info htmlcov/
mv htmlcov ~/
chmod -R 755 ~/htmlcov
else
./test/run_tests.sh
fi
echo "Running Python Tests"
if [ "$USE_COVERAGE" != "0" ]; then
pip install coverage==6.5.0 --upgrade
pip install coverage-lcov
pip install toml
./test/run_tests.sh
coverage combine
mkdir lcov && cp .coverage lcov/
coverage-lcov --data_file_path lcov/.coverage
coverage html
cp lcov.info htmlcov/
mv htmlcov ~/
chmod -R 755 ~/htmlcov
else
./test/run_tests.sh
fi
popd
}

function run_torch_xla_cpp_tests() {
PYTORCH_DIR=$1
XLA_DIR=$2
USE_COVERAGE="${3:-0}"
XLA_DIR=$1
USE_COVERAGE="${2:-0}"

TORCH_DIR=$(python -c "import pkgutil; import os; print(os.path.dirname(pkgutil.get_loader('torch').get_filename()))")
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${TORCH_DIR}/lib
Expand Down Expand Up @@ -73,9 +71,15 @@ function run_torch_xla_cpp_tests() {

function run_torch_xla_benchmark_tests() {
XLA_DIR=$1
TORCHBENCH_MODELS=(BERT_pytorch dcgan)
pushd $XLA_DIR
echo "Running Benchmark Tests"
test/benchmarks/run_tests.sh -L""
echo "Running Benchmark Tests"
test/benchmarks/run_tests.sh -L""
popd
pushd $XLA_DIR
echo "Running Torchbench Tests"
test/benchmarks/run_torchbench_tests.sh "${TORCHBENCH_MODELS[@]}"
popd
}

PYTORCH_DIR=$1
Expand All @@ -91,16 +95,16 @@ export PYTORCH_TESTING_DEVICE_ONLY_FOR="xla"
export CXX_ABI=$(python -c "import torch;print(int(torch._C._GLIBCXX_USE_CXX11_ABI))")

if [[ -z "$RUN_BENCHMARK_TESTS" && -z "$RUN_CPP_TESTS1" && -z "$RUN_CPP_TESTS2" && -z "$RUN_PYTHON_TESTS" ]]; then
run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
run_torch_xla_python_tests $XLA_DIR $USE_COVERAGE
run_torch_xla_cpp_tests $XLA_DIR $USE_COVERAGE
run_torch_xla_benchmark_tests $XLA_DIR
else
# run tests separately.
if [[ "$RUN_PYTHON_TESTS" == "python_tests" ]]; then
run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
run_torch_xla_python_tests $XLA_DIR $USE_COVERAGE
elif [[ "$RUN_BENCHMARK_TESTS" == "benchmark_tests" ]]; then
run_torch_xla_benchmark_tests $XLA_DIR
else
run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
run_torch_xla_cpp_tests $XLA_DIR $USE_COVERAGE
fi
fi
2 changes: 1 addition & 1 deletion benchmarks/benchmark_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _is_available(self, experiment_config):
return True

def load_experiment(self, experiment_config):
accelerator = experiment_config["accelerator"]
accelerator = experiment_config["accelerator"].lower()
xla = experiment_config["xla"]
xla_flags = experiment_config["xla_flags"]
dynamo = experiment_config["dynamo"]
Expand Down
107 changes: 107 additions & 0 deletions test/benchmarks/run_torchbench_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/bin/bash
# Torchbench CI driver: installs pinned torch domain packages plus the
# requested torchbench models, then runs each model through the XLA
# experiment runner. Model names are taken from the command line:
#   run_torchbench_tests.sh MODEL [MODEL...]
set -ex
# Absolute directory of this script (resolves symlinked cwd via pwd -P).
CDIR="$(cd "$(dirname "$0")" ; pwd -P)"
echo $CDIR

# Models to install and test, passed in by the caller (e.g. run_tests.sh).
TORCHBENCH_MODELS=("$@")
# construct the absolute path
# Layout assumption: this script lives at <xla>/test/benchmarks/, with the
# pytorch checkout (and its sibling vision/audio/text/benchmark clones)
# one level above the xla tree — TODO confirm against CI workspace layout.
XLA_DIR=$CDIR/../../
PYTORCH_DIR=$XLA_DIR/../
TORCHVISION_DIR=$PYTORCH_DIR/vision
TORCHAUDIO_DIR=$PYTORCH_DIR/audio
TORCHTEXT_DIR=$PYTORCH_DIR/text
TORCHBENCH_DIR=$PYTORCH_DIR/benchmark

# Note [Keep Going]
#
# Set the `CONTINUE_ON_ERROR` env var to `1` to make the CI tests continue on error.
# This will allow you to see all the failures on your PR, not stopping with the first
# test failure like the default behavior.
CONTINUE_ON_ERROR="${CONTINUE_ON_ERROR:-0}"
if [[ "$CONTINUE_ON_ERROR" == "1" ]]; then
set +e
fi


#######################################
# Clone and install the torchvision, torchaudio and torchtext packages at
# the commits pinned by the pytorch checkout.
# Globals:
#   CDIR, PYTORCH_DIR, TORCHVISION_DIR, TORCHAUDIO_DIR, TORCHTEXT_DIR (read)
# Outputs: echoes each pinned commit hash; build logs go to /dev/null.
# Returns: non-zero (aborting under `set -e`) if any clone/checkout/build fails.
#######################################
function install_package() {
  pushd "$CDIR"

  # Commit-hash variables are local so they don't leak into global scope;
  # all path expansions are quoted so paths with spaces don't word-split.
  local torchvision_commit_hash
  torchvision_commit_hash=$(cat "$PYTORCH_DIR/.github/ci_commit_pins/vision.txt")
  echo torchvision_commit_hash: "$torchvision_commit_hash"
  git clone --quiet https://github.com/pytorch/vision.git "$TORCHVISION_DIR"
  cd "$TORCHVISION_DIR"
  git checkout "$torchvision_commit_hash"
  python setup.py install 1>/dev/null

  local torchaudio_commit_hash
  torchaudio_commit_hash=$(cat "$PYTORCH_DIR/.github/ci_commit_pins/audio.txt")
  echo torchaudio_commit_hash: "$torchaudio_commit_hash"
  git clone --quiet https://github.com/pytorch/audio.git "$TORCHAUDIO_DIR"
  cd "$TORCHAUDIO_DIR"
  git checkout "$torchaudio_commit_hash"
  python setup.py install 1>/dev/null

  local torchtext_commit_hash
  torchtext_commit_hash=$(cat "$PYTORCH_DIR/.github/ci_commit_pins/text.txt")
  echo torchtext_commit_hash: "$torchtext_commit_hash"
  git clone --quiet https://github.com/pytorch/text.git "$TORCHTEXT_DIR"
  cd "$TORCHTEXT_DIR"
  git checkout "$torchtext_commit_hash"
  git submodule update --init --recursive
  # torchtext builds from submodules, so clean before installing.
  python setup.py clean install 1>/dev/null

  popd
}

#######################################
# Clone pytorch/benchmark and install only the requested torchbench models.
# Globals:
#   CDIR, TORCHBENCH_DIR, TORCHBENCH_MODELS (read)
# Outputs: progress to stdout, errors to stderr.
# Returns: exits 1 if any model fails to install.
#######################################
function install_torchbench_models() {
  pushd "$CDIR"

  git clone --quiet https://github.com/pytorch/benchmark.git "$TORCHBENCH_DIR"
  cd "$TORCHBENCH_DIR"
  local model
  for model in "${TORCHBENCH_MODELS[@]}"; do
    echo "Installing model: $model"
    # Use `if ! cmd` rather than testing $? afterwards: under `set -ex` a
    # failing install would abort the script before the $? check ever ran,
    # skipping the diagnostic message below.
    if ! python install.py models "$model"; then
      echo "ERROR: Failed to install $model. Exiting." >&2
      exit 1
    fi
  done
  popd
}

# Number of models that passed; reported in the end-of-run summary.
success_count=0

#######################################
# Run the torchbench experiment runner once per requested model.
# Globals:
#   TORCHBENCH_MODELS (read), success_count (written)
# Returns: 0 if every model passed, 1 if any model failed.
#######################################
function run_tests {
  local overall_status=0
  local pjrt_device="CPU"
  # TODO(piz): Uncomment the following if we decide to run on GPU.
  # if [ -x "$(command -v nvidia-smi)" ]; then
  #   num_devices=$(nvidia-smi --list-gpus | wc -l)
  #   echo "Found $num_devices GPU devices..."
  #   export GPU_NUM_DEVICES=$num_devices
  #   pjrt_device="CUDA"
  # fi
  local model
  for model in "${TORCHBENCH_MODELS[@]}"; do
    echo "testing model: $model"
    # `if ! cmd` is required for the keep-going behavior: with `set -e`, a
    # plain command followed by an `if [ $? -ne 0 ]` check would terminate
    # the script on the first failing model instead of recording it.
    if ! PJRT_DEVICE=$pjrt_device python -u benchmarks/experiment_runner.py \
        --suite-name=torchbench \
        --experiment-config='{"accelerator":"'"$pjrt_device"'","xla":"PJRT","dynamo":"openxla","test":"eval","torch_xla2":null,"xla_flags":null,"keep_model_data_on_cuda":false}' \
        --model-config='{"model_name":"'"$model"'"}'; then
      echo "ERROR: Failed to test $model. Exiting with failure." >&2
      overall_status=1
    else
      success_count=$((success_count + 1))
    fi
  done
  return $overall_status
}

# Install dependencies and models, then run the suite and report the outcome.
install_package
install_torchbench_models
# `if ! run_tests` instead of checking $? afterwards: under `set -e` a
# non-zero return from run_tests would exit the script before the check,
# so the failure message (and a deliberate exit code) would never appear.
if ! run_tests; then
  echo "Torchbench test suite failed."
  exit 1
else
  echo "All torchbench tests passed successfully."
fi
# Summary is only reached on full success (failure exits above).
total_models=${#TORCHBENCH_MODELS[@]}
echo "Successful tests: $success_count out of $total_models models"

0 comments on commit 291764f

Please sign in to comment.