From c7d9755d66d48c3c7743ccc1f4fa3eae02aecff4 Mon Sep 17 00:00:00 2001
From: Pei Zhang <piz@google.com>
Date: Wed, 7 Aug 2024 00:41:32 -0700
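Subject: [PATCH] Remove openxla_eval from benchmarks and tests (#7811)

Drop the `openxla_eval` dynamo option from the benchmark scripts and
README, and update the benchmark tests, input fixtures, and expected
outputs to match.

Also make test/benchmarks/run_torchbench_tests.sh check out the
torchbench commit listed in
$PYTORCH_DIR/.github/ci_commit_pins/torchbench.txt before installing
models.
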
---
 benchmarks/README.md                          |  2 +-
 benchmarks/aggregate.py                       |  6 ++---
 benchmarks/benchmark_experiment.py            |  5 +---
 benchmarks/experiment_runner.py               |  2 +-
 benchmarks/llama.py                           |  2 +-
 benchmarks/matmul_bench.py                    |  3 +--
 benchmarks/nightly.sh                         |  4 ++--
 benchmarks/run_single_graph_bm.sh             |  2 +-
 benchmarks/run_top_tier_bm.sh                 |  2 +-
 test/benchmarks/a6000.inference.speedup.test  |  4 ++--
 test/benchmarks/a6000.jsonl                   |  2 --
 test/benchmarks/run_torchbench_tests.sh       |  3 +++
 test/benchmarks/test_experiment_runner.py     |  9 +++----
 .../v100.inference.histogram.tab.test         | 16 ++++++-------
 test/benchmarks/v100.inference.histogram.test |  6 ++---
 .../benchmarks/v100.inference.latest.tab.test | 24 +++++++++----------
 test/benchmarks/v100.inference.latest.test    |  8 +++----
 .../v100.inference.latest.tier1.test          |  4 ++--
 .../v100.inference.latest_grouped.test        |  9 ++++---
 ...100.inference.speedup.baseline_latest.test |  6 ++---
 .../v100.inference.speedup.tab.test           | 22 ++++++++---------
 test/benchmarks/v100.inference.speedup.test   |  6 ++---
 test/benchmarks/v100.jsonl                    |  4 ----
 test/dynamo/test_bridge.py                    |  2 +-
 24 files changed, 71 insertions(+), 82 deletions(-)

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 71d6b63d139..6bf500f4705 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -104,7 +104,7 @@ works only for inference now.
 cd pytorch
 PJRT_DEVICE=CUDA python3 new_xla/benchmarks/experiment_runner.py \
     --xla=PJRT \
-    --dynamo=openxla_eval \
+    --dynamo=openxla \
     --test=eval \
     --filter=BERT_pytorch$ \
     --suite-name=torchbench \
diff --git a/benchmarks/aggregate.py b/benchmarks/aggregate.py
index 3da82f4db78..100fe8d7280 100644
--- a/benchmarks/aggregate.py
+++ b/benchmarks/aggregate.py
@@ -30,7 +30,6 @@
 _title_map = {
     'eager': 'Eager',
     'inductor': 'Inductor',
-    'openxla_eval+dynamo': 'XLA_Eval+Dynamo',
     'openxla+dynamo': 'XLA+Dynamo',
     'openxla+lazytensor': 'XLA+LazyTensor',
 }
@@ -99,7 +98,7 @@ def get_backend_name(dynamo: str, xla: str) -> str:
   if dynamo == 'inductor':
     return 'inductor'
   if xla == 'PJRT':
-    assert dynamo == 'openxla' or dynamo == 'openxla_eval' or dynamo == None
+    assert dynamo == 'openxla' or dynamo == None
     xla_name = dynamo
     tracer = 'dynamo'
     if not dynamo:
@@ -132,6 +131,7 @@ def process_file(args, results_map: Dict[str, Any], filename: str):
           sys.exit(f'JSONL record does not contain key {k}. JSONL: {r}')
         for kk in fields[k]:
           if kk not in r[k]:
+            print("lllllll: ", k)
             sys.exit(f'JSONL record does not contain key {k}.{kk}. JSONL: {r}')
 
       # Read in what we need.
@@ -707,7 +707,7 @@ def parse_args(args=None):
   args.exclude = args.exclude or [r"^$"]
   if not args.backends:
     if args.test == 'inference':
-      args.backends = ['inductor', 'openxla+dynamo', 'openxla_eval+dynamo']
+      args.backends = ['inductor', 'openxla+dynamo']
     else:
       args.backends = ['inductor', 'openxla+dynamo']
   for backend in args.backends:
diff --git a/benchmarks/benchmark_experiment.py b/benchmarks/benchmark_experiment.py
index 7799a765564..fff3573424b 100644
--- a/benchmarks/benchmark_experiment.py
+++ b/benchmarks/benchmark_experiment.py
@@ -23,7 +23,7 @@ def list_experiment_configs(self):
         "accelerator": ["cpu", "cuda", "tpu"],
         "xla": [None, "PJRT", "XRT"],
         "xla_flags": [None],
-        "dynamo": [None, "inductor", "openxla_eval", "openxla"],
+        "dynamo": [None, "inductor", "openxla"],
         "torch_xla2": [None],  # options only apply to torch_xla2
         "test": ["eval", "train"],
         "keep_model_data_on_cuda": [False],
@@ -96,9 +96,6 @@ def _is_available(self,
     if cfg_dynamo == "inductor":
       if cfg_accelerator == "tpu" or cfg_xla is not None:
         return False
-    elif cfg_dynamo == "openxla_eval":
-      if cfg_xla is None or cfg_test != "eval":
-        return False
     elif cfg_dynamo == "openxla":
       if cfg_xla is None:
         return False
diff --git a/benchmarks/experiment_runner.py b/benchmarks/experiment_runner.py
index d52c8e90860..81bd650f9ac 100644
--- a/benchmarks/experiment_runner.py
+++ b/benchmarks/experiment_runner.py
@@ -779,7 +779,7 @@ def __str__(self):
   )
   parser.add_argument(
       "--dynamo",
-      choices=["None", "inductor", "openxla_eval", "openxla"],
+      choices=["None", "inductor", "openxla"],
       action="append",
       help="Specify an xla option to use.",
   )
diff --git a/benchmarks/llama.py b/benchmarks/llama.py
index 41a884e6a02..53e88ddb0d2 100644
--- a/benchmarks/llama.py
+++ b/benchmarks/llama.py
@@ -136,7 +136,7 @@ def run_benchmarks(args, llama_dir: str, results_dir: str,
       os.remove(params_json)
     os.symlink(f'{size}.json', params_json)
     model_name = f"llama2.{size}"
-    for dynamo in [None, 'inductor', 'openxla', 'openxla_eval']:
+    for dynamo in [None, 'inductor', 'openxla']:
       backend = dynamo if dynamo else 'lazytensor'
       xla = None if dynamo == 'inductor' else 'PJRT'
       summary = f"{model_name} eval {backend} batch {args.batch_size}"
diff --git a/benchmarks/matmul_bench.py b/benchmarks/matmul_bench.py
index bd021acd850..af518f355ca 100644
--- a/benchmarks/matmul_bench.py
+++ b/benchmarks/matmul_bench.py
@@ -53,8 +53,7 @@ def main():
   for dtype in dtypes:
     for inductor_matmul, xla_matmul in zip(
         get_matmuls(device='cuda', dtype=dtype, backend='inductor'),
-        get_matmuls(
-            device=xm.xla_device(), dtype=dtype, backend='openxla_eval')):
+        get_matmuls(device=xm.xla_device(), dtype=dtype, backend='openxla')):
       ind_lhs_shape, ind_rhs_shape, ind_fn = inductor_matmul
       xla_lhs_shape, xla_rhs_shape, xla_fn = xla_matmul
       assert ind_lhs_shape == xla_lhs_shape, f"Expect matmul shapes to match for benchmarking. Mismatch lhs: {ind_lhs_shape}, rhs: {xla_rhs_shape}"
diff --git a/benchmarks/nightly.sh b/benchmarks/nightly.sh
index a787d3c1d51..7817d02496e 100755
--- a/benchmarks/nightly.sh
+++ b/benchmarks/nightly.sh
@@ -153,7 +153,7 @@ cd pytorch
 python xla/benchmarks/experiment_runner.py \
        --test=eval --test=train \
        --xla=PJRT \
-       --dynamo=None --dynamo=openxla --dynamo=openxla_eval \
+       --dynamo=None --dynamo=openxla \
        --suite-name=torchbench --accelerator=cuda \
        --output-dirname=${WORKSPACE_RESULTS_DIR:?} \
        --repeat=${REPEAT:?} --print-subprocess \
@@ -187,7 +187,7 @@ if [[ ${ENABLE_PROFILING?} ]]; then
   COMMON_TITLE_PREFIX="[Profiling ON] "
 fi
 
-INFERENCE_BACKENDS_CMD='--backends inductor openxla+dynamo openxla_eval+dynamo openxla+lazytensor'
+INFERENCE_BACKENDS_CMD='--backends inductor openxla+dynamo openxla+lazytensor'
 TRAINING_BACKENDS_CMD='--backends inductor openxla+dynamo openxla+lazytensor'
 
 # Skip result files coming from one-off runs.
diff --git a/benchmarks/run_single_graph_bm.sh b/benchmarks/run_single_graph_bm.sh
index d32334182a7..98e10a06d05 100755
--- a/benchmarks/run_single_graph_bm.sh
+++ b/benchmarks/run_single_graph_bm.sh
@@ -8,7 +8,7 @@ OUT_PATH=xla/benchmarks/bm_results/single_graph/$DATE
 mkdir -p $OUT_PATH
 
 python new_xla/benchmarks/experiment_runner.py \
-    --dynamo=inductor --dynamo=openxla_eval --dynamo=openxla \
+    --dynamo=inductor --dynamo=openxla \
     --xla=None --xla=PJRT \
     --test=eval \
     --filter-by-single-graph \
diff --git a/benchmarks/run_top_tier_bm.sh b/benchmarks/run_top_tier_bm.sh
index ca67e361a6f..9b8e8eb8eb6 100755
--- a/benchmarks/run_top_tier_bm.sh
+++ b/benchmarks/run_top_tier_bm.sh
@@ -8,7 +8,7 @@ OUT_PATH=xla/benchmarks/bm_results/$DATE
 mkdir -p $OUT_PATH
 
 python xla/benchmarks/experiment_runner.py \
-    --dynamo=inductor --dynamo=openxla_eval --dynamo=openxla \
+    --dynamo=inductor --dynamo=openxla \
     --xla=None --xla=PJRT \
     --test=eval --test=train \
     --filter-by-tier=1 --filter-by-tier=2 --filter-by-tier=3 \
diff --git a/test/benchmarks/a6000.inference.speedup.test b/test/benchmarks/a6000.inference.speedup.test
index 2b056888315..56bcbcc2552 100644
--- a/test/benchmarks/a6000.inference.speedup.test
+++ b/test/benchmarks/a6000.inference.speedup.test
@@ -1,2 +1,2 @@
-# Datetime(UTC),Speedup(Inductor/Oldest Inductor),StdDev,Speedup(XLA+Dynamo/Oldest Inductor),StdDev,Speedup(XLA_Eval+Dynamo/Oldest Inductor),StdDev
-2023-11-11 04:43:56.070348,1.0,0.0,,,0.76855822,0.0
+# Datetime(UTC),Speedup(Inductor/Oldest Inductor),StdDev,Speedup(XLA+Dynamo/Oldest Inductor),StdDev
+2023-11-11 04:43:56.070348,1.0,0.0,,
diff --git a/test/benchmarks/a6000.jsonl b/test/benchmarks/a6000.jsonl
index 4e6fef904a2..77011ae8761 100644
--- a/test/benchmarks/a6000.jsonl
+++ b/test/benchmarks/a6000.jsonl
@@ -1,7 +1,5 @@
-{"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Quadro P1000, NVIDIA RTX A6000, ", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla_eval", "test": "eval", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.01630276208743453, 0.01630276208743453, 0.01630276208743453], "per_iter_time": [15.150130984999123, 0.01630276208743453, 0.01630276208743453, 0.01630276208743453]}, "outputs_file": null, "timestamp": 1699677836.070348}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Quadro P1000, NVIDIA RTX A6000, ", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "train", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.02733510290272534, 0.02733510290272534, 0.02733510290272534], "per_iter_time": [15.150130984999123, 0.02733510290272534, 0.02733510290272534, 0.02733510290272534]}, "outputs_file": null, "timestamp": 1699677836.070348}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Quadro P1000, NVIDIA RTX A6000, ", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "eval", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.023930025985464454, 0.023930025985464454, 0.023930025985464454], "per_iter_time": [15.150130984999123, 0.023930025985464454, 0.023930025985464454, 0.023930025985464454]}, "outputs_file": null, "timestamp": 1699677836.070348}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Quadro P1000, NVIDIA RTX A6000, ", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "train", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.07779271597974002, 0.07779271597974002, 0.07779271597974002], "per_iter_time": [15.150130984999123, 0.07779271597974002, 0.07779271597974002, 0.07779271597974002]}, "outputs_file": null, "timestamp": 1699677836.070348}
-{"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Quadro P1000, NVIDIA RTX A6000, ", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla_eval", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.07154428213834763, 0.07154428213834763, 0.07154428213834763], "per_iter_time": [15.150130984999123, 0.07154428213834763, 0.07154428213834763, 0.07154428213834763]}, "outputs_file": null, "timestamp": 1699677836.070348}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Quadro P1000, NVIDIA RTX A6000, ", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.028790320036932826, 0.028790320036932826, 0.028790320036932826], "per_iter_time": [15.150130984999123, 0.028790320036932826, 0.028790320036932826, 0.028790320036932826]}, "outputs_file": null, "timestamp": 1699677836.070348}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Quadro P1000, NVIDIA RTX A6000, ", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "train", "batch_size": 4}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.31177610205486417, 0.31177610205486417, 0.31177610205486417], "per_iter_time": [15.150130984999123, 0.31177610205486417, 0.31177610205486417, 0.31177610205486417]}, "outputs_file": null, "timestamp": 1699677836.070348}
diff --git a/test/benchmarks/run_torchbench_tests.sh b/test/benchmarks/run_torchbench_tests.sh
index 5dac81722ee..17cd61b5b29 100755
--- a/test/benchmarks/run_torchbench_tests.sh
+++ b/test/benchmarks/run_torchbench_tests.sh
@@ -54,8 +54,11 @@ function install_package() {
 function install_torchbench_models() {
   pushd $CDIR
 
+  torchbench_commit_hash=$(cat $PYTORCH_DIR/.github/ci_commit_pins/torchbench.txt)
   git clone --quiet https://github.com/pytorch/benchmark.git "$TORCHBENCH_DIR"
   cd $TORCHBENCH_DIR
+  git checkout $torchbench_commit_hash
+
   for model in "${TORCHBENCH_MODELS[@]}"; do
       echo "Installing model: $model"
       python install.py models "$model"
diff --git a/test/benchmarks/test_experiment_runner.py b/test/benchmarks/test_experiment_runner.py
index 0f107afbbfd..89765b045c3 100644
--- a/test/benchmarks/test_experiment_runner.py
+++ b/test/benchmarks/test_experiment_runner.py
@@ -92,13 +92,12 @@ def test_dummy_dry_run_inductor_cuda(self):
       self.assertIn(expected, child.stderr)
 
   @absltest.skipUnless(xr.device_type() in {'CUDA'}, 'Needs CUDA accelerator')
-  def test_dummy_openxla_eval_train_cuda(self):
+  def test_dummy_openxla_train_cuda(self):
     child = subprocess.run([
         "python",
         EXPERIMENT_RUNNER_PY,
         "--dynamo=inductor",
         "--dynamo=openxla",
-        "--dynamo=openxla_eval",
         "--xla=PJRT",
         "--xla=None",
         "--test=eval",
@@ -111,9 +110,8 @@ def test_dummy_openxla_eval_train_cuda(self):
                            capture_output=True,
                            text=True)
     expected_in_stderr = [
-        "Number of selected experiment configs: 5",
+        "Number of selected experiment configs: 4",
         "Number of selected model configs: 1",
-        "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": \"PJRT\", \"xla_flags\": null, \"dynamo\": \"openxla_eval\", \"torch_xla2\": null, \"test\": \"eval\", \"keep_model_data_on_cuda\": false}",
         "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": \"PJRT\", \"xla_flags\": null, \"dynamo\": \"openxla\", \"torch_xla2\": null, \"test\": \"train\", \"keep_model_data_on_cuda\": false}",
         "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": \"PJRT\", \"xla_flags\": null, \"dynamo\": \"openxla\", \"torch_xla2\": null, \"test\": \"eval\", \"keep_model_data_on_cuda\": false}",
         "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": null, \"xla_flags\": null, \"dynamo\": \"inductor\", \"torch_xla2\": null, \"test\": \"eval\", \"keep_model_data_on_cuda\": false}",
@@ -137,11 +135,10 @@ def test_dummy_dynamo_none_cuda(self):
                            capture_output=True,
                            text=True)
     expected_in_stderr = [
-        "Number of selected experiment configs: 9",
+        "Number of selected experiment configs: 8",
         "Number of selected model configs: 1",
         "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": \"PJRT\", \"xla_flags\": null, \"dynamo\": null, \"torch_xla2\": null, \"test\": \"eval\", \"keep_model_data_on_cuda\": false}",
         "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": \"PJRT\", \"xla_flags\": null, \"dynamo\": null, \"torch_xla2\": null, \"test\": \"train\", \"keep_model_data_on_cuda\": false}",
-        "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": \"PJRT\", \"xla_flags\": null, \"dynamo\": \"openxla_eval\", \"torch_xla2\": null, \"test\": \"eval\", \"keep_model_data_on_cuda\": false}",
         "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": \"PJRT\", \"xla_flags\": null, \"dynamo\": \"openxla\", \"torch_xla2\": null, \"test\": \"eval\", \"keep_model_data_on_cuda\": false}",
         "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": \"PJRT\", \"xla_flags\": null, \"dynamo\": \"openxla\", \"torch_xla2\": null, \"test\": \"train\", \"keep_model_data_on_cuda\": false}",
         "--model-config={\"model_name\": \"dummy\"} --experiment-config={\"accelerator\": \"cuda\", \"xla\": null, \"xla_flags\": null, \"dynamo\": null, \"torch_xla2\": null, \"test\": \"eval\", \"keep_model_data_on_cuda\": false}",
diff --git a/test/benchmarks/v100.inference.histogram.tab.test b/test/benchmarks/v100.inference.histogram.tab.test
index bd498fb1074..bc72b87ae74 100644
--- a/test/benchmarks/v100.inference.histogram.tab.test
+++ b/test/benchmarks/v100.inference.histogram.tab.test
@@ -1,9 +1,9 @@
 # ARGS: --format=tab
-╒════════════════════════════╤════════════╤════════════╤════════════╤══════════════╤══════════════╤══════════════╤═══════════════════╤═══════════════════╤═══════════════════╕
-│ Datetime(UTC)              │   Inductor │   Inductor │   Inductor │   XLA+Dynamo │   XLA+Dynamo │   XLA+Dynamo │   XLA_Eval+Dynamo │   XLA_Eval+Dynamo │   XLA_Eval+Dynamo │
-│                            │        p95 │        p50 │         p5 │          p95 │          p50 │           p5 │               p95 │               p50 │                p5 │
-╞════════════════════════════╪════════════╪════════════╪════════════╪══════════════╪══════════════╪══════════════╪═══════════════════╪═══════════════════╪═══════════════════╡
-│ 2023-11-11 05:32:18.723407 │       1.00 │       1.00 │       1.00 │         0.98 │         0.86 │         0.74 │              0.94 │              0.79 │              0.65 │
-├────────────────────────────┼────────────┼────────────┼────────────┼──────────────┼──────────────┼──────────────┼───────────────────┼───────────────────┼───────────────────┤
-│ 2023-11-12 05:32:18        │       1.51 │       1.41 │       1.31 │         1.53 │         1.17 │         0.81 │              1.34 │              1.05 │              0.77 │
-╘════════════════════════════╧════════════╧════════════╧════════════╧══════════════╧══════════════╧══════════════╧═══════════════════╧═══════════════════╧═══════════════════╛
+╒════════════════════════════╤════════════╤════════════╤════════════╤══════════════╤══════════════╤══════════════╕
+│ Datetime(UTC)              │   Inductor │   Inductor │   Inductor │   XLA+Dynamo │   XLA+Dynamo │   XLA+Dynamo │
+│                            │        p95 │        p50 │         p5 │          p95 │          p50 │           p5 │
+╞════════════════════════════╪════════════╪════════════╪════════════╪══════════════╪══════════════╪══════════════╡
+│ 2023-11-11 05:32:18.723407 │       1.00 │       1.00 │       1.00 │         0.98 │         0.86 │         0.74 │
+├────────────────────────────┼────────────┼────────────┼────────────┼──────────────┼──────────────┼──────────────┤
+│ 2023-11-12 05:32:18        │       1.51 │       1.41 │       1.31 │         1.53 │         1.17 │         0.81 │
+╘════════════════════════════╧════════════╧════════════╧════════════╧══════════════╧══════════════╧══════════════╛
diff --git a/test/benchmarks/v100.inference.histogram.test b/test/benchmarks/v100.inference.histogram.test
index 90f59457bfe..105084fabc1 100644
--- a/test/benchmarks/v100.inference.histogram.test
+++ b/test/benchmarks/v100.inference.histogram.test
@@ -1,3 +1,3 @@
-# Datetime(UTC),Inductor p95,Inductor p50,Inductor p5,XLA+Dynamo p95,XLA+Dynamo p50,XLA+Dynamo p5,XLA_Eval+Dynamo p95,XLA_Eval+Dynamo p50,XLA_Eval+Dynamo p5
-2023-11-11 05:32:18.723407,1.0,1.0,1.0,0.97631327,0.85586259,0.7354119,0.94359157,0.79447,0.64534844
-2023-11-12 05:32:18,1.50833479,1.40761418,1.30689358,1.52901152,1.17088985,0.81276817,1.33687535,1.05136221,0.76584908
+# Datetime(UTC),Inductor p95,Inductor p50,Inductor p5,XLA+Dynamo p95,XLA+Dynamo p50,XLA+Dynamo p5
+2023-11-11 05:32:18.723407,1.0,1.0,1.0,0.97631327,0.85586259,0.7354119
+2023-11-12 05:32:18,1.50833479,1.40761418,1.30689358,1.52901152,1.17088985,0.81276817
diff --git a/test/benchmarks/v100.inference.latest.tab.test b/test/benchmarks/v100.inference.latest.tab.test
index 2e737417870..25a38303a65 100644
--- a/test/benchmarks/v100.inference.latest.tab.test
+++ b/test/benchmarks/v100.inference.latest.tab.test
@@ -1,12 +1,12 @@
-# ARGS: --backends inductor openxla+dynamo openxla_eval+dynamo openxla+lazytensor --format=tab
-╒════════════╤════════════╤══════════╤════════════════════╤══════════════╤══════════╤════════════════════╤═══════════════════╤══════════╤════════════════════╤══════════════════╤══════════╤════════════════════╕
-│   Workload │    Speedup │   StdDev │ ModelName          │      Speedup │   StdDev │ ModelName          │           Speedup │   StdDev │ ModelName          │          Speedup │   StdDev │ ModelName          │
-│            │   Inductor │          │ Inductor           │   XLA+Dynamo │          │ XLA+Dynamo         │   XLA_Eval+Dynamo │          │ XLA_Eval+Dynamo    │   XLA+LazyTensor │          │ XLA+LazyTensor     │
-│            │       over │          │                    │         over │          │                    │              over │          │                    │             over │          │                    │
-│            │     Oldest │          │                    │       Oldest │          │                    │            Oldest │          │                    │           Oldest │          │                    │
-│            │   Inductor │          │                    │     Inductor │          │                    │          Inductor │          │                    │         Inductor │          │                    │
-╞════════════╪════════════╪══════════╪════════════════════╪══════════════╪══════════╪════════════════════╪═══════════════════╪══════════╪════════════════════╪══════════════════╪══════════╪════════════════════╡
-│          0 │       1.30 │     0.00 │ Background_Matting │         0.77 │     0.00 │ Background_Matting │              0.73 │     0.00 │ Background_Matting │             0.41 │     0.00 │ Background_Matting │
-├────────────┼────────────┼──────────┼────────────────────┼──────────────┼──────────┼────────────────────┼───────────────────┼──────────┼────────────────────┼──────────────────┼──────────┼────────────────────┤
-│          1 │       1.52 │     0.07 │ BERT_pytorch       │         1.57 │     0.07 │ BERT_pytorch       │              1.37 │     0.06 │ BERT_pytorch       │                  │          │                    │
-╘════════════╧════════════╧══════════╧════════════════════╧══════════════╧══════════╧════════════════════╧═══════════════════╧══════════╧════════════════════╧══════════════════╧══════════╧════════════════════╛
+# ARGS: --backends inductor openxla+dynamo openxla+lazytensor --format=tab
+╒════════════╤════════════╤══════════╤════════════════════╤══════════════╤══════════╤════════════════════╤══════════════════╤══════════╤════════════════════╕
+│   Workload │    Speedup │   StdDev │ ModelName          │      Speedup │   StdDev │ ModelName          │          Speedup │   StdDev │ ModelName          │
+│            │   Inductor │          │ Inductor           │   XLA+Dynamo │          │ XLA+Dynamo         │   XLA+LazyTensor │          │ XLA+LazyTensor     │
+│            │       over │          │                    │         over │          │                    │             over │          │                    │
+│            │     Oldest │          │                    │       Oldest │          │                    │           Oldest │          │                    │
+│            │   Inductor │          │                    │     Inductor │          │                    │         Inductor │          │                    │
+╞════════════╪════════════╪══════════╪════════════════════╪══════════════╪══════════╪════════════════════╪══════════════════╪══════════╪════════════════════╡
+│          0 │       1.30 │     0.00 │ Background_Matting │         0.77 │     0.00 │ Background_Matting │             0.41 │     0.00 │ Background_Matting │
+├────────────┼────────────┼──────────┼────────────────────┼──────────────┼──────────┼────────────────────┼──────────────────┼──────────┼────────────────────┤
+│          1 │       1.52 │     0.07 │ BERT_pytorch       │         1.57 │     0.07 │ BERT_pytorch       │                  │          │                    │
+╘════════════╧════════════╧══════════╧════════════════════╧══════════════╧══════════╧════════════════════╧══════════════════╧══════════╧════════════════════╛
diff --git a/test/benchmarks/v100.inference.latest.test b/test/benchmarks/v100.inference.latest.test
index e87c8d126ed..5b5ed89be54 100644
--- a/test/benchmarks/v100.inference.latest.test
+++ b/test/benchmarks/v100.inference.latest.test
@@ -1,4 +1,4 @@
-# ARGS: --backends inductor openxla+dynamo openxla_eval+dynamo openxla+lazytensor --
-# Workload,Speedup(Inductor/Oldest Inductor),StdDev,ModelName(Inductor),Speedup(XLA+Dynamo/Oldest Inductor),StdDev,ModelName(XLA+Dynamo),Speedup(XLA_Eval+Dynamo/Oldest Inductor),StdDev,ModelName(XLA_Eval+Dynamo),Speedup(XLA+LazyTensor/Oldest Inductor),StdDev,ModelName(XLA+LazyTensor)
-0,1.2957024,0.0,Background_Matting,0.77297688,0.0,Background_Matting,0.7341254,0.0,Background_Matting,0.41071322,0.0,Background_Matting
-1,1.51952596,0.06679279,BERT_pytorch,1.56880282,0.06895882,BERT_pytorch,1.36859903,0.06015859,BERT_pytorch,,,
+# ARGS: --backends inductor openxla+dynamo openxla+lazytensor --
+# Workload,Speedup(Inductor/Oldest Inductor),StdDev,ModelName(Inductor),Speedup(XLA+Dynamo/Oldest Inductor),StdDev,ModelName(XLA+Dynamo),Speedup(XLA+LazyTensor/Oldest Inductor),StdDev,ModelName(XLA+LazyTensor)
+0,1.2957024,0.0,Background_Matting,0.77297688,0.0,Background_Matting,0.41071322,0.0,Background_Matting
+1,1.51952596,0.06679279,BERT_pytorch,1.56880282,0.06895882,BERT_pytorch,,,
diff --git a/test/benchmarks/v100.inference.latest.tier1.test b/test/benchmarks/v100.inference.latest.tier1.test
index a05f83ea435..1c1fcf8c36e 100644
--- a/test/benchmarks/v100.inference.latest.tier1.test
+++ b/test/benchmarks/v100.inference.latest.tier1.test
@@ -1,3 +1,3 @@
 # ARGS: --filter-by-tier=1
-# Workload,Speedup(Inductor/Oldest Inductor),StdDev,ModelName(Inductor),Speedup(XLA+Dynamo/Oldest Inductor),StdDev,ModelName(XLA+Dynamo),Speedup(XLA_Eval+Dynamo/Oldest Inductor),StdDev,ModelName(XLA_Eval+Dynamo)
-0,1.51952596,0.06679279,BERT_pytorch,1.56880282,0.06895882,BERT_pytorch,1.36859903,0.06015859,BERT_pytorch
+# Workload,Speedup(Inductor/Oldest Inductor),StdDev,ModelName(Inductor),Speedup(XLA+Dynamo/Oldest Inductor),StdDev,ModelName(XLA+Dynamo)
+0,1.51952596,0.06679279,BERT_pytorch,1.56880282,0.06895882,BERT_pytorch
diff --git a/test/benchmarks/v100.inference.latest_grouped.test b/test/benchmarks/v100.inference.latest_grouped.test
index 3ce6ad0d9e0..1c94ae44ac4 100644
--- a/test/benchmarks/v100.inference.latest_grouped.test
+++ b/test/benchmarks/v100.inference.latest_grouped.test
@@ -1,5 +1,4 @@
-# ARGS: --backends inductor openxla+dynamo openxla_eval+dynamo openxla+lazytensor --
-# ModelName,Speedup(Inductor/Oldest Inductor),StdDev,Speedup(XLA+Dynamo/Oldest Inductor),StdDev,Speedup(XLA_Eval+Dynamo/Oldest Inductor),StdDev,Speedup(XLA+LazyTensor/Oldest Inductor),StdDev
-Background_Matting,1.2957024,0.0,0.77297688,0.0,0.7341254,0.0,0.41071322,0.0
-BERT_pytorch,1.51952596,0.06679279,1.56880282,0.06895882,1.36859903,0.06015859,,
-GEOMEAN,1.40315838,0.03083885,1.10120312,0.02420242,1.00235887,0.02203001,0.41071322,0.0
+# ModelName,Speedup(Inductor/Oldest Inductor),StdDev,Speedup(XLA+Dynamo/Oldest Inductor),StdDev
+Background_Matting,1.2957024,0.0,0.77297688,0.0
+BERT_pytorch,1.51952596,0.06679279,1.56880282,0.06895882
+GEOMEAN,1.40315838,0.03083885,1.10120312,0.02420242
diff --git a/test/benchmarks/v100.inference.speedup.baseline_latest.test b/test/benchmarks/v100.inference.speedup.baseline_latest.test
index ff6382cb51d..f1e857eadea 100644
--- a/test/benchmarks/v100.inference.speedup.baseline_latest.test
+++ b/test/benchmarks/v100.inference.speedup.baseline_latest.test
@@ -1,4 +1,4 @@
 # ARGS: --baseline=latest
-# Datetime(UTC),Speedup(Inductor/Latest Inductor),StdDev,Speedup(XLA+Dynamo/Latest Inductor),StdDev,Speedup(XLA_Eval+Dynamo/Latest Inductor),StdDev
-2023-11-11 05:32:18.723407,0.71267792,0.01566335,0.60245072,0.0,0.55375084,0.0
-2023-11-12 05:32:18,1.0,0.0,0.78480315,0.0,0.71435904,0.0
+# Datetime(UTC),Speedup(Inductor/Latest Inductor),StdDev,Speedup(XLA+Dynamo/Latest Inductor),StdDev
+2023-11-11 05:32:18.723407,0.71267792,0.01566335,0.60245072,0.0
+2023-11-12 05:32:18,1.0,0.0,0.78480315,0.0
diff --git a/test/benchmarks/v100.inference.speedup.tab.test b/test/benchmarks/v100.inference.speedup.tab.test
index 57e4c539016..081f7a89cb9 100644
--- a/test/benchmarks/v100.inference.speedup.tab.test
+++ b/test/benchmarks/v100.inference.speedup.tab.test
@@ -1,12 +1,12 @@
 # ARGS: --format=tab
-╒════════════════════════════╤════════════╤══════════╤══════════════╤══════════╤═══════════════════╤══════════╕
-│ Datetime(UTC)              │    Speedup │   StdDev │      Speedup │   StdDev │           Speedup │   StdDev │
-│                            │   Inductor │          │   XLA+Dynamo │          │   XLA_Eval+Dynamo │          │
-│                            │       over │          │         over │          │              over │          │
-│                            │     Oldest │          │       Oldest │          │            Oldest │          │
-│                            │   Inductor │          │     Inductor │          │          Inductor │          │
-╞════════════════════════════╪════════════╪══════════╪══════════════╪══════════╪═══════════════════╪══════════╡
-│ 2023-11-11 05:32:18.723407 │       1.00 │     0.03 │         0.85 │     0.02 │              0.78 │     0.02 │
-├────────────────────────────┼────────────┼──────────┼──────────────┼──────────┼───────────────────┼──────────┤
-│ 2023-11-12 05:32:18        │       1.40 │     0.03 │         1.10 │     0.02 │              1.00 │     0.02 │
-╘════════════════════════════╧════════════╧══════════╧══════════════╧══════════╧═══════════════════╧══════════╛
+╒════════════════════════════╤════════════╤══════════╤══════════════╤══════════╕
+│ Datetime(UTC)              │    Speedup │   StdDev │      Speedup │   StdDev │
+│                            │   Inductor │          │   XLA+Dynamo │          │
+│                            │       over │          │         over │          │
+│                            │     Oldest │          │       Oldest │          │
+│                            │   Inductor │          │     Inductor │          │
+╞════════════════════════════╪════════════╪══════════╪══════════════╪══════════╡
+│ 2023-11-11 05:32:18.723407 │       1.00 │     0.03 │         0.85 │     0.02 │
+├────────────────────────────┼────────────┼──────────┼──────────────┼──────────┤
+│ 2023-11-12 05:32:18        │       1.40 │     0.03 │         1.10 │     0.02 │
+╘════════════════════════════╧════════════╧══════════╧══════════════╧══════════╛
diff --git a/test/benchmarks/v100.inference.speedup.test b/test/benchmarks/v100.inference.speedup.test
index a2daa03a1b3..70f6671038c 100644
--- a/test/benchmarks/v100.inference.speedup.test
+++ b/test/benchmarks/v100.inference.speedup.test
@@ -1,3 +1,3 @@
-# Datetime(UTC),Speedup(Inductor/Oldest Inductor),StdDev,Speedup(XLA+Dynamo/Oldest Inductor),StdDev,Speedup(XLA_Eval+Dynamo/Oldest Inductor),StdDev
-2023-11-11 05:32:18.723407,1.0,0.03108182,0.84533378,0.01857889,0.77700013,0.01707704
-2023-11-12 05:32:18,1.40315838,0.03083885,1.10120312,0.02420242,1.00235887,0.02203001
+# Datetime(UTC),Speedup(Inductor/Oldest Inductor),StdDev,Speedup(XLA+Dynamo/Oldest Inductor),StdDev
+2023-11-11 05:32:18.723407,1.0,0.03108182,0.84533378,0.01857889
+2023-11-12 05:32:18,1.40315838,0.03083885,1.10120312,0.02420242
diff --git a/test/benchmarks/v100.jsonl b/test/benchmarks/v100.jsonl
index 38aa717dea6..5bec92b3fdb 100644
--- a/test/benchmarks/v100.jsonl
+++ b/test/benchmarks/v100.jsonl
@@ -1,21 +1,17 @@
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "train", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.055821777787059546, 0.055821777787059546, 0.055821777787059546], "per_iter_time": [15.150130984999123, 0.055821777787059546, 0.055821777787059546, 0.055821777787059546]}, "outputs_file": null, "timestamp": 1699680738.7234068}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "eval", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.032508088905364275, 0.032508088905364275, 0.032508088905364275], "per_iter_time": [15.150130984999123, 0.032508088905364275, 0.032508088905364275, 0.032508088905364275]}, "outputs_file": null, "timestamp": 1699680738.7234068}
-{"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla_eval", "test": "eval", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.033508088905364275, 0.033508088905364275, 0.033508088905364275], "per_iter_time": [15.150130984999123, 0.033508088905364275, 0.033508088905364275, 0.033508088905364275]}, "outputs_file": null, "timestamp": 1699680738.7234068}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "eval", "batch_size": 16}, "repeat": 5, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.031173147726804018, 0.033173147726804018, 0.031173147726804018, 0.033173147726804018], "per_iter_time": [15.150130984999123, 0.031173147726804018, 0.033173147726804018, 0.031173147726804018, 0.033173147726804018]}, "outputs_file": null, "timestamp": 1699680738.7234068}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "train", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.10635835910215974, 0.10635835910215974, 0.10635835910215974], "per_iter_time": [15.150130984999123, 0.10635835910215974, 0.10635835910215974, 0.10635835910215974]}, "outputs_file": null, "timestamp": 1699680738.7234068}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "train", "batch_size": null}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": null, "per_iter_time": null}, "outputs_file": null, "timestamp": 1699680738.7234068}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.06068705682083964, 0.06068705682083964, 0.06068705682083964], "per_iter_time": [15.150130984999123, 0.06068705682083964, 0.06068705682083964, 0.06068705682083964]}, "outputs_file": null, "timestamp": 1699680738.7234068}
-{"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla_eval", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.06968705682083964, 0.06968705682083964, 0.06968705682083964], "per_iter_time": [15.150130984999123, 0.06968705682083964, 0.06968705682083964, 0.06968705682083964]}, "outputs_file": null, "timestamp": 1699680738.7234068}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.043817784171551466, 0.043817784171551466, 0.043817784171551466], "per_iter_time": [15.150130984999123, 0.043817784171551466, 0.043817784171551466, 0.043817784171551466]}, "outputs_file": null, "timestamp": 1699680738.7234068}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "train", "batch_size": 4}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.45498354313895106, 0.45498354313895106, 0.45498354313895106], "per_iter_time": [15.150130984999123, 0.45498354313895106, 0.45498354313895106, 0.45498354313895106]}, "outputs_file": null, "timestamp": 1699680738.7234068}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "train", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.045821777787059546, 0.045821777787059546, 0.045821777787059546], "per_iter_time": [15.150130984999123, 0.045821777787059546, 0.045821777787059546, 0.045821777787059546]}, "outputs_file": null, "timestamp": 1699767138}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "eval", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.020508088905364275, 0.020508088905364275, 0.020508088905364275], "per_iter_time": [15.150130984999123, 0.020508088905364275, 0.020508088905364275, 0.020508088905364275]}, "outputs_file": null, "timestamp": 1699767138}
-{"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla_eval", "test": "eval", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.023508088905364275, 0.023508088905364275, 0.023508088905364275], "per_iter_time": [15.150130984999123, 0.023508088905364275, 0.023508088905364275, 0.023508088905364275]}, "outputs_file": null, "timestamp": 1699767138}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "eval", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.021173147726804018, 0.021173147726804018, 0.021173147726804018], "per_iter_time": [15.150130984999123, 0.021173147726804018, 0.021173147726804018, 0.021173147726804018]}, "outputs_file": null, "timestamp": 1699767138}
 {"model": {"suite_name": "torchbench", "model_name": "BERT_pytorch"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "train", "batch_size": 16}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.09635835910215974, 0.09635835910215974, 0.09635835910215974], "per_iter_time": [15.150130984999123, 0.09635835910215974, 0.09635835910215974, 0.09635835910215974]}, "outputs_file": null, "timestamp": 1699767138}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "train", "batch_size": null}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": null, "per_iter_time": null}, "outputs_file": null, "timestamp": 1699767138}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.05668705682083964, 0.05668705682083964, 0.05668705682083964], "per_iter_time": [15.150130984999123, 0.05668705682083964, 0.05668705682083964, 0.05668705682083964]}, "outputs_file": null, "timestamp": 1699767138}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": null, "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [10.150130984999123, 0.10668705682083964, 0.10668705682083964, 0.10668705682083964], "per_iter_time": [10.150130984999123, 0.10668705682083964, 0.10668705682083964, 0.10668705682083964]}, "outputs_file": null, "timestamp": 1699767138}
-{"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": "PJRT", "xla_flags": null, "dynamo": "openxla_eval", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.05968705682083964, 0.05968705682083964, 0.05968705682083964], "per_iter_time": [15.150130984999123, 0.05968705682083964, 0.05968705682083964, 0.05968705682083964]}, "outputs_file": null, "timestamp": 1699767138}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "eval", "batch_size": 1}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.033817784171551466, 0.033817784171551466, 0.033817784171551466], "per_iter_time": [15.150130984999123, 0.033817784171551466, 0.033817784171551466, 0.033817784171551466]}, "outputs_file": null, "timestamp": 1699767138}
 {"model": {"suite_name": "torchbench", "model_name": "Background_Matting"}, "experiment": {"experiment_name": "run_all", "accelerator": "cuda", "accelerator_model": "One of Tesla V100-SXM2-16GB, ...", "xla": null, "xla_flags": null, "dynamo": "inductor", "test": "train", "batch_size": 4}, "repeat": 4, "iterations_per_run": 1, "metrics": {"total_time": [15.150130984999123, 0.35498354313895106, 0.35498354313895106, 0.35498354313895106], "per_iter_time": [15.150130984999123, 0.35498354313895106, 0.35498354313895106, 0.35498354313895106]}, "outputs_file": null, "timestamp": 1699767138}
diff --git a/test/dynamo/test_bridge.py b/test/dynamo/test_bridge.py
index b7cb4db5e8c..1daf48f658b 100644
--- a/test/dynamo/test_bridge.py
+++ b/test/dynamo/test_bridge.py
@@ -247,7 +247,7 @@ def foo(x):
       return module(x)
 
     x = torch.randint(0, 10, (10,), device=device)
-    self._compile_and_check(foo, (x,), backend="openxla_eval")
+    self._compile_and_check(foo, (x,), backend="openxla")
 
   def test_inputs_not_computed(self):