diff --git a/.circleci/triton.sh b/.circleci/triton.sh
index b608bae3fe6..9d8f51634b8 100755
--- a/.circleci/triton.sh
+++ b/.circleci/triton.sh
@@ -30,9 +30,5 @@ python setup.py install
 XLA_DIR=$PYTORCH_DIR/xla
 export TF_CUDA_COMPUTE_CAPABILITIES="compute_86"
 export XLA_CUDA=1
 build_torch_xla $XLA_DIR
-
-export GCLOUD_SERVICE_KEY_FILE="$XLA_DIR/default_credentials.json"
-export SILO_NAME='cache-silo-ci-dev-3.8_cuda_12.1'  # cache bucket for CI
-export TRITON_PTXAS_PATH='/usr/local/cuda/bin/ptxas'
-python3 $XLA_DIR/test/test_triton.py
\ No newline at end of file
+# This script only builds; test/test_triton.py is run by the test-triton workflow job.
diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml
index c88621796b7..789d0579272 100644
--- a/.github/workflows/_build.yml
+++ b/.github/workflows/_build.yml
@@ -14,7 +14,7 @@ on:
         required: false
         type: string
         description: Runner type for the test
-        default: linux.24xlarge
+        default: linux.12xlarge
       cuda:
         required: false
         type: string
@@ -68,8 +68,8 @@ jobs:
             # if image layers are not present in the repo.
             # Note: disable the following 2 lines while testing a new image, so we do not
             # push to the upstream.
-            # docker tag "${GCR_DOCKER_IMAGE}" "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null
-            # docker push "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null
+            docker tag "${GCR_DOCKER_IMAGE}" "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null
+            docker push "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null
       - name: Start the container
         shell: bash
         run: |
diff --git a/.github/workflows/_triton.yml b/.github/workflows/_triton.yml
deleted file mode 100644
index b3e4bf8cefa..00000000000
--- a/.github/workflows/_triton.yml
+++ /dev/null
@@ -1,76 +0,0 @@
-name: triton-test
-on:
-  workflow_call:
-    inputs:
-      docker-image:
-        required: true
-        type: string
-        description: Image to test on
-      runner:
-        required: false
-        type: string
-        description: Runner type for the test
-        default: linux.12xlarge
-      timeout-minutes:
-        required: false
-        type: number
-        default: 270
-        description: |
-          Set the maximum (in minutes) how long the workflow should take to finish
-    secrets:
-      gcloud-service-key:
-        required: true
-        description: Secret to access Bazel build cache
-jobs:
-  test:
-    runs-on: ${{ inputs.runner }}
-    timeout-minutes: ${{ inputs.timeout-minutes }}
-    env:
-      DOCKER_IMAGE: ${{ inputs.docker-image }}
-      WORKDIR: /var/lib/jenkins/workspace
-      GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }}
-    steps:
-      - name: Setup Linux
-        uses: pytorch/test-infra/.github/actions/setup-linux@main
-      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
-        with:
-          github-secret: ${{ secrets.GITHUB_TOKEN }}
-          instructions: |
-            Tests are done inside the container, to start an interactive session run:
-              docker exec -it $(docker container ps --format '{{.ID}}') bash
-      - name: Install gcloud CLI
-        if: ${{ inputs.collect-coverage }}
-        shell: bash
-        run: |
-          sudo tee -a /etc/yum.repos.d/google-cloud-sdk.repo << EOM
-          [google-cloud-cli]
-          name=Google Cloud CLI
-          baseurl=https://packages.cloud.google.com/yum/repos/cloud-sdk-el8-x86_64
-          enabled=1
-          gpgcheck=1
-          repo_gpgcheck=0
-          gpgkey=https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
-          EOM
-          sudo yum install -y google-cloud-cli
-      - name: Auth to GCR
-        if: ${{ inputs.collect-coverage }}
-        shell: bash
-        run: |
-          echo "${GCLOUD_SERVICE_KEY}" | gcloud auth activate-service-account --key-file=-
-      - name: Download and run docker image from GCR
-        shell: bash
-        run: |
-          echo "DOCKER_IMAGE: ${DOCKER_IMAGE}"
-          docker pull "${DOCKER_IMAGE}"
-          pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -t -d -w "$WORKDIR" "${DOCKER_IMAGE}")
-          echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json"
-          echo "pid=${pid}" >> "${GITHUB_ENV}"
-      - name: Test
-        shell: bash
-        run: |
-          docker exec --privileged -u jenkins "${pid}" bash -c '.circleci/triton.sh'
-      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
-        if: always()
-
diff --git a/.github/workflows/triton.yml b/.github/workflows/triton.yml
index 5893f0834d5..70a9ce82835 100644
--- a/.github/workflows/triton.yml
+++ b/.github/workflows/triton.yml
@@ -20,11 +20,16 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  test-triton:
-    runs-on: linux.g5.4xlarge.nvidia.gpu
+  build-triton:
+    runs-on: linux.24xlarge
+    # Job-level outputs must be declared explicitly; without this mapping
+    # needs.build-triton.outputs.docker-image is empty in test-triton.
+    outputs:
+      docker-image: ${{ steps.upload-docker-image.outputs.docker-image }}
     timeout-minutes: 300
     env:
       DOCKER_IMAGE: gcr.io/tpu-pytorch/xla_base:dev-3.8_cuda_12.1
+      ECR_DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base
       WORKDIR: /triton_dir
     steps:
       - name: Setup Linux
@@ -52,6 +57,51 @@ jobs:
         shell: bash
         run: |
           docker exec --privileged -u jenkins "${pid}" bash -c ".circleci/triton.sh"
+      # Snapshot the build container as an image and push it for the test-triton job.
+      - name: Push built docker image to ECR
+        id: upload-docker-image
+        shell: bash
+        run: |
+          export COMMIT_DOCKER_IMAGE="${ECR_DOCKER_IMAGE_BASE}:triton-${GITHUB_SHA}"
+          time docker commit "${pid}" "${COMMIT_DOCKER_IMAGE}"
+          time docker push "${COMMIT_DOCKER_IMAGE}"
+          echo "docker-image=${COMMIT_DOCKER_IMAGE}" >> "${GITHUB_OUTPUT}"
       - name: Teardown Linux
         uses: pytorch/test-infra/.github/actions/teardown-linux@main
         if: always()
+  # Runs test/test_triton.py on a GPU runner inside the image pushed by build-triton.
+  test-triton:
+    runs-on: linux.g5.4xlarge.nvidia.gpu
+    timeout-minutes: 300
+    needs: build-triton
+    env:
+      DOCKER_IMAGE: ${{ needs.build-triton.outputs.docker-image }}
+      WORKDIR: /triton_dir
+    steps:
+      - name: Setup Linux
+        uses: pytorch/test-infra/.github/actions/setup-linux@main
+      - name: Setup SSH (Click me for login details)
+        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        with:
+          github-secret: ${{ secrets.GITHUB_TOKEN }}
+          instructions: |
+            Tests are done inside the container, to start an interactive session run:
+              docker exec -it $(docker container ps --format '{{.ID}}') bash
+      # The docker run below passes ${GPU_FLAG:-}; this action is expected to export
+      # GPU_FLAG so the container gets GPU access (TODO confirm against the action).
+      - name: Install NVIDIA driver, nvidia-docker runtime, set GPU_FLAG
+        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+      - name: Download and run docker image from ECR
+        shell: bash
+        run: |
+          echo "DOCKER_IMAGE: ${DOCKER_IMAGE}"
+          docker pull "${DOCKER_IMAGE}"
+          pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -t -d -w "$WORKDIR" "${DOCKER_IMAGE}")
+          echo "pid=${pid}" >> "${GITHUB_ENV}"
+      - name: Test
+        shell: bash
+        run: |
+          docker exec --privileged -u jenkins "${pid}" bash -c 'TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas python test/test_triton.py'
+      - name: Teardown Linux
+        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        if: always()