diff --git a/.circleci/triton.sh b/.circleci/triton.sh index b608bae3fe6..9d8f51634b8 100755 --- a/.circleci/triton.sh +++ b/.circleci/triton.sh @@ -30,9 +30,4 @@ python setup.py install XLA_DIR=$PYTORCH_DIR/xla export TF_CUDA_COMPUTE_CAPABILITIES="compute_86" export XLA_CUDA=1 -build_torch_xla $XLA_DIR - -export GCLOUD_SERVICE_KEY_FILE="$XLA_DIR/default_credentials.json" -export SILO_NAME='cache-silo-ci-dev-3.8_cuda_12.1' # cache bucket for CI -export TRITON_PTXAS_PATH='/usr/local/cuda/bin/ptxas' -python3 $XLA_DIR/test/test_triton.py \ No newline at end of file +build_torch_xla $XLA_DIR \ No newline at end of file diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index c88621796b7..789d0579272 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -14,7 +14,7 @@ on: required: false type: string description: Runner type for the test - default: linux.24xlarge + default: linux.12xlarge cuda: required: false type: string @@ -68,8 +68,8 @@ jobs: # if image layers are not present in the repo. # Note: disable the following 2 lines while testing a new image, so we do not # push to the upstream. - # docker tag "${GCR_DOCKER_IMAGE}" "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null - # docker push "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null + docker tag "${GCR_DOCKER_IMAGE}" "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null + docker push "${ECR_DOCKER_IMAGE_BASE}:v1.1-lite" >/dev/null - name: Start the container shell: bash run: | diff --git a/.github/workflows/_triton.yml b/.github/workflows/_triton.yml deleted file mode 100644 index b3e4bf8cefa..00000000000 --- a/.github/workflows/_triton.yml +++ /dev/null @@ -1,76 +0,0 @@ -name: triton-test -on: - workflow_call: - inputs: - docker-image: - required: true - type: string - description: Image to test on - runner: - required: false - type: string - description: Runner type for the test - default: linux.12xlarge - timeout-minutes: - required: false - type: number - default: 270 - description: | - Set the maximum (in minutes) how long the workflow should take to finish - secrets: - gcloud-service-key: - required: true - description: Secret to access Bazel build cache -jobs: - test: - runs-on: ${{ inputs.runner }} - timeout-minutes: ${{ inputs.timeout-minutes }} - env: - DOCKER_IMAGE: ${{ inputs.docker-image }} - WORKDIR: /var/lib/jenkins/workspace - GCLOUD_SERVICE_KEY: ${{ secrets.gcloud-service-key }} - steps: - - name: Setup Linux - uses: pytorch/test-infra/.github/actions/setup-linux@main - - name: Setup SSH (Click me for login details) - uses: pytorch/test-infra/.github/actions/setup-ssh@main - with: - github-secret: ${{ secrets.GITHUB_TOKEN }} - instructions: | - Tests are done inside the container, to start an interactive session run: - docker exec -it $(docker container ps --format '{{.ID}}') bash - - name: Install gcloud CLI - if: ${{ inputs.collect-coverage }} - shell: bash - run: | - sudo tee -a /etc/yum.repos.d/google-cloud-sdk.repo << EOM - [google-cloud-cli] - name=Google Cloud CLI - baseurl=https://packages.cloud.google.com/yum/repos/cloud-sdk-el8-x86_64 - enabled=1 - gpgcheck=1 - repo_gpgcheck=0 - gpgkey=https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg - EOM - sudo yum install -y google-cloud-cli - - name: Auth to GCR - if: ${{ inputs.collect-coverage }} - shell: bash - run: | - echo "${GCLOUD_SERVICE_KEY}" | gcloud auth activate-service-account --key-file=- - - name: Download and run docker image from GCR - shell: bash - run: | - echo "DOCKER_IMAGE: ${DOCKER_IMAGE}" - docker pull "${DOCKER_IMAGE}" - pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -t -d -w "$WORKDIR" "${DOCKER_IMAGE}") - echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json" - echo "pid=${pid}" >> "${GITHUB_ENV}" - - name: Test - shell: bash - run: | - docker exec --privileged -u jenkins "${pid}" bash -c '.circleci/triton.sh' - - name: Teardown Linux - uses: pytorch/test-infra/.github/actions/teardown-linux@main - if: always() - diff --git a/.github/workflows/triton.yml b/.github/workflows/triton.yml index 5893f0834d5..70a9ce82835 100644 --- a/.github/workflows/triton.yml +++ b/.github/workflows/triton.yml @@ -20,11 +20,12 @@ concurrency: cancel-in-progress: true jobs: - test-triton: - runs-on: linux.g5.4xlarge.nvidia.gpu + build-triton: + runs-on: linux.24xlarge timeout-minutes: 300 env: DOCKER_IMAGE: gcr.io/tpu-pytorch/xla_base:dev-3.8_cuda_12.1 + ECR_DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base WORKDIR: /triton_dir steps: - name: Setup Linux @@ -52,6 +53,45 @@ jobs: shell: bash run: | docker exec --privileged -u jenkins "${pid}" bash -c ".circleci/triton.sh" + - name: Push built docker image to ECR + id: upload-docker-image + shell: bash + run: | + export COMMIT_DOCKER_IMAGE="${ECR_DOCKER_IMAGE_BASE}:triton-${GITHUB_SHA}" + time docker commit "${pid}" "${COMMIT_DOCKER_IMAGE}" + time docker push "${COMMIT_DOCKER_IMAGE}" + echo "docker-image=${COMMIT_DOCKER_IMAGE}" >> "${GITHUB_OUTPUT}" - name: Teardown Linux uses: pytorch/test-infra/.github/actions/teardown-linux@main if: always() + test-triton: + runs-on: linux.g5.4xlarge.nvidia.gpu + timeout-minutes: 300 + needs: build-triton + env: + DOCKER_IMAGE: ${{ needs.build-triton.outputs.docker-image }} + WORKDIR: /triton_dir + steps: + - name: Setup Linux + uses: pytorch/test-infra/.github/actions/setup-linux@main + - name: Setup SSH (Click me for login details) + uses: pytorch/test-infra/.github/actions/setup-ssh@main + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + instructions: | + Tests are done inside the container, to start an interactive session run: + docker exec -it $(docker container ps --format '{{.ID}}') bash + - name: Download and run docker image from GCR + shell: bash + run: | + echo "DOCKER_IMAGE: ${DOCKER_IMAGE}" + docker pull "${DOCKER_IMAGE}" + pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -t -d -w "$WORKDIR" "${DOCKER_IMAGE}") + echo "pid=${pid}" >> "${GITHUB_ENV}" + - name: Test + shell: bash + run: | + docker exec --privileged -u jenkins "${pid}" bash -c 'TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas python test/test_triton.py' + - name: Teardown Linux + uses: pytorch/test-infra/.github/actions/teardown-linux@main + if: always() \ No newline at end of file