pytorch · qihqi · Sep 26, 2024 · Sep 27, 2024 · Sep 27, 2024 · Sep 27, 2024
diff --git a/.bazelrc b/.bazelrc
@@ -25,8 +25,8 @@ build -c opt
 build --config=short_logs
 
-# Force GCC because clang/bazel has issues.
+# Use clang as the C/C++ compiler.
-build --action_env=CC=gcc
-build --action_env=CXX=g++
+build --action_env=CC=clang
+build --action_env=CXX=clang++
 build --spawn_strategy=standalone
 
 ###########################################################################
@@ -53,6 +53,11 @@ build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda --@local_config_cuda//:enable_cuda
 build:cuda --define=xla_python_enable_gpu=true
 build:cuda --cxxopt=-DXLA_CUDA=1
+build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
+build:cuda --repo_env=HERMETIC_CUDNN_VERSION="9.1.1"
+#build:cuda --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_60,sm_70,sm_80,compute_90"
+build:cuda --@local_config_cuda//cuda:include_cuda_libs=true
+test:cuda --@cuda_driver//:enable_forward_compatibility=true
 
 # Coverage with cuda/gcc/nvcc requires manually setting coverage flags.
 coverage:cuda --per_file_copt=third_party/.*,torch_xla/.*@--coverage

diff --git a/.circleci/build.sh b/.circleci/build.sh
@@ -53,4 +53,6 @@ export BUILD_CPP_TESTS='1'
 export TF_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_70,sm_75,compute_80,$TF_CUDA_COMPUTE_CAPABILITIES"
 build_torch_xla $XLA_DIR
 
+apt-get install clang-17
+
 popd
diff --git a/.github/workflows/_build_plugin.yml b/.github/workflows/_build_plugin.yml
@@ -41,6 +41,8 @@ jobs:
         shell: bash
         run: |
           cd pytorch/xla/infra/ansible
+          apt install -y clang
+          bazel clean --expunge || true
           ansible-playbook playbook.yaml -vvv -e "stage=build_plugin arch=amd64 accelerator=cuda cuda_compute_capabilities=5.2,7.5,8.6 src_root=${GITHUB_WORKSPACE} cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps
       - name: Upload wheel
         uses: actions/upload-artifact@v4

diff --git a/.github/workflows/_build_torch_xla.yml b/.github/workflows/_build_torch_xla.yml
@@ -47,6 +47,8 @@ jobs:
         shell: bash
         run: |
           cd pytorch/xla/infra/ansible
+          apt install -y clang
+          bazel clean --expunge || true
           ansible-playbook playbook.yaml -vvv -e "stage=build arch=amd64 accelerator=tpu src_root=${GITHUB_WORKSPACE} bundle_libtpu=0 build_cpp_tests=1 git_versioned_xla_build=1 cache_suffix=-ci" --skip-tags=fetch_srcs,install_deps
       - name: Upload wheel
         uses: actions/upload-artifact@v4

diff --git a/WORKSPACE b/WORKSPACE
@@ -50,7 +50,7 @@ new_local_repository(
 #    curl -L https://github.com/openxla/xla/archive/<git hash>.tar.gz | sha256sum
 #    and update the sha256 with the result.
 
-xla_hash = '32ebd694c4d0442e241d76324ff1a721831366b4'
+xla_hash = '590cd6fcd1ed24ab9cf494789a0fc524b94a4a6a'
 
 http_archive(
     name = "xla",
@@ -136,7 +136,56 @@ load("@xla//:workspace0.bzl", "xla_workspace0")
 
 xla_workspace0()
 
-load("@tsl//third_party/gpus:cuda_configure.bzl", "cuda_configure")
+load(
+    "@tsl//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
+    "cuda_json_init_repository",
+)
+
+cuda_json_init_repository()
+
+load(
+   "@tsl//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
+   "cuda_json_init_repository",
+)
+
+cuda_json_init_repository()
+
+load(
+   "@cuda_redist_json//:distributions.bzl",
+   "CUDA_REDISTRIBUTIONS",
+   "CUDNN_REDISTRIBUTIONS",
+)
+load(
+   "@tsl//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
+   "cuda_redist_init_repositories",
+   "cudnn_redist_init_repository",
+)
+
+cuda_redist_init_repositories(
+   cuda_redistributions = CUDA_REDISTRIBUTIONS,
+)
+
+cudnn_redist_init_repository(
+   cudnn_redistributions = CUDNN_REDISTRIBUTIONS,
+)
+
+load(
+   "@tsl//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
+   "cuda_configure",
+)
+
 cuda_configure(name = "local_config_cuda")
-load("@tsl//third_party/nccl:nccl_configure.bzl", "nccl_configure")
+
+load(
+   "@tsl//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
+   "nccl_redist_init_repository",
+)
+
+nccl_redist_init_repository()
+
+load(
+   "@tsl//third_party/nccl/hermetic:nccl_configure.bzl",
+   "nccl_configure",
+)
+
 nccl_configure(name = "local_config_nccl")
@@ -16,6 +16,8 @@ apt:
       - clang-format
       - gcc-10
       - g++-10
+      - clang-17
+      - clang
       - lcov
       - less
 

@@ -3,10 +3,10 @@
 release_env:
   common:
-    # Force GCC because clang/bazel has issues.
+    # Use clang as the C/C++ compiler.
-    CC: gcc-10
-    CXX: g++-10
-    # CC: "clang-{{ clang_version }}"
-    # CXX: "clang++-{{ clang_version }}"
+    # CC: gcc-10
+    # CXX: g++-10
+    CC: "clang"
+    CXX: "clang++"
     LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib"
 
   tpu:
@@ -24,8 +24,10 @@ build_env:
     # Set explicitly to 0 as setup.py defaults this flag to true if unset.
     BUILD_CPP_TESTS: "{{ build_cpp_tests }}"
-    # Force GCC because clang/bazel has issues.
+    # Use clang as the C/C++ compiler.
-    CC: gcc-10
-    CXX: g++-10
+    # CC: gcc-10
+    # CXX: g++-10
+    CC: "clang"
+    CXX: "clang++"
     PYTORCH_BUILD_NUMBER: 1
     TORCH_XLA_VERSION: "{{ package_version }}"
     PYTORCH_BUILD_VERSION: "{{ package_version }}"

diff --git a/setup.py b/setup.py
@@ -64,7 +64,7 @@
 
 base_dir = os.path.dirname(os.path.abspath(__file__))
 
-_date = '20240913'
+_date = '20240916'
 _libtpu_version = f'0.1.dev{_date}'
 _libtpu_storage_path = f'https://storage.googleapis.com/libtpu-nightly-releases/wheels/libtpu-nightly/libtpu_nightly-{_libtpu_version}+nightly-py3-none-any.whl'
 _jax_version = f'0.4.33.dev{_date}'

diff --git a/torch_xla/csrc/BUILD b/torch_xla/csrc/BUILD
@@ -151,6 +151,7 @@ ptxla_cc_library(
         "@xla//xla/client/lib:slicing",
         "@xla//xla/client/lib:sorting",
         "@xla//xla/client/lib:svd",
+        "@xla//xla/hlo/pass:hlo_pass_pipeline",
         "@xla//xla/stream_executor:dnn",
         "@tsl//tsl/platform:errors",
         "@tsl//tsl/profiler/lib:traceme",

diff --git a/torch_xla/csrc/runtime/ifrt_computation_client.h b/torch_xla/csrc/runtime/ifrt_computation_client.h
@@ -19,6 +19,7 @@
 #include "xla/python/ifrt/hlo/hlo_program.h"
 #include "xla/python/pjrt_ifrt/pjrt_array.h"
 #include "xla/python/pjrt_ifrt/pjrt_client.h"
+#include "xla/python/pjrt_ifrt/pjrt_dtype.h"
 #include "xla/python/pjrt_ifrt/xla_compiler.h"
 #include "xla/shape.h"
 

diff --git a/torch_xla/csrc/runtime/pjrt_computation_client.cc b/torch_xla/csrc/runtime/pjrt_computation_client.cc
@@ -1025,7 +1025,7 @@ void PjRtComputationClient::RegisterCustomCall(const std::string& fn_name,
   args.function_name = fn_name.c_str();
   args.function_name_size = fn_name.size();
   args.api_version = 0;
-  args.custom_call_function = function_ptr;
+  args.handler_execute = function_ptr;
   PJRT_Error* error =
       reinterpret_cast<const PJRT_Gpu_Custom_Call*>(next)->custom_call(&args);
   if (error) {

diff --git a/torch_xla/csrc/xla_sharding_util.cpp b/torch_xla/csrc/xla_sharding_util.cpp
@@ -23,9 +23,9 @@
 #include "tsl/profiler/lib/traceme.h"
 #include "xla/execution_options_util.h"
 #include "xla/hlo/ir/hlo_module.h"
+#include "xla/hlo/pass/hlo_pass_pipeline.h"
 #include "xla/protobuf_util.h"
 #include "xla/service/hlo_parser.h"
-#include "xla/service/hlo_pass_pipeline.h"
 #include "xla/service/hlo_verifier.h"
 #include "xla/service/sharding_propagation.h"
 #include "xla/service/spmd/spmd_partitioner.h"