From 3e30c2e34f137fa6ce385ab72ed2fba39e879e7f Mon Sep 17 00:00:00 2001 From: Henning Becker Date: Fri, 10 May 2024 03:25:36 -0700 Subject: [PATCH] Make the CI at least build unsupported GPU test targets This should allow uncovering build failures for tests that require hardware that is not available in a presubmit - like H100 GPUs at the time of writing this. PiperOrigin-RevId: 632440611 --- third_party/xla/build_tools/build.py | 69 +++++++++++++------ .../service/gpu/tests/gpu_fused_mha_test.cc | 2 +- 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/third_party/xla/build_tools/build.py b/third_party/xla/build_tools/build.py index 8b353f3cdbb9e7..a96a0367b3d2ff 100755 --- a/third_party/xla/build_tools/build.py +++ b/third_party/xla/build_tools/build.py @@ -159,22 +159,42 @@ class Build: docker_image: DockerImage target_patterns: Tuple[str, ...] configs: Tuple[str, ...] = () - tag_filters: Tuple[str, ...] = () + build_tag_filters: Tuple[str, ...] = () + test_tag_filters: Tuple[str, ...] = () action_env: Dict[str, Any] = dataclasses.field(default_factory=dict) test_env: Dict[str, Any] = dataclasses.field(default_factory=dict) options: Dict[str, Any] = dataclasses.field(default_factory=dict) - def bazel_test_command(self) -> List[str]: + def _common_bazel_options(self) -> List[str]: options = _dict_to_cli_options(self.options) configs = [f"--config={config}" for config in self.configs] - build_tag_filters = f"--build_tag_filters={','.join(self.tag_filters)}" - test_tag_filters = f"--test_tag_filters={','.join(self.tag_filters)}" + build_tag_filters = ( + f"--build_tag_filters={','.join(self.build_tag_filters)}" + ) + test_tag_filters = f"--test_tag_filters={','.join(self.test_tag_filters)}" action_env = [f"--action_env={k}={v}" for k, v in self.action_env.items()] test_env = [f"--test_env={k}={v}" for k, v in self.test_env.items()] tag_filters = [build_tag_filters, test_tag_filters] - all_options = tag_filters + configs + action_env + test_env + options - return ["bazel", "test", *all_options, "--", *self.target_patterns] + return tag_filters + configs + action_env + test_env + options + + def bazel_test_command(self) -> List[str]: + return [ + "bazel", + "test", + *self._common_bazel_options(), + "--", + *self.target_patterns, + ] + + def bazel_build_command(self) -> List[str]: + return [ + "bazel", + "build", + *self._common_bazel_options(), + "--", + *self.target_patterns, + ] def _tag_filters_for_compute_capability( @@ -214,7 +234,8 @@ def nvidia_gpu_build_with_compute_capability( docker_image=_CUDNN_9_IMAGE, target_patterns=_XLA_DEFAULT_TARGET_PATTERNS, configs=configs, - tag_filters=("-no_oss", "requires-gpu-nvidia") + extra_gpu_tags, + test_tag_filters=("-no_oss", "requires-gpu-nvidia") + extra_gpu_tags, + build_tag_filters=("-no_oss", "requires-gpu-nvidia"), options=dict( run_under="//tools/ci_build/gpu_build:parallel_gpu_execute", repo_env=f"TF_CUDA_COMPUTE_CAPABILITIES={compute_capability/10}", @@ -223,34 +244,39 @@ def nvidia_gpu_build_with_compute_capability( ) +cpu_x86_tags = ( + "-no_oss", + "-gpu", + "-requires-gpu-nvidia", + "-requires-gpu-amd", +) _CPU_X86_BUILD = Build( type_=BuildType.CPU_X86, repo="openxla/xla", docker_image=_DEFAULT_IMAGE, configs=("warnings", "nonccl", "rbe_linux_cpu"), target_patterns=_XLA_DEFAULT_TARGET_PATTERNS + ("-//xla/service/gpu/...",), - tag_filters=( - "-no_oss", - "-gpu", - "-requires-gpu-nvidia", - "-requires-gpu-amd", - ), + build_tag_filters=cpu_x86_tags, + test_tag_filters=cpu_x86_tags, options=_DEFAULT_BAZEL_OPTIONS, ) + +cpu_arm_tags = ( + "-no_oss", + "-gpu", + "-requires-gpu-nvidia", + "-requires-gpu-amd", + "-not_run:arm", +) _CPU_ARM64_BUILD = Build( type_=BuildType.CPU_ARM64, repo="openxla/xla", docker_image=_ARM64_JAX_MULTI_PYTHON_IMAGE, configs=("warnings", "rbe_cross_compile_linux_arm64_xla", "nonccl"), target_patterns=_XLA_DEFAULT_TARGET_PATTERNS + ("-//xla/service/gpu/...",), - tag_filters=( - "-no_oss", - "-gpu", - "-requires-gpu-nvidia", - "-not_run:arm", - "-requires-gpu-amd", - ), options={**_DEFAULT_BAZEL_OPTIONS, "build_tests_only": True}, + build_tag_filters=cpu_arm_tags, + test_tag_filters=cpu_arm_tags, ) # TODO(ddunleavy): Setup additional build for a100 tests once L4 RBE is ready. _GPU_BUILD = nvidia_gpu_build_with_compute_capability( @@ -290,7 +316,8 @@ def nvidia_gpu_build_with_compute_capability( "tensorflow_testing_rbe_linux", ), target_patterns=("//tests:gpu_tests", "//tests:backend_independent_tests"), - tag_filters=("-multiaccelerator",), + build_tag_filters=("-multiaccelerator",), + test_tag_filters=("-multiaccelerator",), test_env=dict( JAX_SKIP_SLOW_TESTS=1, TF_CPP_MIN_LOG_LEVEL=0, diff --git a/third_party/xla/xla/service/gpu/tests/gpu_fused_mha_test.cc b/third_party/xla/xla/service/gpu/tests/gpu_fused_mha_test.cc index 219370414228b7..60a42cce78c1ec 100644 --- a/third_party/xla/xla/service/gpu/tests/gpu_fused_mha_test.cc +++ b/third_party/xla/xla/service/gpu/tests/gpu_fused_mha_test.cc @@ -125,7 +125,7 @@ class MultiHeadedAttentionTest : public GpuCodegenTest { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr test_module, ParseAndReturnVerifiedModule(hlo_string)); TF_ASSERT_OK_AND_ASSIGN(num_fmha_calls, - CountFMHACalls(test_module->Clone())); + CountFMHACalls(std::move(test_module->Clone()))); EXPECT_EQ(num_fmha_calls, expected_num_fmha_calls); const Literal actual_result = ExecuteAndTransfer(std::move(test_module), literals);