# Description:
#   GPU-specific components in XLA service implementation.

load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
load(
    "@local_config_rocm//rocm:build_defs.bzl",
    "if_rocm_hipblaslt",
    "if_rocm_is_configured",
    "rocm_copts",
)
load(
    "@local_tsl//tsl/platform:build_config.bzl",
    "tf_proto_library",
)
load(
    "@local_tsl//tsl/platform:build_config_root.bzl",
    "if_static",
)
load(
    "@local_tsl//tsl/platform/default:cuda_build_defs.bzl",
    "if_cuda_is_configured",
)
load("//xla:xla.bzl", "xla_cc_test", "xla_cub_deps", "xla_internal")
load(
    "//xla/service/gpu:build_defs.bzl",
    "build_cub_sort_kernels",
    "get_cub_sort_kernel_types",
    "gpu_kernel_library",
)
load(
    "//xla/stream_executor:build_defs.bzl",
    "if_gpu_is_configured",
)
load("//xla/tests:build_defs.bzl", "xla_test")
load(
    "//xla/tsl:tsl.bzl",
    "if_google",
    "if_oss",
    "internal_visibility",
    "tsl_copts",
    "tsl_gpu_library",
)
load("//xla/tsl:tsl.default.bzl", "filegroup", "get_compatible_with_portable")

# Package-wide defaults. Unless a target sets its own `visibility`, it is
# visible to whatever internal_visibility() derives from the ":friends" group
# declared in this file.
package(
    # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
    default_visibility = internal_visibility([":friends"]),
    licenses = ["notice"],
)

# Package group referenced by this package's default visibility; it pulls in
# every member of //xla:friends.
package_group(
    name = "friends",
    includes = ["//xla:friends"],
)

# Gathers all C++ sources and headers matched by the recursive globs below so
# that dependency-checking tooling can refer to them through a single target.
filegroup(
    name = "c_srcs",
    data = glob(["**/*.cc", "**/*.h"]),
)

# Proto library for backend_configs.proto. C++ targets in this file consume it
# via the ":backend_configs_cc" label (presumably generated by the
# tf_proto_library macro -- confirm in build_config.bzl).
tf_proto_library(
    name = "backend_configs",
    srcs = ["backend_configs.proto"],
    cc_api_version = 2,
    make_default_target_header_only = True,
    # Proto-level dependencies of backend_configs.proto.
    protodeps = [
        "//xla:xla_data_proto",
        "//xla:autotuning_proto",
        "@local_tsl//tsl/protobuf:dnn_proto",
    ],
)

# C++ test built from backend_configs_test.cc; links against the C++ proto
# target ":backend_configs_cc".
xla_cc_test(
    name = "backend_configs_test",
    srcs = ["backend_configs_test.cc"],
    deps = [
        ":backend_configs_cc",
        "//xla/hlo/ir:hlo",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gpu_executable_run_options.{h,cc}.
cc_library(
    name = "gpu_executable_run_options",
    srcs = ["gpu_executable_run_options.cc"],
    hdrs = ["gpu_executable_run_options.h"],
    compatible_with = get_compatible_with_portable(),
    # Overrides the package default: this target is visible to every package.
    visibility = ["//visibility:public"],
    deps = [
        "//xla:executable_run_options",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla/service:executable",
        "//xla/service:global_device_id",
        "//xla/service/gpu/runtime:nccl_clique_key",
        "//xla/stream_executor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Header-only library (no srcs, no deps) exposing gpu_constants.h.
cc_library(
    name = "gpu_constants",
    hdrs = ["gpu_constants.h"],
)

# Header-only library (no srcs) exposing gpu_memory_space_assignment.h.
cc_library(
    name = "gpu_memory_space_assignment",
    hdrs = ["gpu_memory_space_assignment.h"],
    deps = [
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "//xla/service:hlo_alias_analysis",
        "//xla/service:hlo_ordering",
        "//xla/service:hlo_value",
        "@com_google_absl//absl/status",
    ],
)

# Library built from launch_dimensions.{h,cc}; marked portable through
# get_compatible_with_portable().
cc_library(
    name = "launch_dimensions",
    srcs = ["launch_dimensions.cc"],
    hdrs = ["launch_dimensions.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:launch_dim",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
    ],
)

# Test for custom calls on the "gpu" backend. The source file is only compiled
# when a GPU toolchain is configured (if_gpu_is_configured).
xla_test(
    name = "custom_call_test",
    srcs = if_gpu_is_configured(["custom_call_test.cc"]),
    backends = ["gpu"],
    # Define the platform macro for whichever GPU toolchain is configured. The
    # ROCm define pairs with the ROCm headers added to `deps` below and matches
    # the other CUDA/ROCm dual-platform targets in this file.
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
        "TENSORFLOW_USE_ROCM=1",
    ]),
    deps = [
        "//xla:debug_options_flags",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:test_helpers",
        "//xla/client:xla_builder",
        "//xla/client/lib:constants",
        "//xla/ffi",
        "//xla/ffi:execution_context",
        "//xla/ffi:ffi_api",
        "//xla/hlo/ir:hlo",
        "//xla/service:custom_call_status",
        "//xla/service:custom_call_target_registry",
        "//xla/service:executable",
        "//xla/stream_executor",
        "//xla/stream_executor/gpu:gpu_types_header",
        "//xla/tests:client_library_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ] + if_cuda_is_configured([
        # Vendor headers for the CUDA build.
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        # Vendor headers for the ROCm build.
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

# C++ test built from gpu_copy_insertion_test.cc; depends on the in-package
# ":buffer_sharing" target together with //xla/service:copy_insertion.
xla_cc_test(
    name = "gpu_copy_insertion_test",
    srcs = ["gpu_copy_insertion_test.cc"],
    deps = [
        ":buffer_sharing",
        "//xla:test",
        "//xla:test_helpers",
        "//xla/hlo/ir:hlo",
        "//xla/service:copy_insertion",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from hlo_to_ir_bindings.{h,cc}; depends on the LLVM Core and
# Support libraries and on XLA's llvm_ir helpers.
cc_library(
    name = "hlo_to_ir_bindings",
    srcs = ["hlo_to_ir_bindings.cc"],
    hdrs = ["hlo_to_ir_bindings.h"],
    deps = [
        ":buffer_allocations",
        ":ir_emission_utils",
        "//xla:shape_tree",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service/llvm_ir:buffer_assignment_util",
        "//xla/service/llvm_ir:ir_array",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/service/llvm_ir:tuple_ops",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Library built from target_util.{h,cc}; marked portable through
# get_compatible_with_portable().
cc_library(
    name = "target_util",
    srcs = ["target_util.cc"],
    hdrs = ["target_util.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service/llvm_ir:llvm_type_conversion_util",
        "//xla/service/llvm_ir:llvm_util",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TargetParser",
        "@local_tsl//tsl/platform:logging",
    ],
)

# C++ test for ":target_util", built from target_util_test.cc.
xla_cc_test(
    name = "target_util_test",
    srcs = ["target_util_test.cc"],
    deps = [
        ":target_util",
        "//xla/tests:xla_internal_test_main",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:test",
    ],
)

# Test-only library built from gpu_device_info_for_tests.{h,cc}; because it is
# `testonly`, only tests and other test-only targets may depend on it.
cc_library(
    name = "gpu_device_info_for_tests",
    testonly = True,
    srcs = ["gpu_device_info_for_tests.cc"],
    hdrs = ["gpu_device_info_for_tests.h"],
    compatible_with = get_compatible_with_portable(),
    deps = ["//xla/stream_executor:device_description"],
)

# Library built from ir_emitter_context.{h,cc}. Depends on ":gpu_executable",
# ":kernel_reuse_cache" and ":execution_stream_assignment" from this package,
# plus LLVM and MLIR IR libraries.
cc_library(
    name = "ir_emitter_context",
    srcs = ["ir_emitter_context.cc"],
    hdrs = ["ir_emitter_context.h"],
    deps = [
        ":execution_stream_assignment",
        ":gpu_constants",
        ":gpu_executable",
        ":ir_emission_utils",
        ":kernel_reuse_cache",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "//xla/service:name_uniquer",
        "//xla/service/gpu/runtime:nccl_collective_thunk",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/strings:string_view",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TargetParser",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
    ],
)

# Library built from ir_emitter_unnested.{h,cc}. It depends on the thunk
# libraries under //xla/service/gpu/runtime and on the fusion emitters, with a
# few extra dependencies that are only added when a GPU toolchain is
# configured.
cc_library(
    name = "ir_emitter_unnested",
    srcs = ["ir_emitter_unnested.cc"],
    hdrs = ["ir_emitter_unnested.h"],
    # Platform macros, applied to this target only: one for CUDA, one for ROCm,
    # and one more when if_rocm_hipblaslt() is active.
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
        "TENSORFLOW_USE_ROCM=1",
    ]) + if_rocm_hipblaslt([
        "TF_HIPBLASLT=1",
    ]),
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":execution_stream_assignment",
        ":gpu_asm_opts_util",
        ":gpu_conv_runner",
        ":gpu_fused_mha_runner",
        ":gpu_norm_runner",
        ":hlo_fusion_analysis",
        ":ir_emission_utils",
        ":ir_emitter",
        ":ir_emitter_context",
        ":kernel_arguments",
        ":kernel_reuse_cache",
        ":launch_dimensions",
        ":matmul_utils",
        ":parallel_loop_emitter",
        ":triton_call",
        "//xla:autotuning_proto_cc",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/ffi:attribute_map",
        "//xla/ffi:ffi_api",
        "//xla/ffi/api:c_api",
        "//xla/hlo/ir:hlo",
        "//xla/mlir_hlo:transforms_gpu_passes",
        "//xla/service:buffer_assignment",
        "//xla/service:collective_ops_utils",
        "//xla/service:custom_call_status",
        "//xla/service:custom_call_target_registry",
        "//xla/service:global_device_id",
        "//xla/service:name_uniquer",
        "//xla/service/gpu/fusions:fusion_emitter",
        "//xla/service/gpu/fusions:thunk_util",
        "//xla/service/gpu/kernels:custom_kernel",
        "//xla/service/gpu/kernels:topk_custom_kernel",
        "//xla/service/gpu/runtime:command_buffer_cmd",
        "//xla/service/gpu/runtime:command_buffer_cmd_emitter",
        "//xla/service/gpu/runtime:command_buffer_thunk",
        "//xla/service/gpu/runtime:conditional_thunk",
        "//xla/service/gpu/runtime:convolution_thunk",
        "//xla/service/gpu/runtime:copy_thunk",
        "//xla/service/gpu/runtime:custom_call_thunk",
        "//xla/service/gpu/runtime:fft_thunk",
        "//xla/service/gpu/runtime:fused_mha_thunk",
        "//xla/service/gpu/runtime:gemm_thunk",
        "//xla/service/gpu/runtime:infeed_thunk",
        "//xla/service/gpu/runtime:kernel_thunk",
        "//xla/service/gpu/runtime:nccl_all_gather_thunk",
        "//xla/service/gpu/runtime:nccl_all_reduce_thunk",
        "//xla/service/gpu/runtime:nccl_all_to_all_thunk",
        "//xla/service/gpu/runtime:nccl_api",
        "//xla/service/gpu/runtime:nccl_collective_broadcast_thunk",
        "//xla/service/gpu/runtime:nccl_collective_permute_thunk",
        "//xla/service/gpu/runtime:nccl_collective_thunk",
        "//xla/service/gpu/runtime:nccl_p2p_thunk_common",
        "//xla/service/gpu/runtime:nccl_recv_thunk",
        "//xla/service/gpu/runtime:nccl_send_thunk",
        "//xla/service/gpu/runtime:norm_thunk",
        "//xla/service/gpu/runtime:outfeed_thunk",
        "//xla/service/gpu/runtime:replica_id_thunk",
        "//xla/service/gpu/runtime:send_recv_thunk",
        "//xla/service/gpu/runtime:sequential_thunk",
        "//xla/service/gpu/runtime:thunk",
        "//xla/service/gpu/runtime:wait_for_streams_thunk",
        "//xla/service/gpu/runtime:while_thunk",
        "//xla/service/llvm_ir:buffer_assignment_util",
        "//xla/service/llvm_ir:ir_array",
        "//xla/service/llvm_ir:kernel_support_library",
        "//xla/service/llvm_ir:llvm_loop",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/service/llvm_ir:loop_emitter",
        "//xla/service/llvm_ir:sort_util",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:launch_dim",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "//xla/stream_executor/integrations:device_mem_allocator",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Linker",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:AsmParser",
        "@llvm-project//mlir:BuiltinToLLVMIRTranslation",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:LLVMDialect",
        "@llvm-project//mlir:LLVMToLLVMIRTranslation",
        "@llvm-project//mlir:MemRefTransforms",
        "@llvm-project//mlir:NVVMToLLVMIRTranslation",
        "@llvm-project//mlir:Parser",
        "@llvm-project//mlir:ROCDLToLLVMIRTranslation",
        "@llvm-project//mlir:Support",
        "@llvm-project//mlir:ToLLVMIRTranslation",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:human_readable_json",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/protobuf:dnn_proto_cc",
        "@triton//:TritonDialects",
    ] + if_gpu_is_configured([
        # Added only when a GPU toolchain (CUDA or ROCm) is configured.
        ":ir_emitter_triton",
        "//xla/service/gpu/fusions",
        "//xla/service/gpu/runtime:cholesky_thunk",
        "//xla/service/gpu/runtime:cub_sort_thunk",
        "//xla/service/gpu/runtime:gpublas_lt_matmul_thunk",
        "//xla/service/gpu/runtime:triangular_solve_thunk",
    ]) + if_rocm_is_configured([
        # Added only for ROCm builds.
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

# Library bundling the elemental, base and nested IR emitters
# (elemental_ir_emitter, ir_emitter, ir_emitter_nested) into a single target.
cc_library(
    name = "ir_emitter",
    srcs = [
        "elemental_ir_emitter.cc",
        "ir_emitter.cc",
        "ir_emitter_nested.cc",
    ],
    hdrs = [
        "elemental_ir_emitter.h",
        "ir_emitter.h",
        "ir_emitter_nested.h",
    ],
    # Use `local_defines` rather than a raw `-D` in `copts`, matching how every
    # other target in this file passes GOOGLE_CUDA. The macro still applies to
    # this target's own compilation only, not to its dependents.
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":hlo_to_ir_bindings",
        ":ir_emission_utils",
        ":ir_emitter_context",
        ":kernel_reuse_cache",
        ":target_util",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:elemental_ir_emitter",
        "//xla/service/llvm_ir:buffer_assignment_util",
        "//xla/service/llvm_ir:fused_ir_emitter",
        "//xla/service/llvm_ir:ir_array",
        "//xla/service/llvm_ir:ir_builder_mixin",
        "//xla/service/llvm_ir:kernel_support_library",
        "//xla/service/llvm_ir:llvm_loop",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/service/llvm_ir:loop_emitter",
        "//xla/service/llvm_ir:math_ops",
        "//xla/service/llvm_ir:tuple_ops",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/hash",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TargetParser",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Triton-based IR emitter library. The common source and header are only
# present when a GPU toolchain is configured; one extra platform-specific
# source is added for CUDA and one for ROCm.
cc_library(
    name = "ir_emitter_triton",
    srcs = if_gpu_is_configured(["ir_emitter_triton.cc"]) + if_cuda_is_configured(["ir_emitter_triton_cuda.cc"]) + if_rocm_is_configured([
        "ir_emitter_triton_rocm.cc",
    ]),
    hdrs = if_gpu_is_configured(["ir_emitter_triton.h"]),
    deps = [
        ":hlo_traversal",
        ":ir_emission_utils",
        ":launch_dimensions",
        ":matmul_utils",
        ":target_util",
        ":triton_fusion_analysis",
        ":triton_tiling_propagation",
        "//xla:autotuning_proto_cc",
        "//xla:comparison_util",
        "//xla:debug_options_flags",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/mlir_hlo",
        "//xla/mlir_hlo:map_mhlo_to_scalar_op",
        "//xla/service:algorithm_util",
        "//xla/service:dump",
        "//xla/service:hlo_module_config",
        "//xla/service:instruction_fusion",
        "//xla/service/gpu/fusions/mlir:elemental_hlo_to_mlir",
        "//xla/service/gpu/fusions/mlir:passes",
        "//xla/service/gpu/fusions/mlir/ir:xla_gpu",
        "//xla/service/gpu/llvm_gpu_backend",
        "//xla/service/gpu/model:affine_map_printer",
        "//xla/service/gpu/model:indexing_analysis",
        "//xla/service/gpu/model:symbolic_tile_analysis",
        "//xla/service/gpu/model:symbolic_tiled_hlo_instruction",
        "//xla/service/gpu/model:tiled_hlo_computation",
        "//xla/service/gpu/model:tiled_hlo_instruction",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:launch_dim",
        "//xla/translate/hlo_to_mhlo:hlo_module_importer",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:cord",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Linker",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TargetParser",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:AffineDialect",
        "@llvm-project//mlir:AffineToStandard",
        "@llvm-project//mlir:ArithDialect",
        "@llvm-project//mlir:ArithToLLVM",
        "@llvm-project//mlir:BuiltinToLLVMIRTranslation",
        "@llvm-project//mlir:ControlFlowToLLVM",
        "@llvm-project//mlir:ExecutionEngineUtils",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:FuncExtensions",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:IndexToLLVM",
        "@llvm-project//mlir:LLVMDialect",
        "@llvm-project//mlir:LLVMToLLVMIRTranslation",
        "@llvm-project//mlir:MathDialect",
        "@llvm-project//mlir:NVVMDialect",
        "@llvm-project//mlir:NVVMToLLVMIRTranslation",
        "@llvm-project//mlir:Pass",
        "@llvm-project//mlir:ROCDLToLLVMIRTranslation",
        "@llvm-project//mlir:SCFDialect",
        "@llvm-project//mlir:SCFToControlFlow",
        "@llvm-project//mlir:Support",
        "@llvm-project//mlir:ToLLVMIRTranslation",
        "@llvm-project//mlir:Transforms",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:tensor_float_32_utils",
        "@triton//:TritonDialects",
        "@triton//:TritonTransforms",
    ] + if_gpu_is_configured([
        # Triton GPU lowering libraries, added for any configured GPU toolchain.
        "@triton//:TritonNvidiaGPUTransforms",
        "@triton//:TritonGPUToLLVM",
        "@triton//:TritonToTritonGPU",
        "@triton//:TritonGPUTransforms",
    ]) + if_cuda_is_configured([
        # Added for CUDA builds only.
        "@triton//third_party/nvidia:NVGPUToLLVM",
        "@triton//third_party/nvidia:TritonNVIDIAGPUToLLVM",
        "@triton//:TritonLLVMIR",
    ]) + if_rocm_is_configured([
        # Added for ROCm builds only.
        "@local_tsl//tsl/platform:rocm_rocdl_path",
    ]),
)

# Test for ":ir_emitter_triton", sharded 20 ways and instantiated for the
# backends listed below. The source is only compiled when a GPU toolchain is
# configured.
# NOTE(review): "gpu_amd_any" is listed as a backend while the "no_rocm" tag is
# also set -- confirm this combination is intended.
xla_test(
    name = "ir_emitter_triton_test",
    srcs = if_gpu_is_configured(["ir_emitter_triton_test.cc"]),
    backends = [
        "gpu_a100",
        "gpu_h100",
        "gpu_amd_any",
    ],
    shard_count = 20,
    tags = [
        "no_rocm",
        "nomac",
    ],
    deps = [
        ":backend_configs_cc",
        ":gpu_device_info_for_tests",
        ":ir_emitter_triton",
        ":matmul_utils",
        ":triton_fusion_analysis",
        "//xla:autotuning_proto_cc",
        "//xla:error_spec",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:status_macros",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service/gpu/tests:gpu_codegen_test",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/cuda:cublas_plugin",
        "//xla/tests:filecheck",
        "//xla/tests:verified_hlo_module",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Pass",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ],
)

# Test built from ir_emitter_triton_mem_utils_test.cc; the source is only
# compiled for CUDA builds. Declared with the project's xla_cc_test wrapper,
# like every other C++ test in this file, rather than the raw cc_test rule.
xla_cc_test(
    name = "ir_emitter_triton_mem_utils_test",
    srcs = if_cuda_is_configured(["ir_emitter_triton_mem_utils_test.cc"]),
    deps = [
        ":ir_emitter_triton",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service/gpu/fusions/mlir/ir:xla_gpu",
        "//xla/service/gpu/model:indexing_analysis",
        "//xla/service/gpu/model:tiled_hlo_instruction",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@llvm-project//mlir:AffineDialect",
        "@llvm-project//mlir:ArithDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:NVVMDialect",
        "@llvm-project//mlir:Support",
        "@local_tsl//tsl/platform:logging",
        "@triton//:TritonDialects",
    ],
)

# Large-memory test built from ir_emitter_triton_large_test.cc. It is tagged
# "requires-mem:16g" and excluded from open-source runs ("no_oss") for the
# reason given on that tag below.
xla_test(
    name = "ir_emitter_triton_large_test",
    srcs = if_gpu_is_configured(["ir_emitter_triton_large_test.cc"]),
    backends = [
        "gpu_a100",
        "gpu_h100",
        "gpu_amd_any",
    ],
    tags = [
        "large",
        "no_oss",  # requires-mem:16g tag doesn't work in open source
        "nomac",
        "requires-mem:16g",
    ],
    deps = [
        "//xla:error_spec",
        "//xla:xla_proto_cc",
        "//xla/service/gpu/tests:gpu_codegen_test",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/log:check",
        "@com_google_googletest//:gtest",
    ],
)

# Test built from ir_emitter_triton_parametrized_test.cc, sharded 10 ways and
# instantiated for the backends listed below.
xla_test(
    name = "ir_emitter_triton_parametrized_test",
    srcs = if_gpu_is_configured(["ir_emitter_triton_parametrized_test.cc"]),
    backends = [
        "gpu_a100",
        "gpu_h100",
        "gpu_amd_any",
    ],
    shard_count = 10,
    tags = ["nomac"],
    deps = [
        ":triton_support",
        "//xla:comparison_util",
        "//xla:error_spec",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service/gpu/tests:gpu_codegen_test",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/cuda:cublas_plugin",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
    ],
)

# GEMM fusion autotuner library. Sources, headers and most dependencies exist
# only for CUDA builds; without CUDA the target has no sources and keeps just
# the small unconditional dependency list at the end.
cc_library(
    name = "gemm_fusion_autotuner",
    srcs = if_cuda_is_configured(["gemm_fusion_autotuner.cc"]),
    hdrs = if_cuda_is_configured(["gemm_fusion_autotuner.h"]),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = if_cuda_is_configured([
        ":autotuner_compile_util",
        ":autotuner_util",
        ":backend_configs_cc",
        ":buffer_comparator",
        ":gemm_rewriter",
        ":gpu_float_support",
        ":gpu_fusible",
        ":instruction_fusion",
        ":ir_emission_utils",
        ":matmul_utils",
        ":split_k_gemm_rewriter",
        ":stream_executor_util",
        ":cudnn_fusion_compiler",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@local_config_cuda//cuda:cuda_headers",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla/tools:hlo_decomposer_lib",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:algorithm_util",
        "//xla/service:dump",
        "//xla/service:executable",
        "//xla/service:float_normalization",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass",
        "//xla/service:shaped_buffer",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor",
        "//xla/stream_executor/gpu:redzone_allocator",
        "@local_tsl//tsl/lib/core:bits",
        "@local_tsl//tsl/platform:blocking_counter",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
        "//xla/tsl/util/proto:proto_utils",
        # Same-package target: referenced with the relative label form used by
        # the rest of this file. Position in the list is kept unchanged.
        ":hlo_traversal",
    ]) + [
        # Dependencies kept regardless of whether CUDA is configured.
        "//xla/stream_executor:stream_executor_memory_allocator",
        "@com_google_absl//absl/status",
        "@local_tsl//tsl/platform:path",
    ],
)

# Test for ":gemm_fusion_autotuner" on the "gpu" backend, which additionally
# carries the "requires-gpu-sm80" tag. The source is only compiled for CUDA
# builds.
xla_test(
    name = "gemm_fusion_autotuner_test",
    srcs = if_cuda_is_configured(["gemm_fusion_autotuner_test.cc"]),
    backend_tags = {
        "gpu": ["requires-gpu-sm80"],
    },
    backends = ["gpu"],
    tags = ["nomac"],
    deps = [
        ":autotuner_util",
        ":backend_configs_cc",
        ":gemm_fusion",
        ":gemm_fusion_autotuner",
        ":ir_emission_utils",
        ":matmul_utils",
        "//xla:autotuning_proto_cc",
        "//xla:error_spec",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:executable",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/stream_executor:device_description",
        "//xla/tests:filecheck",
        "//xla/tests:hlo_test_base",
        "//xla/tests:test_utils",
        "//xla/tests:verified_hlo_module",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//xla/tools:hlo_decomposer_lib",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:platform_port",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ] + if_cuda_is_configured(["@local_config_cuda//cuda:cuda_headers"]),
)

# Library exposing triton_call.h. The implementation file is only compiled
# when a GPU toolchain is configured; the header is always available.
cc_library(
    name = "triton_call",
    srcs = if_gpu_is_configured(["triton_call.cc"]),
    hdrs = ["triton_call.h"],
    # Platform macro for whichever GPU toolchain is configured.
    local_defines = (
        if_cuda_is_configured(["GOOGLE_CUDA=1"]) +
        if_rocm_is_configured(["TENSORFLOW_USE_ROCM=1"])
    ),
    deps = [
        "@llvm-project//mlir:AsmParser",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Parser",
        "@llvm-project//mlir:Support",
    ],
)

# Library built from parallel_loop_emitter.{h,cc}; marked portable through
# get_compatible_with_portable(). Depends on ":launch_dimensions" and
# ":target_util" from this package and on XLA's llvm_ir loop helpers.
cc_library(
    name = "parallel_loop_emitter",
    srcs = ["parallel_loop_emitter.cc"],
    hdrs = ["parallel_loop_emitter.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        ":launch_dimensions",
        ":target_util",
        "//xla:shape_util",
        "//xla/service/llvm_ir:ir_array",
        "//xla/service/llvm_ir:kernel_support_library",
        "//xla/service/llvm_ir:llvm_loop",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/service/llvm_ir:loop_emitter",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Library built from buffer_allocations.{h,cc}.
cc_library(
    name = "buffer_allocations",
    srcs = ["buffer_allocations.cc"],
    hdrs = ["buffer_allocations.h"],
    deps = [
        "//xla/service:buffer_assignment",
        "//xla/stream_executor",
        "//xla/stream_executor:device_memory_allocator",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:logging",
    ],
)

# TODO(b/244780257): Remove this config.
# Boolean build setting named "enable_xlir". Its default comes from
# if_google(True, False) -- presumably True for Google-internal builds and
# False otherwise; confirm against the if_google definition in tsl.bzl.
bool_flag(
    name = "enable_xlir",
    build_setting_default = if_google(True, False),
)

# Library built from gpu_executable.{cc,h}. The dependency list has a common
# part plus extra pieces appended through if_gpu_is_configured,
# if_cuda_is_configured and if_rocm_is_configured.
cc_library(
    name = "gpu_executable",
    srcs = ["gpu_executable.cc"],
    hdrs = ["gpu_executable.h"],
    # Preprocessor defines contributed by the CUDA / ROCm configuration helpers.
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured(["TENSORFLOW_USE_ROCM=1"]),
    deps = [
        ":backend_configs_cc",
        ":buffer_allocations",
        ":gpu_constants",
        ":gpu_executable_run_options",
        ":ir_emission_utils",
        ":stream_executor_util",
        "//xla:executable_run_options",
        "//xla:shape_tree",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "//xla/service:executable",
        "//xla/service:hlo_execution_profile",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_parser",
        "//xla/service:hlo_value",
        "//xla/service:maybe_owning_device_memory",
        "//xla/service:rendezvous",
        "//xla/service:shaped_buffer",
        "//xla/service:stream_pool",
        "//xla/service:xla_debug_info_manager",
        "//xla/service/gpu/runtime:annotation",
        "//xla/service/gpu/runtime:for_all_thunks",
        "//xla/service/gpu/runtime:nccl_clique",
        "//xla/service/gpu/runtime:nccl_clique_key",
        "//xla/service/gpu/runtime:thunk",
        "//xla/stream_executor",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:device_memory_allocator",
        "//xla/stream_executor:scoped_module_handle",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/gpu:gpu_activation",
        "//xla/stream_executor/gpu:gpu_executor_header",
        "//xla/stream_executor/gpu:gpu_stream_header",
        "//xla/stream_executor/gpu:gpu_timer",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/cleanup",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@com_google_absl//absl/types:variant",
        "@llvm-project//llvm:Support",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Parser",
        "@llvm-project//mlir:Support",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:random",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
        "@local_tsl//tsl/profiler/lib:traceme",
    ] + if_gpu_is_configured([":make_batch_pointers"]) + if_cuda_is_configured([
        # Appended through if_cuda_is_configured.
        "//xla/stream_executor/cuda:cublas_plugin",
        "//xla/stream_executor/cuda:cuda_stream",
        "//xla/stream_executor/cuda:cudnn_plugin",
        "//xla/stream_executor/cuda:cufft_plugin",
        "//xla/stream_executor/cuda:stream_executor_cuda",
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        # Appended through if_rocm_is_configured.
        "//xla/stream_executor/rocm:stream_executor_rocm",
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

# Library built from ir_emission_utils.{cc,h}. It is tagged with
# get_compatible_with_portable() and depends on HLO IR, buffer assignment,
# the llvm_ir helpers, the mhlo_to_hlo translation helpers, and LLVM/MLIR.
cc_library(
    name = "ir_emission_utils",
    srcs = ["ir_emission_utils.cc"],
    hdrs = ["ir_emission_utils.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        ":hlo_traversal",
        ":target_util",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "//xla/service:hlo_parser",
        "//xla/service/llvm_ir:buffer_assignment_util",
        "//xla/service/llvm_ir:llvm_type_conversion_util",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/translate/mhlo_to_hlo:location_exporter",
        "//xla/translate/mhlo_to_hlo:type_to_shape",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TargetParser",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:MemRefDialect",
        "@llvm-project//mlir:SideEffectInterfaces",
        "@llvm-project//mlir:Support",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :ir_emission_utils (source: ir_emission_utils_test.cc).
xla_cc_test(
    name = "ir_emission_utils_test",
    srcs = ["ir_emission_utils_test.cc"],
    deps = [
        ":hlo_traversal",
        ":ir_emission_utils",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:types",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/mlir_hlo",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//xla/translate/hlo_to_mhlo:hlo_utils",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Parser",
        "@llvm-project//mlir:Support",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library built from reduction_utils.{cc,h}. Only the CUDA helper contributes
# here: it supplies the GOOGLE_CUDA define and two extra deps
# (:gpu_asm_opts_util and the CUDA asm compiler).
cc_library(
    name = "reduction_utils",
    srcs = ["reduction_utils.cc"],
    hdrs = ["reduction_utils.h"],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":ir_emission_utils",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_module_config",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:logging",
    ] + if_cuda_is_configured([
        ":gpu_asm_opts_util",
        "//xla/stream_executor/cuda:cuda_asm_compiler",
    ]),
)

# Test target for :reduction_utils (source: reduction_utils_test.cc).
xla_cc_test(
    name = "reduction_utils_test",
    srcs = ["reduction_utils_test.cc"],
    deps = [
        ":reduction_utils",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
    ],
)

# Library built from cublas_cudnn.{cc,h}. It is tagged with
# get_compatible_with_portable() and depends only on HLO IR, //xla:util and
# absl/tsl status utilities.
cc_library(
    name = "cublas_cudnn",
    srcs = ["cublas_cudnn.cc"],
    hdrs = ["cublas_cudnn.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Header-only GPU kernel library exposing gpu_prim.h. The CUDA helper appends
# xla_cub_deps() and the ROCm helper appends rocprim.
gpu_kernel_library(
    name = "gpu_prim",
    hdrs = ["gpu_prim.h"],
    deps = [
        "@eigen_archive//:eigen3",
        "@local_config_cuda//cuda:cuda_headers",
        "@local_tsl//tsl/platform:bfloat16",
    ] + if_cuda_is_configured(xla_cub_deps()) + if_rocm_is_configured(["@local_config_rocm//rocm:rocprim"]),
)

# Header-only library exposing variant_visitor.h; it declares no deps.
cc_library(
    name = "variant_visitor",
    hdrs = ["variant_visitor.h"],
)

# Sort kernels generated by the build_cub_sort_kernels macro, one set per type
# returned by get_cub_sort_kernel_types(). Sources, headers and deps are all
# wrapped in if_gpu_is_configured.
build_cub_sort_kernels(
    name = "cub_sort_kernel",
    srcs = if_gpu_is_configured(["cub_sort_kernel.cu.cc"]),
    hdrs = if_gpu_is_configured(["cub_sort_kernel.h"]),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured(["TENSORFLOW_USE_ROCM=1"]),
    types = get_cub_sort_kernel_types(),
    deps = if_gpu_is_configured([":gpu_prim"]),
)

# Library built from gemm_rewriter.{cc,h}. Depends on the HLO pass framework
# (//xla/service:hlo_pass), pattern matching, the HLO evaluator, and the
# stream_executor BLAS / gpu_blas_lt interfaces.
cc_library(
    name = "gemm_rewriter",
    srcs = ["gemm_rewriter.cc"],
    hdrs = ["gemm_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":ir_emission_utils",
        ":matmul_utils",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/evaluator:hlo_evaluator",
        "//xla/hlo/ir:hlo",
        "//xla/service:algorithm_util",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "//xla/service:pattern_matcher",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:ml_dtypes",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/protobuf:dnn_proto_cc",
    ],
)

# Library built from triton_support.{cc,h}. Depends on HLO IR, instruction
# fusion, the stream_executor device description and :variant_visitor.
cc_library(
    name = "triton_support",
    srcs = ["triton_support.cc"],
    hdrs = ["triton_support.h"],
    deps = [
        ":variant_visitor",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:instruction_fusion",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/log:check",
        "@local_tsl//tsl/platform:tensor_float_32_utils",
    ],
)

# Backend test for :triton_support. The source file is wrapped in
# if_gpu_is_configured; the test is declared for the gpu_a100 and gpu_amd_any
# backends, split into 10 shards, and tagged "nomac".
xla_test(
    name = "triton_support_test",
    srcs = if_gpu_is_configured(["triton_support_test.cc"]),
    backends = ["gpu_a100", "gpu_amd_any"],
    shard_count = 10,
    tags = ["nomac"],
    deps = [
        ":gpu_device_info_for_tests",
        ":gpu_float_support",
        ":ir_emitter_triton",
        ":matmul_utils",
        ":triton_fusion_analysis",
        ":triton_support",
        "//xla:error_spec",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:float_normalization",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service/gpu/tests:gpu_codegen_test",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from triton_tiling_propagation.{cc,h}; builds on
# :triton_support, HLO IR / hlo_query and instruction fusion.
cc_library(
    name = "triton_tiling_propagation",
    srcs = ["triton_tiling_propagation.cc"],
    hdrs = ["triton_tiling_propagation.h"],
    deps = [
        ":triton_support",
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:instruction_fusion",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
    ],
)

# Test target for :triton_tiling_propagation
# (source: triton_tiling_propagation_test.cc).
xla_cc_test(
    name = "triton_tiling_propagation_test",
    srcs = ["triton_tiling_propagation_test.cc"],
    deps = [
        ":triton_tiling_propagation",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest",
    ],
)

# Library built from triton_fusion_analysis.{cc,h}; builds on
# :triton_tiling_propagation, :matmul_utils and :cudnn_support_utils, and also
# pulls in //xla/tools:hlo_decomposer_lib.
cc_library(
    name = "triton_fusion_analysis",
    srcs = ["triton_fusion_analysis.cc"],
    hdrs = ["triton_fusion_analysis.h"],
    deps = [
        ":cudnn_support_utils",
        ":matmul_utils",
        ":triton_tiling_propagation",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:instruction_fusion",
        "//xla/tools:hlo_decomposer_lib",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :triton_fusion_analysis
# (source: triton_fusion_analysis_test.cc); also links :gemm_fusion.
xla_cc_test(
    name = "triton_fusion_analysis_test",
    srcs = ["triton_fusion_analysis_test.cc"],
    deps = [
        ":gemm_fusion",
        ":triton_fusion_analysis",
        "//xla:statusor",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:verified_hlo_module",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gemm_fusion.{cc,h}. Depends on the HLO pass framework
# (//xla/service:hlo_pass), the Triton support/analysis/tiling libraries in
# this package, and :cublas_padding_requirements.
cc_library(
    name = "gemm_fusion",
    srcs = ["gemm_fusion.cc"],
    hdrs = ["gemm_fusion.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_padding_requirements",
        ":ir_emission_utils",
        ":matmul_utils",
        ":triton_fusion_analysis",
        ":triton_support",
        ":triton_tiling_propagation",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service:instruction_fusion",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:tensor_float_32_utils",
    ],
)

# Test target for :gemm_fusion (source: gemm_fusion_test.cc); uses the
# pattern-matcher gmock helpers and //xla/tests:filecheck.
xla_cc_test(
    name = "gemm_fusion_test",
    srcs = ["gemm_fusion_test.cc"],
    deps = [
        ":cublas_padding_requirements",
        ":gemm_fusion",
        ":triton_fusion_analysis",
        "//xla:autotuning_proto_cc",
        "//xla:statusor",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/stream_executor:device_description",
        "//xla/tests:filecheck",
        "//xla/tests:hlo_test_base",
        "//xla/tests:verified_hlo_module",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gemv_rewriter.{cc,h}. Depends on the HLO pass framework
# (//xla/service:hlo_pass) and HLO IR; no GPU-specific deps are listed.
cc_library(
    name = "gemv_rewriter",
    srcs = ["gemv_rewriter.cc"],
    hdrs = ["gemv_rewriter.h"],
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :gemv_rewriter (source: gemv_rewriter_test.cc).
xla_cc_test(
    name = "gemv_rewriter_test",
    srcs = ["gemv_rewriter_test.cc"],
    deps = [
        ":gemv_rewriter",
        "//xla/hlo/ir:hlo",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/status:statusor",
        # Plain gtest rather than gtest_main: the test's main() is expected to
        # come from xla_internal_test_main, matching the other xla_cc_test
        # targets in this file, so a second main() provider is not wanted.
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from split_k_gemm_rewriter.{cc,h}; builds on the Triton
# support/analysis/tiling libraries in this package plus :matmul_utils and
# the HLO creation utilities.
cc_library(
    name = "split_k_gemm_rewriter",
    srcs = ["split_k_gemm_rewriter.cc"],
    hdrs = ["split_k_gemm_rewriter.h"],
    deps = [
        ":ir_emission_utils",
        ":matmul_utils",
        ":triton_fusion_analysis",
        ":triton_support",
        ":triton_tiling_propagation",
        "//xla:autotuning_proto_cc",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:hlo_creation_utils",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:cord",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :split_k_gemm_rewriter
# (source: split_k_gemm_rewriter_test.cc); also links the HLO verifier and
# layout assignment.
xla_cc_test(
    name = "split_k_gemm_rewriter_test",
    srcs = ["split_k_gemm_rewriter_test.cc"],
    deps = [
        ":matmul_utils",
        ":split_k_gemm_rewriter",
        ":triton_fusion_analysis",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_verifier",
        "//xla/service:layout_assignment",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:verified_hlo_module",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from softmax_rewriter_triton.{cc,h}. Depends on the HLO pass
# framework (//xla/service:hlo_pass), :triton_support and instruction fusion.
cc_library(
    name = "softmax_rewriter_triton",
    srcs = ["softmax_rewriter_triton.cc"],
    hdrs = ["softmax_rewriter_triton.h"],
    deps = [
        ":backend_configs_cc",
        ":ir_emission_utils",
        ":triton_support",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:hlo_pass",
        "//xla/service:instruction_fusion",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for gemm_algorithm_picker.{cc,h}. Sources, headers and nearly all
# deps are wrapped in if_gpu_is_configured(...); only absl/status is listed
# unconditionally (appended after the conditional list).
cc_library(
    name = "gemm_algorithm_picker",
    srcs = if_gpu_is_configured(["gemm_algorithm_picker.cc"]),
    hdrs = if_gpu_is_configured(["gemm_algorithm_picker.h"]),
    deps = if_gpu_is_configured([
        ":backend_configs_cc",
        ":buffer_comparator",
        ":cublas_cudnn",
        ":gpu_asm_opts_util",
        ":gpu_conv_runner",
        ":ir_emission_utils",
        ":matmul_utils",
        ":stream_executor_util",
        ":variant_visitor",
        ":autotuner_compile_util",
        ":autotuner_util",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/types:span",
        "//xla:autotune_results_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass",
        "//xla:status_macros",
        "//xla/stream_executor",
        "//xla/stream_executor:blas",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:device_memory_allocator",
        "//xla/stream_executor/gpu:redzone_allocator",
        "//xla/tsl/util/proto:proto_utils",
        "//xla:util",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:statusor",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
    ]) + ["@com_google_absl//absl/status"],
)

# Library for autotuner_util.{cc,h}. Sources, headers and most deps are wrapped
# in if_gpu_is_configured(...); the stream_executor memory allocator and
# absl/status are listed unconditionally in the trailing list.
cc_library(
    name = "autotuner_util",
    srcs = if_gpu_is_configured(["autotuner_util.cc"]),
    hdrs = if_gpu_is_configured(["autotuner_util.h"]),
    deps = if_gpu_is_configured([
        ":gpu_asm_opts_util",
        ":stream_executor_util",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "//xla/hlo/ir:hlo",
        "//xla/service:compilation_environments",
        "//xla/stream_executor",
        "//xla/stream_executor/gpu:redzone_allocator",
        "//xla:autotune_results_proto_cc",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla:xla_proto_cc",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:statusor",
    ]) + [
        "//xla/stream_executor:stream_executor_memory_allocator",
        "@com_google_absl//absl/status",
    ],
)

# We need a separate target, as runtime executable cannot depend on compilation
# pipeline.
#
# This target (autotuner_compile_util.{cc,h}) is the piece that depends on
# //xla/service:compiler; it layers on top of :autotuner_util. Sources, headers
# and most deps are wrapped in if_gpu_is_configured(...); only absl/status is
# listed unconditionally.
cc_library(
    name = "autotuner_compile_util",
    srcs = if_gpu_is_configured(["autotuner_compile_util.cc"]),
    hdrs = if_gpu_is_configured(["autotuner_compile_util.h"]),
    deps = if_gpu_is_configured([
        ":autotuner_util",
        ":gpu_executable_run_options",
        ":ir_emission_utils",
        "@com_google_absl//absl/functional:any_invocable",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "//xla/hlo/ir:hlo",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:hlo_module_config",
        "//xla/service:maybe_owning_device_memory",
        "//xla/service:shaped_buffer",
        "//xla/stream_executor",
        "//xla/stream_executor/gpu:gpu_stream_header",
        "//xla/stream_executor/gpu:redzone_allocator",
        "//xla:executable_run_options",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_proto_cc",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ]) + ["@com_google_absl//absl/status"],
)

# Backend test (gpu) for :autotuner_compile_util. Source and deps are wrapped
# in if_gpu_is_configured; the `if_false` branch still supplies gtest_main
# (see the bug referenced on that line).
xla_test(
    name = "autotuner_compile_util_test",
    srcs = if_gpu_is_configured(["autotuner_compile_util_test.cc"]),
    backends = ["gpu"],
    deps = if_gpu_is_configured(
        [
            ":autotuner_compile_util",
            ":autotuner_util",
            "@com_google_googletest//:gtest_main",
            "@com_google_absl//absl/strings",
            "@com_google_absl//absl/strings:string_view",
            "//xla/hlo/ir:hlo",
            "//xla/service:platform_util",
            "//xla/stream_executor:platform",
            "//xla/tests:hlo_test_base",
            "@local_tsl//tsl/platform:statusor",
        ],
        if_false = [
            "@com_google_googletest//:gtest_main",  # b/317293391
        ],
    ),
)

# Backend test for :gemm_algorithm_picker, declared for the gpu_v100 and
# gpu_amd_any backends. The source file is wrapped in if_gpu_is_configured;
# the deps are unconditional.
xla_test(
    name = "gemm_algorithm_picker_test",
    srcs = if_gpu_is_configured(["gemm_algorithm_picker_test.cc"]),
    backends = ["gpu_v100", "gpu_amd_any"],
    deps = [
        ":autotuner_util",
        ":backend_configs_cc",
        ":gemm_algorithm_picker",
        ":gemm_rewriter",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service:platform_util",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
        "@local_tsl//tsl/platform:test_main",
        "@local_tsl//tsl/protobuf:dnn_proto_cc",
    ],
)

# Library built from matmul_utils.{cc,h}, tagged with
# get_compatible_with_portable(). The base deps are extended by four
# conditional groups: if_gpu_is_configured, if_cuda_is_configured,
# if_rocm_is_configured and if_static.
cc_library(
    name = "matmul_utils",
    srcs = ["matmul_utils.cc"],
    hdrs = ["matmul_utils.h"],
    compatible_with = get_compatible_with_portable(),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured(["TENSORFLOW_USE_ROCM=1"]),
    deps = [
        ":backend_configs_cc",
        ":ir_emission_utils",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/mlir_hlo",
        "//xla/service:algorithm_util",
        "//xla/stream_executor",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ] + if_gpu_is_configured([
        #keep sorted
        "//xla/stream_executor:host_or_device_scalar",
    ]) + if_cuda_is_configured([
        #keep sorted
        "//xla/stream_executor/cuda:cublas_lt_header",
        "//xla/stream_executor/cuda:cublas_plugin",
        "@local_tsl//tsl/platform:tensor_float_32_hdr_lib",
    ]) + if_rocm_is_configured([
        #keep sorted
        "//xla/stream_executor/platform:dso_loader",
        "//xla/stream_executor/rocm:amdhipblaslt_plugin",
        "//xla/stream_executor/rocm:hipblas_lt_header",
        "@local_config_rocm//rocm:rocm_headers",
    ]) + if_static(["@local_tsl//tsl/platform:tensor_float_32_utils"]),
)

# Test target for :matmul_utils (source: matmul_utils_test.cc).
xla_cc_test(
    name = "matmul_utils_test",
    srcs = ["matmul_utils_test.cc"],
    deps = [
        ":matmul_utils",
        "//xla:shape_util",
        "//xla:test",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from dot_dimension_sorter.{cc,h}. Depends on the HLO pass
# framework (//xla/service:hlo_pass), HLO IR and //xla:permutation_util.
cc_library(
    name = "dot_dimension_sorter",
    srcs = ["dot_dimension_sorter.cc"],
    hdrs = ["dot_dimension_sorter.h"],
    deps = [
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Backend test (gpu) for :dot_dimension_sorter; built on the GPU codegen test
# fixture (//xla/service/gpu/tests:gpu_codegen_test).
xla_test(
    name = "dot_dimension_sorter_test",
    srcs = ["dot_dimension_sorter_test.cc"],
    backends = ["gpu"],
    deps = [
        ":dot_dimension_sorter",
        "//xla:error_spec",
        "//xla/hlo/ir:hlo",
        "//xla/service/gpu/tests:gpu_codegen_test",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from dot_sparsity_rewriter.{cc,h}. Depends on the HLO pass
# framework (//xla/service:hlo_pass) and the HLO creation utilities.
cc_library(
    name = "dot_sparsity_rewriter",
    srcs = ["dot_sparsity_rewriter.cc"],
    hdrs = ["dot_sparsity_rewriter.h"],
    deps = [
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :dot_sparsity_rewriter
# (source: dot_sparsity_rewriter_test.cc).
xla_cc_test(
    name = "dot_sparsity_rewriter_test",
    srcs = ["dot_sparsity_rewriter_test.cc"],
    deps = [
        ":dot_sparsity_rewriter",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gpu_async_collective_annotator.{cc,h}. Depends on the HLO
# pass framework (//xla/service:hlo_pass), hlo_query and :backend_configs_cc.
cc_library(
    name = "gpu_async_collective_annotator",
    srcs = ["gpu_async_collective_annotator.cc"],
    hdrs = ["gpu_async_collective_annotator.h"],
    deps = [
        ":backend_configs_cc",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :gpu_async_collective_annotator
# (source: gpu_async_collective_annotator_test.cc).
xla_cc_test(
    name = "gpu_async_collective_annotator_test",
    srcs = ["gpu_async_collective_annotator_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":gpu_async_collective_annotator",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/tests:hlo_test_base",
        "//xla/tests:test_macros_header",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
        # Plain gtest rather than gtest_main: the test's main() is expected to
        # come from xla_internal_test_main, matching the other xla_cc_test
        # targets in this file, so a second main() provider is not wanted.
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gpu_convert_async_collectives_to_sync.{cc,h}; layered on
# //xla/service:convert_async_collectives_to_sync and :backend_configs_cc.
cc_library(
    name = "gpu_convert_async_collectives_to_sync",
    srcs = ["gpu_convert_async_collectives_to_sync.cc"],
    hdrs = ["gpu_convert_async_collectives_to_sync.h"],
    deps = [
        ":backend_configs_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:convert_async_collectives_to_sync",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :gpu_convert_async_collectives_to_sync
# (source: gpu_convert_async_collectives_to_sync_test.cc).
xla_cc_test(
    name = "gpu_convert_async_collectives_to_sync_test",
    srcs = ["gpu_convert_async_collectives_to_sync_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":gpu_convert_async_collectives_to_sync",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:string_view",
        # Plain gtest rather than gtest_main: the test's main() is expected to
        # come from xla_internal_test_main, matching the other xla_cc_test
        # targets in this file, so a second main() provider is not wanted.
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from conv_algorithm_picker.{cc,h}.
# srcs, hdrs and every dep except absl/status are gated by
# if_gpu_is_configured; GOOGLE_CUDA=1 and TENSORFLOW_USE_ROCM=1 are added via
# if_cuda_is_configured / if_rocm_is_configured respectively.
# The gated deps are grouped as: package-local (:), workspace (//), external (@).
cc_library(
    name = "conv_algorithm_picker",
    srcs = if_gpu_is_configured(["conv_algorithm_picker.cc"]),
    hdrs = if_gpu_is_configured(["conv_algorithm_picker.h"]),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
        "TENSORFLOW_USE_ROCM=1",
    ]),
    deps = if_gpu_is_configured([
        ":autotuner_compile_util",
        ":autotuner_util",
        ":backend_configs_cc",
        ":buffer_comparator",
        ":cublas_cudnn",
        ":gpu_asm_opts_util",
        ":gpu_autotuning_proto_cc",
        ":gpu_conv_runner",
        ":hlo_algorithm_denylist",
        ":stream_executor_util",
        "//xla:autotune_results_proto_cc",
        "//xla:autotuning_proto_cc",
        "//xla:debug_options_flags",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:executable",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass",
        "//xla/service:slow_operation_alarm",
        "//xla/stream_executor",
        "//xla/stream_executor:device_memory_allocator",
        "//xla/stream_executor:lazy_op_runner",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/gpu:redzone_allocator",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "//xla/tsl/util:env_var",
        "//xla/tsl/util/proto:proto_utils",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@local_config_cuda//cuda:cudnn_header",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:numbers",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ]) + ["@com_google_absl//absl/status"],
)

# xla_test for :conv_algorithm_picker, declared for the gpu_v100 and
# gpu_amd_any backends and tagged noasan/nomsan. The test source is gated by
# if_gpu_is_configured; main() comes from @local_tsl//tsl/platform:test_main.
xla_test(
    name = "conv_algorithm_picker_test",
    srcs = if_gpu_is_configured(["conv_algorithm_picker_test.cc"]),
    backends = [
        "gpu_v100",
        "gpu_amd_any",
    ],
    tags = [
        "noasan",
        "nomsan",
    ],
    deps = [
        ":autotuner_util",
        ":conv_algorithm_picker",
        ":gpu_conv_rewriter",
        "//xla:debug_options_flags",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service:platform_util",
        "//xla/service:tuple_simplifier",
        "//xla/stream_executor:platform",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Library built from gpu_conv_runner.{cc,h}; depends on :cublas_cudnn,
# :stream_executor_util and the StreamExecutor dnn / lazy_op_runner targets.
cc_library(
    name = "gpu_conv_runner",
    srcs = ["gpu_conv_runner.cc"],
    hdrs = ["gpu_conv_runner.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":stream_executor_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:lazy_op_runner",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@eigen_archive//:eigen3",
        "@local_tsl//tsl/platform:ml_dtypes",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gpu_norm_runner.{cc,h}; depends on :cublas_cudnn,
# :stream_executor_util and the StreamExecutor dnn / lazy_op_runner targets.
# The CUDA headers are appended to deps through if_cuda_is_configured.
cc_library(
    name = "gpu_norm_runner",
    srcs = ["gpu_norm_runner.cc"],
    hdrs = ["gpu_norm_runner.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":stream_executor_util",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:lazy_op_runner",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:statusor",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
    ]),
)

# Library built from gpu_fused_mha_runner.{cc,h}; depends on :cublas_cudnn,
# :stream_executor_util and the StreamExecutor dnn / lazy_op_runner targets.
cc_library(
    name = "gpu_fused_mha_runner",
    srcs = ["gpu_fused_mha_runner.cc"],
    hdrs = ["gpu_fused_mha_runner.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":stream_executor_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/stream_executor",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:lazy_op_runner",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@eigen_archive//:eigen3",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gpu_conv_rewriter.{cc,h}; depends on
# //xla/service:hlo_pass, :cublas_cudnn and the GPU backend config protos.
cc_library(
    name = "gpu_conv_rewriter",
    srcs = ["gpu_conv_rewriter.cc"],
    hdrs = ["gpu_conv_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:window_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gpu_sort_rewriter.{cc,h}. srcs and hdrs are gated by
# if_gpu_is_configured, while deps (including
# //xla/service/gpu/runtime:cub_sort_thunk) are listed unconditionally.
cc_library(
    name = "gpu_sort_rewriter",
    srcs = if_gpu_is_configured(["gpu_sort_rewriter.cc"]),
    hdrs = if_gpu_is_configured(["gpu_sort_rewriter.h"]),
    deps = [
        ":cublas_cudnn",
        "//xla:comparison_util",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service/gpu/runtime:cub_sort_thunk",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from move_copy_to_users.{cc,h}; depends on
# //xla/service:hlo_pass and //xla/service:hlo_creation_utils.
cc_library(
    name = "move_copy_to_users",
    srcs = ["move_copy_to_users.cc"],
    hdrs = ["move_copy_to_users.h"],
    deps = [
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :move_copy_to_users, built from move_copy_to_users_test.cc;
# main() comes from @local_tsl//tsl/platform:test_main.
xla_cc_test(
    name = "move_copy_to_users_test",
    srcs = ["move_copy_to_users_test.cc"],
    deps = [
        ":move_copy_to_users",
        "//xla/service:layout_assignment",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:test",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Unit test for :gpu_conv_rewriter, built from gpu_conv_rewriter_test.cc.
# The xla_internal_test_main dep carries a "fixdeps: keep" directive so that
# dependency-cleanup tooling does not drop it.
xla_cc_test(
    name = "gpu_conv_rewriter_test",
    srcs = ["gpu_conv_rewriter_test.cc"],
    deps = [
        ":cublas_cudnn",
        ":gpu_conv_rewriter",
        "//xla:array4d",
        "//xla:literal_util",
        "//xla:protobuf_util",
        "//xla:shape_util",
        "//xla:test",
        "//xla:test_helpers",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service:shape_inference",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings:str_format",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ],
)

# xla_test for :gpu_sort_rewriter, declared for the "gpu" backend and tagged
# no_oss.
# NOTE(review): srcs are gated by if_cuda_is_configured, whereas
# :gpu_sort_rewriter itself is gated by if_gpu_is_configured — confirm whether
# excluding non-CUDA GPU configurations here is intentional.
xla_test(
    name = "gpu_sort_rewriter_test",
    srcs = if_cuda_is_configured(["gpu_sort_rewriter_test.cc"]),
    backends = ["gpu"],
    tags = ["no_oss"],
    deps = [
        ":cublas_cudnn",
        ":gpu_sort_rewriter",
        "//xla:statusor",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library built from cusolver_context.{cc,h} (both gated by
# if_gpu_is_configured). GOOGLE_CUDA=1 / TENSORFLOW_USE_ROCM=1 are defined via
# if_cuda_is_configured / if_rocm_is_configured, and each of those macros also
# appends its own solver/header deps to the common list.
cc_library(
    name = "cusolver_context",
    srcs = if_gpu_is_configured(["cusolver_context.cc"]),
    hdrs = if_gpu_is_configured(["cusolver_context.h"]),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
        "TENSORFLOW_USE_ROCM=1",
    ]),
    deps = [
        "//xla:comparison_util",
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/stream_executor",
        "//xla/stream_executor:blas",
        "//xla/stream_executor/gpu:gpu_stream",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:status",
    ] + if_cuda_is_configured([
        "//xla/tsl/cuda:cusolver",
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        "//xla/stream_executor/rocm:hipsolver_wrapper",
        "//xla/stream_executor/rocm:rocblas_wrapper",
        "//xla/stream_executor/rocm:rocsolver_wrapper",
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

# Library built from cusolver_rewriter.{cc,h}. srcs, hdrs and the entire deps
# list are gated by if_gpu_is_configured. Deps are grouped as: package-local
# (:), workspace (//), external (@).
cc_library(
    name = "cusolver_rewriter",
    srcs = if_gpu_is_configured(["cusolver_rewriter.cc"]),
    hdrs = if_gpu_is_configured(["cusolver_rewriter.h"]),
    deps = if_gpu_is_configured([
        ":cusolver_context",
        ":ir_emission_utils",
        "//xla:comparison_util",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/stream_executor",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_memory_allocator",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ]),
)

# Library built from instruction_fusion.{cc,h} in this package; depends on the
# generic //xla/service:instruction_fusion target and on :gpu_fusible.
cc_library(
    name = "instruction_fusion",
    srcs = ["instruction_fusion.cc"],
    hdrs = ["instruction_fusion.h"],
    deps = [
        ":gpu_fusible",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:fusion_node_indexing_evaluation",
        "//xla/service:fusion_queue",
        "//xla/service:hlo_pass",
        "//xla/service:instruction_fusion",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/meta:type_traits",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
    ],
)

# Unit test for :instruction_fusion, built from instruction_fusion_test.cc and
# tagged nomsan / not_run:arm.
# NOTE(review): deps list both //xla/tests:xla_internal_test_main and
# @com_google_googletest//:gtest_main; presumably only one main() provider is
# needed — confirm before trimming either.
xla_cc_test(
    name = "instruction_fusion_test",
    srcs = ["instruction_fusion_test.cc"],
    tags = [
        "nomsan",
        "not_run:arm",
    ],
    deps = [
        ":gpu_device_info_for_tests",
        ":gpu_fusible",
        ":instruction_fusion",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:test_utils",
        "//xla/tests:verified_hlo_module",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Proto library for fusion_process_dump.proto; depends on the
# device_description proto from //xla/stream_executor.
tf_proto_library(
    name = "fusion_process_dump_proto",
    srcs = ["fusion_process_dump.proto"],
    cc_api_version = 2,
    protodeps = [
        "//xla/stream_executor:device_description_proto",
    ],
)

# Library built from fusion_process_dump.{cc,h}; depends on the C++ bindings
# of :fusion_process_dump_proto plus //xla/tools:hlo_module_loader and
# //xla/service:hlo_graph_dumper.
cc_library(
    name = "fusion_process_dump",
    srcs = ["fusion_process_dump.cc"],
    hdrs = ["fusion_process_dump.h"],
    deps = [
        ":fusion_process_dump_proto_cc",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_graph_dumper",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/tools:hlo_module_loader",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :fusion_process_dump, built from fusion_process_dump_test.cc.
xla_cc_test(
    name = "fusion_process_dump_test",
    srcs = ["fusion_process_dump_test.cc"],
    deps = [
        ":fusion_process_dump",
        ":fusion_process_dump_proto_cc",
        ":gpu_device_info_for_tests",
        "//xla:test",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from priority_fusion.{cc,h}; depends on the GPU cost/performance
# model targets under //xla/service/gpu/model, :hlo_fusion_analysis,
# :triton_fusion_analysis and the C++ bindings of :fusion_process_dump_proto.
cc_library(
    name = "priority_fusion",
    srcs = ["priority_fusion.cc"],
    hdrs = ["priority_fusion.h"],
    deps = [
        ":backend_configs_cc",
        ":fusion_process_dump_proto_cc",
        ":gpu_fusible",
        ":hlo_fusion_analysis",
        ":hlo_traversal",
        ":triton_fusion_analysis",
        "//xla:debug_options_flags",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:dump",
        "//xla/service:fusion_queue",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_graph_dumper",
        "//xla/service:hlo_pass",
        "//xla/service:instruction_fusion",
        "//xla/service/gpu/model:fusion_analysis_cache",
        "//xla/service/gpu/model:gpu_hlo_cost_analysis",
        "//xla/service/gpu/model:gpu_performance_model",
        "//xla/service/gpu/model:gpu_performance_model_base",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/meta:type_traits",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:blocking_counter",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:status",
    ],
)

# Unit test for :priority_fusion, built from priority_fusion_test.cc and tagged
# no_pip. GOOGLE_CUDA=1 is defined through if_cuda_is_configured.
xla_cc_test(
    name = "priority_fusion_test",
    srcs = ["priority_fusion_test.cc"],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    tags = ["no_pip"],
    deps = [
        ":backend_configs_cc",
        ":gpu_device_info_for_tests",
        ":gpu_fusible",
        ":hlo_fusion_analysis",
        ":priority_fusion",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service/gpu/model:gpu_hlo_cost_analysis",
        "//xla/tests:hlo_test_base",
        "//xla/tests:verified_hlo_module",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from multi_output_fusion.{cc,h}; depends on :gpu_fusible, the
# HLO reachability targets and the GPU cost/performance model targets under
# //xla/service/gpu/model.
cc_library(
    name = "multi_output_fusion",
    srcs = ["multi_output_fusion.cc"],
    hdrs = ["multi_output_fusion.h"],
    deps = [
        ":gpu_fusible",
        "//xla:debug_options_flags",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/ir:hlo_dfs_reachability",
        "//xla/hlo/ir:hlo_reachability",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_graph_dumper",
        "//xla/service:hlo_pass",
        "//xla/service:instruction_fusion",
        "//xla/service/gpu/model:gpu_hlo_cost_analysis",
        "//xla/service/gpu/model:gpu_performance_model",
        "//xla/service/gpu/model:gpu_performance_model_base",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :multi_output_fusion, built from multi_output_fusion_test.cc
# and tagged nomsan.
xla_cc_test(
    name = "multi_output_fusion_test",
    srcs = ["multi_output_fusion_test.cc"],
    tags = [
        "nomsan",
    ],
    deps = [
        ":gpu_device_info_for_tests",
        ":gpu_fusible",
        ":multi_output_fusion",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
    ],
)

# Library built from rename_fusions.{cc,h}; depends on :hlo_traversal,
# :ir_emission_utils and //xla/service:hlo_pass.
cc_library(
    name = "rename_fusions",
    srcs = ["rename_fusions.cc"],
    hdrs = ["rename_fusions.h"],
    deps = [
        ":hlo_traversal",
        ":ir_emission_utils",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:btree",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
    ],
)

# Unit test for :rename_fusions, built from rename_fusions_test.cc.
xla_cc_test(
    name = "rename_fusions_test",
    srcs = ["rename_fusions_test.cc"],
    deps = [
        ":rename_fusions",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
    ],
)

# Unit test for :softmax_rewriter_triton, built from
# softmax_rewriter_triton_test.cc. The xla_internal_test_main dep carries a
# "build_cleaner: keep" directive so that cleanup tooling does not drop it.
xla_cc_test(
    name = "softmax_rewriter_triton_test",
    srcs = ["softmax_rewriter_triton_test.cc"],
    deps = [
        ":softmax_rewriter_triton",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla/hlo/ir:hlo",
        "//xla/service:instruction_fusion",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:status_matchers",
    ],
)

# Library built from gpu_sanitize_constant_names.{cc,h}; depends on
# //xla/service:hlo_pass, //xla/service:name_uniquer and
# //xla/service/llvm_ir:buffer_assignment_util.
cc_library(
    name = "gpu_sanitize_constant_names",
    srcs = ["gpu_sanitize_constant_names.cc"],
    hdrs = ["gpu_sanitize_constant_names.h"],
    deps = [
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service:name_uniquer",
        "//xla/service/llvm_ir:buffer_assignment_util",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:status",
    ],
)

# Unit test for :gpu_sanitize_constant_names, built from
# gpu_sanitize_constant_names_test.cc.
xla_cc_test(
    name = "gpu_sanitize_constant_names_test",
    srcs = ["gpu_sanitize_constant_names_test.cc"],
    deps = [
        ":gpu_sanitize_constant_names",
        "//xla:literal_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library built from fusion_merger.{cc,h}; depends on :gpu_fusible,
# //xla/service:instruction_fusion and the GPU cost/performance model targets
# under //xla/service/gpu/model.
cc_library(
    name = "fusion_merger",
    srcs = ["fusion_merger.cc"],
    hdrs = ["fusion_merger.h"],
    deps = [
        ":gpu_fusible",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_graph_dumper",
        "//xla/service:hlo_pass",
        "//xla/service:instruction_fusion",
        "//xla/service/gpu/model:gpu_hlo_cost_analysis",
        "//xla/service/gpu/model:gpu_performance_model",
        "//xla/service/gpu/model:gpu_performance_model_base",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:status",
    ],
)

# Unit test for :fusion_merger, built from fusion_merger_test.cc and tagged
# nomsan.
# NOTE(review): deps list both //xla/tests:xla_internal_test_main and
# @com_google_googletest//:gtest_main; presumably only one main() provider is
# needed — confirm before trimming either.
xla_cc_test(
    name = "fusion_merger_test",
    srcs = ["fusion_merger_test.cc"],
    tags = [
        "nomsan",
    ],
    deps = [
        ":fusion_merger",
        ":gpu_device_info_for_tests",
        ":gpu_fusible",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest_main",
    ],
)

# Library built from gpu_conv_padding_legalization.{cc,h}; depends on
# :cublas_cudnn, //xla/service:hlo_pass, //xla/service:shape_inference and
# //xla:window_util.
cc_library(
    name = "gpu_conv_padding_legalization",
    srcs = ["gpu_conv_padding_legalization.cc"],
    hdrs = ["gpu_conv_padding_legalization.h"],
    deps = [
        ":cublas_cudnn",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:window_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "//xla/service:shape_inference",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :gpu_conv_padding_legalization, built from
# gpu_conv_padding_legalization_test.cc. The xla_internal_test_main dep carries
# a "fixdeps: keep" directive so that cleanup tooling does not drop it.
xla_cc_test(
    name = "gpu_conv_padding_legalization_test",
    srcs = ["gpu_conv_padding_legalization_test.cc"],
    deps = [
        ":cublas_cudnn",
        ":gpu_conv_padding_legalization",
        "//xla:shape_util",
        "//xla:test",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@local_tsl//tsl/platform:test",
    ],
)

# Library built from cudnn_support_utils.{cc,h}; depends on :cublas_cudnn,
# //xla:window_util and //xla/stream_executor:device_description.
cc_library(
    name = "cudnn_support_utils",
    srcs = ["cudnn_support_utils.cc"],
    hdrs = ["cudnn_support_utils.h"],
    deps = [
        ":cublas_cudnn",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla:window_util",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/status:statusor",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :cudnn_support_utils, built from cudnn_support_utils_test.cc.
xla_cc_test(
    name = "cudnn_support_utils_test",
    srcs = ["cudnn_support_utils_test.cc"],
    deps = [
        ":cudnn_support_utils",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:test",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:verified_hlo_module",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from cudnn_pad_for_convolutions.{cc,h}; depends on
# :cublas_cudnn, :cudnn_support_utils, :stream_executor_util and
# //xla/service:hlo_pass.
cc_library(
    name = "cudnn_pad_for_convolutions",
    srcs = ["cudnn_pad_for_convolutions.cc"],
    hdrs = ["cudnn_pad_for_convolutions.h"],
    deps = [
        ":cublas_cudnn",
        ":cudnn_support_utils",
        ":stream_executor_util",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/stream_executor",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/functional:bind_front",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :cudnn_pad_for_convolutions, built from
# cudnn_pad_for_convolutions_test.cc. The xla_internal_test_main dep carries a
# "build_cleaner: keep" directive so that cleanup tooling does not drop it.
xla_cc_test(
    name = "cudnn_pad_for_convolutions_test",
    srcs = ["cudnn_pad_for_convolutions_test.cc"],
    deps = [
        ":cublas_cudnn",
        ":cudnn_pad_for_convolutions",
        "//xla/service:hlo_parser",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_googletest//:gtest",
    ],
)

# Library built from cudnn_vectorize_convolutions.{cc,h}; depends on
# :cublas_cudnn, :cudnn_support_utils, //xla/service:hlo_pass and the
# //xla/client xla_builder / xla_computation targets.
cc_library(
    name = "cudnn_vectorize_convolutions",
    srcs = ["cudnn_vectorize_convolutions.cc"],
    hdrs = ["cudnn_vectorize_convolutions.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":cudnn_support_utils",
        ":stream_executor_util",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla/client:xla_builder",
        "//xla/client:xla_computation",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass",
        "//xla/stream_executor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :cudnn_vectorize_convolutions, built from
# cudnn_vectorize_convolutions_test.cc. The xla_internal_test_main dep carries
# a "build_cleaner: keep" directive so that cleanup tooling does not drop it.
xla_cc_test(
    name = "cudnn_vectorize_convolutions_test",
    srcs = ["cudnn_vectorize_convolutions_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":cudnn_vectorize_convolutions",
        "//xla:statusor",
        "//xla:util",
        "//xla/service:call_inliner",
        "//xla/service:hlo_parser",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from cudnn_simplify_padding.{cc,h}; depends on :cublas_cudnn,
# //xla/service:hlo_pass, //xla/service:pattern_matcher and
# //xla/service:hlo_creation_utils.
cc_library(
    name = "cudnn_simplify_padding",
    srcs = ["cudnn_simplify_padding.cc"],
    hdrs = ["cudnn_simplify_padding.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//xla:literal",
        "//xla:statusor",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "//xla/service:pattern_matcher",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :cudnn_simplify_padding, built from
# cudnn_simplify_padding_test.cc. It also links :cudnn_pad_for_convolutions,
# :cudnn_vectorize_convolutions and several generic //xla/service passes.
# The xla_internal_test_main dep carries a "build_cleaner: keep" directive so
# that cleanup tooling does not drop it.
xla_cc_test(
    name = "cudnn_simplify_padding_test",
    srcs = ["cudnn_simplify_padding_test.cc"],
    deps = [
        ":cudnn_pad_for_convolutions",
        ":cudnn_simplify_padding",
        ":cudnn_vectorize_convolutions",
        "//xla:literal",
        "//xla:statusor",
        "//xla:util",
        "//xla/service:algebraic_simplifier",
        "//xla/service:call_inliner",
        "//xla/service:hlo_pass",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service:reshape_mover",
        "//xla/service:tuple_simplifier",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_absl//absl/functional:function_ref",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# C++ library built from cublas_pad_for_gemms.cc / cublas_pad_for_gemms.h.
# Depends on the local :gemm_fusion, :ir_emission_utils and :triton_support
# targets in addition to the shared HLO and pass infrastructure.
cc_library(
    name = "cublas_pad_for_gemms",
    srcs = ["cublas_pad_for_gemms.cc"],
    hdrs = ["cublas_pad_for_gemms.h"],
    deps = [
        ":gemm_fusion",
        ":ir_emission_utils",
        ":triton_support",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# C++ library built from cublas_padding_requirements.cc / .h. Its only local
# dependency is :variant_visitor; the remaining deps are shape/HLO utilities
# and the stream-executor device description.
cc_library(
    name = "cublas_padding_requirements",
    srcs = ["cublas_padding_requirements.cc"],
    hdrs = ["cublas_padding_requirements.h"],
    deps = [
        ":variant_visitor",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
    ],
)

# Test target for :cublas_pad_for_gemms, built from cublas_pad_for_gemms_test.cc.
# Carries the "nomsan" tag.
# NOTE(review): presumably this excludes the test from MSan runs; confirm
# against the test infrastructure.
xla_cc_test(
    name = "cublas_pad_for_gemms_test",
    srcs = ["cublas_pad_for_gemms_test.cc"],
    tags = ["nomsan"],
    deps = [
        ":cublas_pad_for_gemms",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_googletest//:gtest",
    ],
)

# C++ library for cudnn_fusion_compiler.cc / .h. Sources, headers and almost
# all deps are wrapped in if_cuda_is_configured(); only absl/status is an
# unconditional dependency.
# NOTE(review): if_cuda_is_configured presumably yields an empty list when CUDA
# is not configured, leaving an effectively empty target; confirm in
# cuda_build_defs.bzl.
cc_library(
    name = "cudnn_fusion_compiler",
    srcs = if_cuda_is_configured(["cudnn_fusion_compiler.cc"]),
    hdrs = if_cuda_is_configured(["cudnn_fusion_compiler.h"]),
    deps = if_cuda_is_configured([
        ":backend_configs_cc",
        ":cudnn_support_utils",
        ":ir_emission_utils",
        ":kernel_reuse_cache",
        ":matmul_utils",
        ":triton_fusion_analysis",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_config_cuda//cuda:cudnn_header",
        "//xla:shape_util",
        "//xla:comparison_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:hlo_pass",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/stream_executor/cuda:cudnn_frontend_helpers",
        "//xla/stream_executor/cuda:cudnn_plugin",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ]) + ["@com_google_absl//absl/status"],
)

# C++ library for cudnn_workspace_rewriter.cc / .h. Sources, headers and almost
# all deps are wrapped in if_cuda_is_configured(); only absl/status is an
# unconditional dependency. The gated dep list is not in sorted order.
# NOTE(review): if_cuda_is_configured presumably yields an empty list when CUDA
# is not configured; confirm in cuda_build_defs.bzl.
cc_library(
    name = "cudnn_workspace_rewriter",
    srcs = if_cuda_is_configured(["cudnn_workspace_rewriter.cc"]),
    hdrs = if_cuda_is_configured(["cudnn_workspace_rewriter.h"]),
    deps = if_cuda_is_configured([
        ":backend_configs_cc",
        ":ir_emission_utils",
        ":gpu_fused_mha_runner",
        ":cublas_cudnn",
        ":stream_executor_util",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_config_cuda//cuda:cudnn_header",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/stream_executor/cuda:cudnn_frontend_helpers",
        "//xla/stream_executor/cuda:cudnn_plugin",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
        "//xla:status_macros",
    ]) + ["@com_google_absl//absl/status"],
)

# Proto library generated from executable.proto. Its proto dependencies are
# //xla/service:hlo_proto and //xla:xla_proto. Other targets in this file
# consume the C++ bindings through the ":executable_proto_cc" label.
tf_proto_library(
    name = "executable_proto",
    srcs = ["executable.proto"],
    cc_api_version = 2,
    protodeps = [
        "//xla/service:hlo_proto",
        "//xla:xla_proto",
    ],
)

# Header-only library exposing target_constants.h (no sources, no deps).
cc_library(
    name = "target_constants",
    hdrs = ["target_constants.h"],
)

# C++ library built from gpu_transfer_manager.cc / .h. Depends on both the
# CUDA and ROCm platform-id targets. alwayslink is set so the object code is
# kept by the linker even when no symbol is referenced directly (see the
# registration note on that attribute).
cc_library(
    name = "gpu_transfer_manager",
    srcs = ["gpu_transfer_manager.cc"],
    hdrs = ["gpu_transfer_manager.h"],
    deps = [
        ":io_feed_manager",
        ":target_constants",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/service:compiler",
        "//xla/service:generic_transfer_manager",
        "//xla/service:shaped_buffer",
        "//xla/service:transfer_manager",
        "//xla/stream_executor",
        "//xla/stream_executor:memory_allocation",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/cleanup",
        "@com_google_absl//absl/container:node_hash_map",
        "@com_google_absl//absl/functional:function_ref",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@llvm-project//llvm:Core",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:numbers",
        "@local_tsl//tsl/platform:statusor",
    ],
    alwayslink = True,  # Contains per-platform transfer manager registration
)

# C++ library built from gpu_reduce_scatter_creator.cc / .h. Has no
# package-local deps; relies on HLO IR/query utilities, the pass
# infrastructure and //xla/service:collective_opt_utils.
cc_library(
    name = "gpu_reduce_scatter_creator",
    srcs = ["gpu_reduce_scatter_creator.cc"],
    hdrs = ["gpu_reduce_scatter_creator.h"],
    deps = [
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:collective_opt_utils",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
    ],
)

# C++ library built from gpu_all_gather_optimizer.cc / .h. Has no
# package-local deps; relies on HLO IR, the pass infrastructure and
# //xla/service:collective_ops_utils.
cc_library(
    name = "gpu_all_gather_optimizer",
    srcs = ["gpu_all_gather_optimizer.cc"],
    hdrs = ["gpu_all_gather_optimizer.h"],
    deps = [
        "//xla:shape_util",
        "//xla:statusor",
        "//xla/hlo/ir:hlo",
        "//xla/service:collective_ops_utils",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
    ],
)

# C++ library built from gpu_float_support.cc / .h. Builds on the generic
# //xla/service:float_support target and the stream-executor device
# description.
cc_library(
    name = "gpu_float_support",
    srcs = ["gpu_float_support.cc"],
    hdrs = ["gpu_float_support.h"],
    deps = [
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:float_support",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/log:check",
    ],
)

# C++ library built from compile_module_to_llvm_ir.cc / .h. GOOGLE_CUDA=1 is
# added as a target-local define through if_cuda_is_configured(). Links the
# local IR emitter, executable and thunk runtime targets together with LLVM
# and MLIR libraries.
cc_library(
    name = "compile_module_to_llvm_ir",
    srcs = ["compile_module_to_llvm_ir.cc"],
    hdrs = ["compile_module_to_llvm_ir.h"],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":executable_proto_cc",
        ":execution_stream_assignment",
        ":gpu_constants",
        ":gpu_executable",
        ":gpu_memory_space_assignment",
        ":ir_emitter_context",
        ":ir_emitter_unnested",
        ":metrics",
        ":runtime_intrinsics",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "//xla/service:buffer_value",
        "//xla/service:dump",
        "//xla/service:hlo_dataflow_analysis",
        "//xla/service:hlo_ordering",
        "//xla/service:hlo_proto_cc",
        "//xla/service:logical_buffer",
        "//xla/service/gpu/runtime:conditional_thunk",
        "//xla/service/gpu/runtime:sequential_thunk",
        "//xla/service/gpu/runtime:thunk",
        "//xla/service/gpu/runtime:while_thunk",
        "//xla/stream_executor",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@llvm-project//llvm:AsmParser",
        "@llvm-project//llvm:TransformUtils",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Pass",
        "@llvm-project//mlir:Support",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# C++ library built from command_buffer_scheduling.cc / .h. Uses local fusion
# analysis/traversal helpers, :cublas_cudnn and the XLA FFI API target.
cc_library(
    name = "command_buffer_scheduling",
    srcs = ["command_buffer_scheduling.cc"],
    hdrs = ["command_buffer_scheduling.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":hlo_fusion_analysis",
        ":hlo_traversal",
        ":ir_emission_utils",
        ":variant_visitor",
        "//xla:shape_util",
        "//xla:util",
        "//xla/ffi:ffi_api",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :command_buffer_scheduling, built from
# command_buffer_scheduling_test.cc. Links gtest_main for its entry point and
# the //xla/tests filecheck / verified-module helpers.
xla_cc_test(
    name = "command_buffer_scheduling_test",
    srcs = ["command_buffer_scheduling_test.cc"],
    deps = [
        ":command_buffer_scheduling",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/stream_executor:device_description",
        "//xla/tests:filecheck",
        "//xla/tests:hlo_test_base",
        "//xla/tests:verified_hlo_module",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# C++ library built from custom_kernel_fusion_rewriter.cc / .h. Depends on the
# custom fusion library and fusion pattern targets under
# //xla/service/gpu/kernels.
cc_library(
    name = "custom_kernel_fusion_rewriter",
    srcs = ["custom_kernel_fusion_rewriter.cc"],
    hdrs = ["custom_kernel_fusion_rewriter.h"],
    deps = [
        "//xla:shape_util",
        "//xla:statusor",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service/gpu/kernels:custom_fusion_library",
        "//xla/service/gpu/kernels:custom_kernel_fusion_pattern",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :custom_kernel_fusion_rewriter, built from
# custom_kernel_fusion_rewriter_test.cc. Uses :gpu_device_info_for_tests and
# the TSL test / test_main targets.
xla_cc_test(
    name = "custom_kernel_fusion_rewriter_test",
    srcs = ["custom_kernel_fusion_rewriter_test.cc"],
    deps = [
        ":custom_kernel_fusion_rewriter",
        ":gpu_device_info_for_tests",
        "//xla/hlo/ir:hlo",
        "//xla/service/gpu/kernels:custom_kernel_fusion_pattern",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "@local_tsl//tsl/platform:test",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# C++ library built from address_computation_fusion_rewriter.cc / .h. Depends
# on the XLA FFI API and C API targets, the custom-call target registry and
# the custom fusion library under //xla/service/gpu/kernels.
cc_library(
    name = "address_computation_fusion_rewriter",
    srcs = ["address_computation_fusion_rewriter.cc"],
    hdrs = ["address_computation_fusion_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":gpu_constants",
        ":hlo_traversal",
        ":ir_emission_utils",
        "//xla:shape_util",
        "//xla:util",
        "//xla/ffi:ffi_api",
        "//xla/ffi/api:c_api",
        "//xla/hlo/ir:hlo",
        "//xla/service:custom_call_target_registry",
        "//xla/service:hlo_pass",
        "//xla/service/gpu/kernels:custom_fusion_library",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :address_computation_fusion_rewriter. The test source is
# wrapped in if_cuda_is_configured() while the deps are unconditional.
# NOTE(review): without CUDA this presumably builds a test binary with no
# test cases (only test_main); confirm that is intended.
xla_cc_test(
    name = "address_computation_fusion_rewriter_test",
    srcs = if_cuda_is_configured(["address_computation_fusion_rewriter_test.cc"]),
    deps = [
        ":address_computation_fusion_rewriter",
        ":gpu_device_info_for_tests",
        "//xla:shape_util",
        "//xla/client:xla_builder",
        "//xla/client/lib:constants",
        "//xla/ffi",
        "//xla/ffi:ffi_api",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_value",
        "//xla/service:custom_call_target_registry",
        "//xla/service:executable",
        "//xla/service:hlo_memory_scheduler",
        "//xla/service:hlo_module_config",
        "//xla/stream_executor",
        "//xla/stream_executor/gpu:gpu_types_header",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/status",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# C++ library built from fusion_pipeline.cc / .h. Aggregates the package's
# fusion-related targets (fusion merger, horizontal/instruction/multi-output/
# priority fusion, ...) together with generic HLO cleanup and verification
# passes and the GPU HLO cost analysis.
cc_library(
    name = "fusion_pipeline",
    srcs = ["fusion_pipeline.cc"],
    hdrs = ["fusion_pipeline.h"],
    deps = [
        ":fusion_merger",
        ":horizontal_input_fusion",
        ":horizontal_loop_fusion",
        ":instruction_fusion",
        ":multi_output_fusion",
        ":priority_fusion",
        ":rename_fusions",
        ":variadic_op_splitter",
        "//xla:xla_proto_cc",
        "//xla/service:cpu_gpu_shape_verifier",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_cse",
        "//xla/service:hlo_dce",
        "//xla/service:hlo_pass",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:hlo_verifier",
        "//xla/service:layout_assignment",
        "//xla/service/gpu/model:gpu_hlo_cost_analysis",
        "//xla/stream_executor:device_description",
        "@local_tsl//tsl/platform:env",
    ],
)

# C++ library built from prepare_hlo_for_ir_emitting_pipeline.cc / .h. Links
# the local :alias_passthrough_params, :copy_fusion,
# :gpu_sanitize_constant_names and :horizontal_loop_fusion targets plus generic
# copy-insertion, DCE, verification and scheduling-linearizer passes.
cc_library(
    name = "prepare_hlo_for_ir_emitting_pipeline",
    srcs = ["prepare_hlo_for_ir_emitting_pipeline.cc"],
    hdrs = ["prepare_hlo_for_ir_emitting_pipeline.h"],
    deps = [
        ":alias_passthrough_params",
        ":copy_fusion",
        ":gpu_sanitize_constant_names",
        ":horizontal_loop_fusion",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:copy_insertion",
        "//xla/service:cpu_gpu_shape_verifier",
        "//xla/service:hlo_dataflow_analysis",
        "//xla/service:hlo_dce",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:hlo_verifier",
        "//xla/service:layout_assignment",
        "//xla/service:loop_schedule_linearizer",
    ],
)

# C++ library for gpu_compiler.cc / gpu_compiler.h. The deps are assembled
# from three parts:
#   1. a large list gated by if_gpu_is_configured() (not in sorted order),
#   2. whatever xla_internal(["service:export_hlo"]) expands to,
#   3. a trailing list of unconditional deps.
# Sources and headers are gated by if_gpu_is_configured() as well.
# NOTE(review): if_gpu_is_configured presumably yields an empty list when no
# GPU backend is configured; confirm in //xla/stream_executor:build_defs.bzl.
cc_library(
    name = "gpu_compiler",
    srcs = if_gpu_is_configured([
        "gpu_compiler.cc",
    ]),
    hdrs = if_gpu_is_configured([
        "gpu_compiler.h",
    ]),
    deps = if_gpu_is_configured([
        ":gpu_p2p_pipeliner",
        ":pipelined_p2p_rewriter",
        ":collective_permute_cycle_decomposer",
        ":address_computation_fusion_rewriter",
        ":gemv_rewriter",
        ":algorithm_checker",
        ":alias_passthrough_params",
        ":all_reduce_blueconnect",
        ":stream_attribute_async_wrapper",
        ":autotuner_util",
        ":buffer_sharing",
        ":compile_module_to_llvm_ir",
        ":conv_layout_normalization",
        ":copy_fusion",
        ":custom_kernel_fusion_rewriter",
        ":dot_dimension_sorter",
        ":dot_operand_converter",
        ":executable_proto_cc",
        ":fusion_merger",
        ":fusion_wrapper",
        ":gemm_broadcast_folding_rewriter",
        ":gemm_fusion",
        ":gemm_rewriter",
        ":gpu_algebraic_simplifier",
        ":gpu_all_gather_optimizer",
        ":gpu_async_collective_annotator",
        ":gpu_constants",
        ":gpu_conv_rewriter",
        ":gpu_convert_async_collectives_to_sync",
        ":gpu_executable",
        ":gpu_float_support",
        ":gpu_hlo_schedule",
        ":gpu_layout_assignment",
        ":gpu_reduce_scatter_creator",
        ":gpu_sanitize_constant_names",
        ":gpu_scatter_expander",
        ":gpu_windowed_einsum_handler",
        ":hlo_fusion_stats",
        ":horizontal_input_fusion",
        ":horizontal_loop_fusion",
        ":instruction_fusion",
        ":ir_emission_utils",
        ":ir_emitter",
        ":double_buffer_loop_unrolling",
        ":matmul_utils",
        ":metrics",
        ":move_copy_to_users",
        ":multi_output_fusion",
        ":priority_fusion",
        ":reduction_degenerate_dim_remover",
        ":reduction_dimension_grouper",
        ":reduction_layout_normalizer",
        ":reduction_splitter",
        ":reduction_utils",
        ":runtime_intrinsics",
        ":scatter_slice_simplifier",
        ":softmax_rewriter_triton",
        ":stream_attribute_annotator",
        ":topk_specializer",
        ":topk_splitter",
        ":tree_reduction_rewriter",
        ":triton_fusion_numerics_verifier",
        ":variadic_op_splitter",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:variant",
        "@llvm-project//llvm:AsmParser",
        "@llvm-project//llvm:BitReader",
        "@llvm-project//llvm:BitWriter",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TransformUtils",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Pass",
        "@llvm-project//mlir:Support",
        "//xla:autotune_results_proto_cc",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/transforms:hlo_constant_splitter",
        "//xla/service:algebraic_simplifier",
        "//xla/service:all_gather_broadcast_reorder",
        "//xla/service:all_gather_combiner",
        "//xla/service:all_reduce_combiner",
        "//xla/service:all_reduce_contiguous",
        "//xla/service:all_reduce_folder",
        "//xla/service:all_reduce_promotion",
        "//xla/service:all_reduce_reassociate",
        "//xla/service:async_collective_creator",
        "//xla/service:batchnorm_expander",
        "//xla/service:bitcast_dtypes_expander",
        "//xla/service:broadcast_canonicalizer",
        "//xla/service:buffer_assignment",
        "//xla/service:call_inliner",
        "//xla/service:collective_permute_decomposer",
        "//xla/service:collective_pipeliner",
        "//xla/service:collectives_schedule_linearizer",
        "//xla/service:comparison_expander",
        "//xla/service:conditional_canonicalizer",
        "//xla/service:conditional_simplifier",
        "//xla/service:convert_async_collectives_to_sync",
        "//xla/service:convert_memory_placement_to_internal_annotations",
        "//xla/service:convert_mover",
        "//xla/service:convolution_4d_expander",
        "//xla/service:convolution_pred_expander",
        "//xla/service:copy_insertion",
        "//xla/service:cpu_gpu_shape_verifier",
        "//xla/service:dot_decomposer",
        "//xla/service:dot_merger",
        "//xla/service:dump",
        "//xla/service:dynamic_dimension_simplifier",
        "//xla/service:dynamic_index_splitter",
        "//xla/service:dynamic_padder",
        "//xla/service:eigh_expander",
        "//xla/service:executable",
        "//xla/service:export_hlo",
        "//xla/service:flatten_call_graph",
        "//xla/service:float_normalization",
        "//xla/service:float_support",
        "//xla/service:gather_expander",
        "//xla/service:gather_simplifier",
        "//xla/service:hlo_computation_deduplicator",
        "//xla/service:hlo_constant_folding",
        "//xla/service:hlo_cse",
        "//xla/service:hlo_dataflow_analysis",
        "//xla/service:hlo_dce",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:hlo_pass",
        "//xla/service:hlo_proto_cc",
        "//xla/service:hlo_rematerialization",
        "//xla/service:hlo_verifier",
        "//xla/service:host_memory_transfer_asyncifier",
        "//xla/service:host_offload_legalize",
        "//xla/service:host_offloader",
        "//xla/service:layout_normalization",
        "//xla/service:llvm_compiler",
        "//xla/service:logistic_expander",
        "//xla/service:loop_schedule_linearizer",
        "//xla/service:operand_upcaster",
        "//xla/service:optimization_barrier_expander",
        "//xla/service:optimize_input_output_buffer_alias",
        "//xla/service:qr_expander",
        "//xla/service:real_imag_expander",
        "//xla/service:reduce_decomposer",
        "//xla/service:reduce_window_rewriter",
        "//xla/service:reduce_scatter_combiner",
        "//xla/service:reduce_scatter_reassociate",
        "//xla/service:reshape_decomposer",
        "//xla/service:reshape_mover",
        "//xla/service:result_caster",
        "//xla/service:rng_bit_generator_expander",
        "//xla/service:rng_expander",
        "//xla/service:scatter_simplifier",
        "//xla/service:sharding_propagation",
        "//xla/service:sharding_remover",
        "//xla/service:simplify_fp_conversions",
        "//xla/service:slice_sinker",
        "//xla/service:slow_operation_alarm",
        "//xla/service:sort_simplifier",
        "//xla/service:stable_sort_expander",
        "//xla/service:stochastic_convert_decomposer",
        "//xla/service:topk_rewriter",
        "//xla/service:transpose_folding",
        "//xla/service:tuple_simplifier",
        "//xla/service:while_loop_all_reduce_code_motion",
        "//xla/service:while_loop_constant_sinking",
        "//xla/service:while_loop_simplifier",
        "//xla/service:while_loop_trip_count_annotator",
        "//xla/service:zero_sized_hlo_elimination",
        "//xla/service/gpu/model:gpu_cost_model_stats_collection",
        "//xla/service/gpu/model:gpu_hlo_cost_analysis",
        "//xla/service:sub_byte_normalization",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/service/spmd:collective_permute_motion",
        "//xla/service/spmd:stateful_rng_spmd_partitioner",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor",
        "//xla/stream_executor/gpu:gpu_driver_header",
        "//xla/stream_executor/integrations:device_mem_allocator",
        "//xla/translate/hlo_to_mhlo:hlo_utils",
        "//xla/translate/mhlo_to_hlo:location_exporter",
        "@local_tsl//tsl/platform:blocking_counter",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:platform_port",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
        "@local_tsl//tsl/profiler/lib:traceme",
        "@local_tsl//tsl/platform:protobuf",
        "//xla/service:compiler",
        "//xla/service:scatter_expander",
        "//xla:debug_options_flags",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/types:span",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo_module_group",
        "//xla/service:buffer_value",
        "//xla/service:dynamic_dimension_inference",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_ordering",
        "//xla/service:layout_assignment",
        "//xla/service:logical_buffer",
        "@local_tsl//tsl/platform:numbers",
    # Everything after the xla_internal() term is linked unconditionally,
    # i.e. outside the if_gpu_is_configured() gate.
    ]) + xla_internal(["service:export_hlo"]) + [
        ":command_buffer_scheduling",
        ":execution_stream_assignment",
        ":fusion_pipeline",
        ":ir_emitter_context",
        ":ir_emitter_unnested",
        ":prepare_hlo_for_ir_emitting_pipeline",
        ":rename_fusions",
        ":stream_executor_util",
        "//xla/service:all_reduce_splitter",
        "//xla/service/gpu/runtime:thunk",
        "//xla/stream_executor:platform_manager",
        "@com_google_absl//absl/status",
        "@llvm-project//mlir:FuncDialect",
        "@local_tsl//tsl/lib/monitoring:counter",
    ],
)

# xla_test for :gpu_compiler, restricted to the "gpu" backend. The test source
# is gated by if_gpu_is_configured(); gpu_compiler_test_autotune_db.textproto
# is shipped as a data file alongside the test binary.
xla_test(
    name = "gpu_compiler_test",
    srcs = if_gpu_is_configured(["gpu_compiler_test.cc"]),
    backends = ["gpu"],
    data = ["gpu_compiler_test_autotune_db.textproto"],
    deps = [
        ":autotuner_util",
        ":gpu_compiler",
        ":gpu_hlo_schedule",
        ":metrics",
        "//xla:autotune_results_proto_cc",
        "//xla:error_spec",
        "//xla/hlo/ir:hlo",
        "//xla/service:executable",
        "//xla/service:hlo_module_config",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service:xla_debug_info_manager",
        "//xla/stream_executor:device_description",
        "//xla/tests:filecheck",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ],
)

# xla_test built from gpu_offloading_test.cc, restricted to the "gpu" backend.
# Links rematerialization / memory-scheduling targets and their test utilities
# together with the local backend-config, fusion, metrics and
# stream-attribute-annotator targets.
xla_test(
    name = "gpu_offloading_test",
    srcs = ["gpu_offloading_test.cc"],
    backends = ["gpu"],
    deps = [
        ":backend_configs_cc",
        ":horizontal_loop_fusion",
        ":metrics",
        # Same-package target, so it uses the short ":name" label form like the
        # other local deps instead of the absolute //xla/service/gpu:... form.
        ":stream_attribute_annotator",
        "//xla:autotune_results_proto_cc",
        "//xla:error_spec",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_matchers",
        "//xla/service:buffer_assignment",
        "//xla/service:buffer_value",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_memory_scheduler",
        "//xla/service:hlo_rematerialization",
        "//xla/service:hlo_rematerialization_test_utils",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service:xla_debug_info_manager",
        "//xla/tests:hlo_test_base",
        "//xla/tests:test_macros_header",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/base:log_severity",
        "@com_google_absl//absl/log:scoped_mock_log",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# xla_test built from auto_sharding_gpu_compiler_test.cc, restricted to the
# "gpu" backend and tagged "no_oss" (see the TODO on the tags attribute).
xla_test(
    name = "auto_sharding_gpu_compiler_test",
    srcs = ["auto_sharding_gpu_compiler_test.cc"],
    backends = ["gpu"],
    tags = ["no_oss"],  # TODO(b/277355322): Make autosharding work in OSS
    deps = [
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_module_config",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Registration library for the NVPTX compiler: builds
# nvptx_compiler_registration.cc on top of :nvptx_compiler_impl. Both srcs and
# deps are gated by if_cuda_is_configured(). alwayslink keeps the object code
# in the final binary even when nothing references its symbols directly.
cc_library(
    name = "nvptx_compiler",
    srcs = if_cuda_is_configured(["nvptx_compiler_registration.cc"]),
    deps = if_cuda_is_configured([
        ":nvptx_compiler_impl",
        "//xla/service:compiler",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "@local_tsl//tsl/platform:path",
    ]),
    alwayslink = True,  # Contains compiler registration
)

cc_library(
    name = "nvptx_compiler_impl",
    srcs = if_cuda_is_configured([
        "nvptx_compiler.cc",
    ]),
    hdrs = if_cuda_is_configured([
        "nvptx_compiler.h",
    ]),
    local_defines = select({
        "//xla/stream_executor/cuda:libnvptxcompiler_support_enabled": [
            "ENABLE_LIBNVPTXCOMPILER_SUPPORT=1",
        ],
        "//conditions:default": [],
    }) + if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = if_cuda_is_configured([
        ":autotuner_util",
        ":buffer_sharing",
        ":conv_algorithm_picker",
        ":cublas_cudnn",
        ":cublas_pad_for_gemms",
        ":cublas_padding_requirements",
        ":cudnn_fused_conv_rewriter",
        ":cudnn_fused_mha_rewriter",
        ":cudnn_fused_mha_transpose_fusion",
        ":cudnn_workspace_rewriter",
        ":cudnn_fusion_compiler",
        ":cudnn_norm_rewriter",
        ":cudnn_pad_for_convolutions",
        ":cudnn_simplify_padding",
        ":cudnn_vectorize_convolutions",
        ":cusolver_rewriter",
        ":dot_sparsity_rewriter",
        ":gemm_algorithm_picker",
        ":gemm_fusion_autotuner",
        ":gpu_algebraic_simplifier",
        ":gpu_asm_opts_util",
        ":gpu_compiler",
        ":gpu_conv_padding_legalization",
        ":gpu_conv_rewriter",
        ":gpu_executable",
        ":gpu_layout_assignment",
        ":gpu_sort_rewriter",
        ":ir_emission_utils",
        ":metrics",
        ":move_copy_to_users",
        ":target_constants",
        ":triangular_solve_rewriter",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/cleanup",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:node_hash_map",
        "@local_config_cuda//cuda:cuda_headers",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/synchronization",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//llvm:IRReader",
        "@llvm-project//llvm:Support",
        "//xla:autotune_results_proto_cc",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:call_inliner",
        "//xla/service:convert_mover",
        "//xla/service:dot_dimension_merger",
        "//xla/service:dump",
        "//xla/service:float_normalization",
        "//xla/service:float_support",
        "//xla/service:hlo_constant_folding",
        "//xla/service:hlo_cse",
        "//xla/service:hlo_dataflow_analysis",
        "//xla/service:hlo_dce",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:hlo_proto_cc",
        "//xla/service:hlo_verifier",
        "//xla/service:layout_normalization",
        "//xla/service:llvm_compiler",
        "//xla/service:reshape_decomposer",
        "//xla/service:reshape_mover",
        "//xla/service:tuple_simplifier",
        "//xla/service/gpu/llvm_gpu_backend",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/stream_executor",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/stream_executor/cuda:cuda_asm_compiler",
        "//xla/stream_executor/cuda:cuda_diagnostics",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/cuda:ptx_compiler",
        "//xla/stream_executor/cuda:ptx_compiler_support",
        "//xla/stream_executor/gpu:asm_compiler",
        "//xla/stream_executor/gpu:gpu_asm_opts",
        "//xla/stream_executor/gpu:gpu_driver_header",
        "//xla/stream_executor/gpu:gpu_executor_header",
        "@local_tsl//tsl/platform:cuda_libdevice_path",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
        "@local_tsl//tsl/profiler/lib:traceme",
        "//xla/tsl/util:env_var",
    ]) + ["@com_google_absl//absl/status"],
)

# Test for the NVPTX compiler; links :nvptx_compiler_impl directly.
# The test source is only listed when CUDA is configured, and the test is
# instantiated for the gpu_v100 and gpu_a100 backends.
xla_test(
    name = "nvptx_compiler_test",
    srcs = if_cuda_is_configured([
        "nvptx_compiler_test.cc",
    ]),
    backends = [
        "gpu_v100",
        "gpu_a100",
    ],
    tags = [
        "no_rocm",
        "nomsan",  # Pulls in precompiled NVIDIA libraries which cause false positives in msan.
    ],
    deps = [
        ":gpu_constants",
        ":gpu_hlo_schedule",
        ":nvptx_compiler_impl",
        "//xla:util",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:backend",
        "//xla/service:buffer_assignment",
        "//xla/service:buffer_value",
        "//xla/service:hlo_ordering",
        "//xla/service:logical_buffer",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test built from gpu_aot_compilation_test.cc. The source is only listed when a
# GPU toolchain is configured. The compiler implementation dep and the
# GOOGLE_CUDA / TENSORFLOW_USE_ROCM define follow whichever of CUDA or ROCm is
# configured.
# NOTE(review): tagged "requires-gpu-nvidia" and "no_oss" even though ROCm deps
# are wired in below -- confirm whether the ROCm path is exercised anywhere.
xla_cc_test(
    name = "gpu_aot_compilation_test",
    srcs = if_gpu_is_configured([
        "gpu_aot_compilation_test.cc",
    ]),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
        "TENSORFLOW_USE_ROCM=1",
    ]),
    tags = [
        "gpu",
        "ignore_for_dep=third_party/tensorflow/compiler/xla/service/gpu/amdgpu_compiler.h",
        "no_oss",
        "nomsan",  # Pulls in precompiled NVIDIA libraries which cause false positives in msan.
        "requires-gpu-nvidia",
    ],
    deps = if_cuda_is_configured([
        ":nvptx_compiler_impl",
    ]) + if_rocm_is_configured([
        ":amdgpu_compiler_impl",
    ]) + [
        ":gpu_transfer_manager",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/ir:hlo_module_group",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:gpu_plugin",
        "//xla/service:platform_util",
        "//xla/stream_executor",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:platform_manager",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Registration target for the AMDGPU compiler: compiles
# amdgpu_compiler_registration.cc on top of :amdgpu_compiler_impl.
# Tagged "manual", so it is only built when requested explicitly or pulled in
# as a dependency, not by wildcard target patterns.
cc_library(
    name = "amdgpu_compiler",
    srcs = [
        "amdgpu_compiler_registration.cc",
    ],
    local_defines = if_rocm_is_configured(["TENSORFLOW_USE_ROCM=1"]),
    tags = ["manual"],
    deps = [
        ":amdgpu_compiler_impl",
        "//xla/service:compiler",
        "//xla/stream_executor/rocm:rocm_platform_id",
    ],
    alwayslink = True,  # Contains compiler registration
)

# Library for gpu_algebraic_simplifier.{h,cc}; depends on the generic
# //xla/service:algebraic_simplifier plus the GPU-local :matmul_utils and
# :triton_support targets.
cc_library(
    name = "gpu_algebraic_simplifier",
    srcs = [
        "gpu_algebraic_simplifier.cc",
    ],
    hdrs = [
        "gpu_algebraic_simplifier.h",
    ],
    deps = [
        ":matmul_utils",
        ":triton_support",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:algebraic_simplifier",
        "//xla/service:hlo_pass",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Unit test for :gpu_algebraic_simplifier, linked against
# //xla/tests:xla_internal_test_main and the HLO test base.
xla_cc_test(
    name = "gpu_algebraic_simplifier_test",
    srcs = ["gpu_algebraic_simplifier_test.cc"],
    deps = [
        ":gpu_algebraic_simplifier",
        "//xla/hlo/ir:hlo",
        "//xla/service:algebraic_simplifier",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# AMDGPU compiler implementation (amdgpu_compiler.{h,cc}). This is the target
# that :amdgpu_compiler wraps with its registration source.
# Tagged "manual", so wildcard target patterns skip it. The ROCm headers are
# appended to deps only when ROCm is configured.
cc_library(
    name = "amdgpu_compiler_impl",
    srcs = [
        "amdgpu_compiler.cc",
    ],
    hdrs = [
        "amdgpu_compiler.h",
    ],
    tags = ["manual"],
    deps = [
        ":autotuner_util",
        ":conv_algorithm_picker",
        ":cublas_pad_for_gemms",
        ":cublas_padding_requirements",
        ":cudnn_fused_conv_rewriter",
        ":cusolver_rewriter",
        ":gemm_algorithm_picker",
        ":gpu_compiler",
        ":gpu_conv_padding_legalization",
        ":gpu_conv_rewriter",
        ":gpu_sort_rewriter",
        ":target_constants",
        ":triangular_solve_rewriter",
        "//xla:util",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:algebraic_simplifier",
        "//xla/service:call_inliner",
        "//xla/service:convert_mover",
        "//xla/service:dot_dimension_merger",
        "//xla/service:float_normalization",
        "//xla/service:float_support",
        "//xla/service:hlo_constant_folding",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:hlo_verifier",
        "//xla/service:reshape_mover",
        "//xla/service:tuple_simplifier",
        "//xla/service/gpu/llvm_gpu_backend",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@llvm-project//llvm:ir_headers",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ] + if_rocm_is_configured([
        # keep sorted
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

# Library for all_reduce_blueconnect.{h,cc}. It has no GPU-toolchain-conditional
# attributes, so it builds the same way with or without CUDA/ROCm configured.
cc_library(
    name = "all_reduce_blueconnect",
    srcs = ["all_reduce_blueconnect.cc"],
    hdrs = ["all_reduce_blueconnect.h"],
    deps = [
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:computation_placer_hdr",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:btree",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :all_reduce_blueconnect. Unlike most tests in this package it
# links @local_tsl//tsl/platform:test_main rather than
# //xla/tests:xla_internal_test_main.
xla_cc_test(
    name = "all_reduce_blueconnect_test",
    srcs = ["all_reduce_blueconnect_test.cc"],
    deps = [
        ":all_reduce_blueconnect",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:computation_placer_hdr",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Header-only library exposing xfeed_queue.h (no srcs). Used by
# :io_feed_manager below.
cc_library(
    name = "xfeed_queue",
    hdrs = ["xfeed_queue.h"],
    deps = [
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/synchronization",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Infeed and outfeed managers bundled into a single library.
# xla_executor_state.h is listed in srcs rather than hdrs, which makes it a
# private header: it is compiled with this target but is not exported to
# dependents. GOOGLE_CUDA=1 is defined only when CUDA is configured.
cc_library(
    name = "io_feed_manager",
    srcs = [
        "infeed_manager.cc",
        "outfeed_manager.cc",
        "xla_executor_state.h",
    ],
    hdrs = [
        "infeed_manager.h",
        "outfeed_manager.h",
    ],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":xfeed_queue",
        "//xla:literal",
        "//xla:shape_tree",
        "//xla:shape_util",
        "//xla:util",
        "//xla/stream_executor:device_memory_handle",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/stream_executor/gpu:gpu_executor_header",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/synchronization",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:notification",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for gpu_layout_assignment.{h,cc}; depends on the generic
# //xla/service:layout_assignment and on GPU-local helpers (:cublas_cudnn,
# :matmul_utils, :reduction_utils, :stream_executor_util).
cc_library(
    name = "gpu_layout_assignment",
    srcs = ["gpu_layout_assignment.cc"],
    hdrs = ["gpu_layout_assignment.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":matmul_utils",
        ":reduction_utils",
        ":stream_executor_util",
        "//xla:shape_layout",
        "//xla:shape_util",
        "//xla:util",
        "//xla:window_util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:computation_layout",
        "//xla/service:host_memory_offload_annotations_hdr",
        "//xla/service:layout_assignment",
        "//xla/service:logical_buffer",
        "//xla/stream_executor",
        "//xla/tsl/util:env_var",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :gpu_layout_assignment. The xla_internal_test_main dep carries
# a "build_cleaner: keep" marker so automated dep cleanup does not drop it.
xla_cc_test(
    name = "gpu_layout_assignment_test",
    srcs = ["gpu_layout_assignment_test.cc"],
    deps = [
        ":gpu_layout_assignment",
        ":stream_executor_util",
        "//xla:shape_layout",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:computation_layout",
        "//xla/service:hlo_parser",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/stream_executor",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for gpu_schedule_postprocessing.{h,cc}. Consumed by :gpu_hlo_schedule
# below.
cc_library(
    name = "gpu_schedule_postprocessing",
    srcs = ["gpu_schedule_postprocessing.cc"],
    hdrs = ["gpu_schedule_postprocessing.h"],
    deps = [
        ":backend_configs_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :gpu_schedule_postprocessing.
xla_cc_test(
    name = "gpu_schedule_postprocessing_test",
    srcs = ["gpu_schedule_postprocessing_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":gpu_schedule_postprocessing",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for gpu_hlo_schedule.{h,cc}. Depends on the generic scheduling
# targets under //xla/service (memory scheduler, latency hiding scheduler,
# profile-guided latency estimator), the GPU analytical latency estimator, and
# :gpu_schedule_postprocessing.
cc_library(
    name = "gpu_hlo_schedule",
    srcs = ["gpu_hlo_schedule.cc"],
    hdrs = ["gpu_hlo_schedule.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":gpu_schedule_postprocessing",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:buffer_value",
        "//xla/service:collective_ops_utils",
        "//xla/service:hlo_memory_scheduler",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:latency_hiding_scheduler",
        "//xla/service:p2p_schedule_preparation",
        "//xla/service:profile_guided_latency_estimator",
        "//xla/service/gpu/model:analytical_latency_estimator",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test for :gpu_hlo_schedule, instantiated for the generic "gpu" backend.
xla_test(
    name = "gpu_hlo_schedule_test",
    srcs = [
        "gpu_hlo_schedule_test.cc",
    ],
    backends = ["gpu"],
    deps = [
        ":gpu_hlo_schedule",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:backend",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_ordering",
        "//xla/stream_executor:device_description",
        "//xla/tests:filecheck",
        "//xla/tests:hlo_test_base",
        "//xla/tests:test_utils",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/profiler/protobuf:profiled_instructions_proto_cc",
    ],
)

# Library for gpu_p2p_pipeliner.{h,cc}; depends on the generic
# //xla/service:collective_pipeliner.
cc_library(
    name = "gpu_p2p_pipeliner",
    srcs = ["gpu_p2p_pipeliner.cc"],
    hdrs = ["gpu_p2p_pipeliner.h"],
    deps = [
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:collective_ops_utils",
        "//xla/service:collective_pipeliner",
        "//xla/service:hlo_parser",
        "//xla/service:hlo_pass_pipeline",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
    ],
)

# Unit test for :gpu_p2p_pipeliner.
xla_cc_test(
    name = "gpu_p2p_pipeliner_test",
    srcs = [
        "gpu_p2p_pipeliner_test.cc",
    ],
    deps = [
        ":gpu_p2p_pipeliner",
        "//xla:statusor",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_parser",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:hlo_verifier",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/log:check",
        "@com_google_googletest//:gtest",
    ],
)

# Test built from while_transformer_test.cc. It depends on no target from this
# package; the code under test comes from //xla/service:while_loop_analysis.
# Tagged "nomsan".
xla_cc_test(
    name = "while_transformer_test",
    srcs = ["while_transformer_test.cc"],
    tags = [
        "nomsan",
    ],
    deps = [
        "//xla:comparison_util",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:test",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:while_loop_analysis",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
    ],
)

# CUDA kernel library that :stream_executor_util depends on. The .cu.cc source
# is only listed when CUDA is configured; the cuda_headers dep is unconditional.
cuda_library(
    name = "stream_executor_util_kernel",
    srcs = if_cuda_is_configured(["stream_executor_util_kernel.cu.cc"]),
    deps = ["@local_config_cuda//cuda:cuda_headers"],
)

# Library for stream_executor_util.{h,cc}, compiled with tsl_copts().
# GOOGLE_CUDA=1 is defined only when CUDA is configured. Device code lives in
# the separate :stream_executor_util_kernel target.
cc_library(
    name = "stream_executor_util",
    srcs = ["stream_executor_util.cc"],
    hdrs = ["stream_executor_util.h"],
    copts = tsl_copts(),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":cublas_cudnn",
        ":launch_dimensions",
        ":stream_executor_util_kernel",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_module_config",
        "//xla/stream_executor",
        "//xla/stream_executor:kernel_factory",
        "//xla/stream_executor:launch_dim",
        "//xla/stream_executor:typed_kernel_factory",
        "//xla/tsl/util:env_var",
        "//xla/tsl/util/proto:proto_utils",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@eigen_archive//:eigen3",
        "@local_tsl//tsl/platform:ml_dtypes",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :stream_executor_util. It links
# @com_google_googletest//:gtest_main and none of the other test-main targets
# used elsewhere in this package.
xla_cc_test(
    name = "stream_executor_util_test",
    srcs = ["stream_executor_util_test.cc"],
    deps = [
        ":stream_executor_util",
        "//xla:autotuning_proto_cc",
        "//xla/service:hlo_module_config",
        "//xla/tsl/util/proto:proto_utils",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/time",
        "@com_google_googletest//:gtest_main",
    ],
)

# Library for gpu_asm_opts_util.{h,cc}, compiled with tsl_copts(). Its
# compatible_with value comes from get_compatible_with_portable().
cc_library(
    name = "gpu_asm_opts_util",
    srcs = ["gpu_asm_opts_util.cc"],
    hdrs = ["gpu_asm_opts_util.h"],
    compatible_with = get_compatible_with_portable(),
    copts = tsl_copts(),
    deps = [
        "//xla:xla_proto_cc",
        "//xla/stream_executor/gpu:gpu_asm_opts",
        "@com_google_absl//absl/strings",
    ],
)

# Library for hlo_fusion_analysis.{h,cc}; depends on the GPU-local
# :hlo_traversal, :ir_emission_utils and :reduction_utils targets.
cc_library(
    name = "hlo_fusion_analysis",
    srcs = ["hlo_fusion_analysis.cc"],
    hdrs = ["hlo_fusion_analysis.h"],
    deps = [
        ":backend_configs_cc",
        ":hlo_traversal",
        ":ir_emission_utils",
        ":reduction_utils",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Support",
    ],
)

# Unit test for :hlo_fusion_analysis; also depends on
# :gpu_device_info_for_tests.
xla_cc_test(
    name = "hlo_fusion_analysis_test",
    srcs = ["hlo_fusion_analysis_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":gpu_device_info_for_tests",
        ":hlo_fusion_analysis",
        ":hlo_traversal",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for buffer_comparator.{h,cc}. srcs, hdrs and deps are all wrapped in
# if_gpu_is_configured(), so the target carries no sources or deps unless a GPU
# toolchain is configured. Device code lives in :buffer_comparator_kernel.
cc_library(
    name = "buffer_comparator",
    srcs = if_gpu_is_configured(["buffer_comparator.cc"]),
    hdrs = if_gpu_is_configured(["buffer_comparator.h"]),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
        "TENSORFLOW_USE_ROCM=1",
    ]),
    deps = if_gpu_is_configured([
        ":buffer_comparator_kernel",
        ":gpu_asm_opts_util",
        ":launch_dimensions",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@eigen_archive//:eigen3",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:util",
        "//xla/service:hlo_module_config",
        "//xla/stream_executor",
        "//xla/stream_executor:device_memory_handle",
        "//xla/stream_executor:typed_kernel_factory",
        "//xla/stream_executor/gpu:asm_compiler",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:ml_dtypes",
        "@local_tsl//tsl/platform:statusor",
    ]),
)

# GPU kernel library backing :buffer_comparator, built from
# buffer_comparator.cu.cc with rocm_copts(). The platform define and the
# CUDA/ROCm header dep follow whichever toolchain is configured.
gpu_kernel_library(
    name = "buffer_comparator_kernel",
    srcs = if_gpu_is_configured(["buffer_comparator.cu.cc"]),
    copts = rocm_copts(),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
        "TENSORFLOW_USE_ROCM=1",
    ]),
    deps = if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

# Test for :buffer_comparator on the "gpu" backend. The test source and the
# :buffer_comparator dep are both gated on a GPU toolchain being configured,
# mirroring how the library itself is gated.
xla_test(
    name = "buffer_comparator_test",
    srcs = if_gpu_is_configured(["buffer_comparator_test.cc"]),
    backends = ["gpu"],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
        "TENSORFLOW_USE_ROCM=1",
    ]),
    deps = [
        ":stream_executor_util",
        "//xla:shape_util",
        "//xla:types",
        "//xla/service:hlo_module_config",
        "//xla/stream_executor",
        "//xla/stream_executor:device_memory_allocator",
        "//xla/stream_executor:device_memory_handle",
        "//xla/stream_executor:platform_manager",
        "@local_tsl//tsl/platform:ml_dtypes",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:test",
        "@local_tsl//tsl/platform:test_main",
    ] + if_gpu_is_configured([
        ":buffer_comparator",
        "//xla/stream_executor:device_memory",
    ]),
)

# Library for buffer_sharing.{h,cc}; depends on :hlo_fusion_analysis and
# :ir_emission_utils.
cc_library(
    name = "buffer_sharing",
    srcs = ["buffer_sharing.cc"],
    hdrs = ["buffer_sharing.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":hlo_fusion_analysis",
        ":ir_emission_utils",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@llvm-project//llvm:Support",
    ],
)

# Library for gpu_fusible.{h,cc}; depends on the generic
# //xla/service:instruction_fusion and on the GPU-local :hlo_traversal,
# :ir_emission_utils and :reduction_utils targets.
cc_library(
    name = "gpu_fusible",
    srcs = ["gpu_fusible.cc"],
    hdrs = ["gpu_fusible.h"],
    deps = [
        ":backend_configs_cc",
        ":hlo_traversal",
        ":ir_emission_utils",
        ":reduction_utils",
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_dataflow_analysis",
        "//xla/service:instruction_fusion",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Unit test for :gpu_fusible. Tagged "nomsan".
# The test entry point comes from //xla/tests:xla_internal_test_main, so only
# the plain googletest library is linked. Depending on gtest_main as well would
# bring a second main() provider into the link, leaving the choice of entry
# point to archive link order.
xla_cc_test(
    name = "gpu_fusible_test",
    srcs = ["gpu_fusible_test.cc"],
    tags = [
        "nomsan",
    ],
    deps = [
        ":gpu_fusible",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for cudnn_fused_conv_rewriter.{h,cc}. GOOGLE_CUDA=1 and the CUDA /
# cuDNN header deps are added only when CUDA is configured; the target has no
# ROCm-conditional attributes, although :amdgpu_compiler_impl depends on it.
cc_library(
    name = "cudnn_fused_conv_rewriter",
    srcs = ["cudnn_fused_conv_rewriter.cc"],
    hdrs = ["cudnn_fused_conv_rewriter.h"],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//xla:comparison_util",
        "//xla:debug_options_flags",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "//xla/service:pattern_matcher",
        "//xla/stream_executor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:ml_dtypes",
        "@local_tsl//tsl/platform:statusor",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
        "@local_config_cuda//cuda:cudnn_header",
    ]),
)

# Test for :cudnn_fused_conv_rewriter, split into 10 shards.
# Backends: gpu_a100 and gpu_amd_any, plus gpu_any in OSS builds. The gpu_a100
# variant is additionally tagged noasan / nomsan / no_rocm.
# The test entry point comes from @local_tsl//tsl/platform:test_main, so only
# the plain googletest library is linked. Depending on gtest_main as well would
# bring a second main() provider into the link, leaving the choice of entry
# point to archive link order.
xla_test(
    name = "cudnn_fused_conv_rewriter_test",
    srcs = ["cudnn_fused_conv_rewriter_test.cc"],
    backend_tags = {
        "gpu_a100": [
            "noasan",
            "nomsan",
            "no_rocm",
        ],
    },
    backends = [
        "gpu_a100",
        "gpu_amd_any",
    ] + if_oss(["gpu_any"]),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    shard_count = 10,
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":cudnn_fused_conv_rewriter",
        ":gpu_conv_rewriter",
        "//xla:comparison_util",
        "//xla:error_spec",
        "//xla/hlo/ir:hlo",
        "//xla/service:algebraic_simplifier",
        "//xla/service:convert_mover",
        "//xla/service:hlo_constant_folding",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service:reshape_mover",
        "//xla/service/gpu/tests:gpu_codegen_test",
        "//xla/stream_executor:device_description",
        "//xla/tests:filecheck",
        "//xla/tests:hlo_test_base",
        "//xla/tests:verified_hlo_module",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test_main",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
        "@local_config_cuda//cuda:cudnn_header",
    ]),
)

# Library for cudnn_norm_rewriter.{h,cc}. GOOGLE_CUDA=1 and the CUDA / cuDNN
# header deps are added only when CUDA is configured; the protobuf runtime dep
# is added through if_static().
cc_library(
    name = "cudnn_norm_rewriter",
    srcs = ["cudnn_norm_rewriter.cc"],
    hdrs = ["cudnn_norm_rewriter.h"],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla:window_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "//xla/service:pattern_matcher",
        "//xla/stream_executor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/protobuf:dnn_proto_cc",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
        "@local_config_cuda//cuda:cudnn_header",
    ]) + if_static([
        "@com_google_protobuf//:protobuf",
    ]),
)

# Test for :cudnn_norm_rewriter on the "gpu" backend. It links
# @com_google_googletest//:gtest_main and none of the other test-main targets
# used elsewhere in this package.
xla_test(
    name = "cudnn_norm_rewriter_test",
    srcs = ["cudnn_norm_rewriter_test.cc"],
    backends = ["gpu"],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":cublas_cudnn",
        ":cudnn_norm_rewriter",
        "//xla:error_spec",
        "//xla/service/gpu/tests:gpu_codegen_test",
        "//xla/stream_executor:device_description",
        "//xla/tests:filecheck",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/lib/core:status_test_util",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
        "@local_config_cuda//cuda:cudnn_header",
    ]),
)

# Library for cudnn_fused_mha_rewriter.{h,cc}. GOOGLE_CUDA=1 and the CUDA
# header dep are added only when CUDA is configured.
# NOTE(review): unlike :cudnn_fused_conv_rewriter and :cudnn_norm_rewriter, the
# CUDA-only deps here omit cudnn_header -- confirm that is intentional.
cc_library(
    name = "cudnn_fused_mha_rewriter",
    srcs = ["cudnn_fused_mha_rewriter.cc"],
    hdrs = ["cudnn_fused_mha_rewriter.h"],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":matmul_utils",
        ":stream_executor_util",
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service:pattern_matcher",
        "//xla/stream_executor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
    ]),
)

# Library built from cudnn_fused_mha_transpose_fusion.{cc,h}. Unlike
# :cudnn_fused_mha_rewriter it has no CUDA-conditional defines or deps.
cc_library(
    name = "cudnn_fused_mha_transpose_fusion",
    srcs = ["cudnn_fused_mha_transpose_fusion.cc"],
    hdrs = ["cudnn_fused_mha_transpose_fusion.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":matmul_utils",
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service:pattern_matcher",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :cudnn_fused_mha_rewriter and
# :cudnn_fused_mha_transpose_fusion on the "gpu" backend. The gpu backend is
# tagged "requires-gpu-nvidia" and "no_rocm"; GOOGLE_CUDA=1 and the CUDA/cuDNN
# header deps are only added through if_cuda_is_configured(...).
# NOTE(review): deps list both @com_google_googletest//:gtest_main and
# @local_tsl//tsl/platform:test_main - confirm whether both are needed.
xla_test(
    name = "cudnn_fused_mha_rewriter_test",
    srcs = ["cudnn_fused_mha_rewriter_test.cc"],
    backend_tags = {"gpu": [
        "requires-gpu-nvidia",
        "no_rocm",
    ]},
    backends = [
        "gpu",
    ],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":cudnn_fused_mha_rewriter",
        ":cudnn_fused_mha_transpose_fusion",
        "//xla:error_spec",
        "//xla:test_helpers",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:algebraic_simplifier",
        "//xla/service:computation_layout",
        "//xla/service:hlo_cse",
        "//xla/service:hlo_dce",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_parser",
        "//xla/service:hlo_verifier",
        "//xla/service:layout_normalization",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service:reshape_decomposer",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:dnn",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test_main",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
        "@local_config_cuda//cuda:cudnn_header",
    ]),
)

# Test target built from float_support_test.cc for the "gpu" backend; the gpu
# backend carries the "requires-gpu-sm80" tag.
xla_test(
    name = "float_support_test",
    srcs = ["float_support_test.cc"],
    backend_tags = {"gpu": [
        "requires-gpu-sm80",
    ]},
    backends = [
        "gpu",
    ],
    deps = [
        ":variant_visitor",
        "//xla:error_spec",
        "//xla:xla_proto_cc",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
    ],
)

# Test target built from conv_layout_normalization_test.cc for the "gpu"
# backend. The gpu_codegen_test dep is pinned with a "fixdeps: keep" marker so
# dependency tooling does not drop it.
xla_test(
    name = "conv_layout_normalization_test",
    srcs = ["conv_layout_normalization_test.cc"],
    backends = ["gpu"],
    deps = [
        "//xla:error_spec",
        "//xla/hlo/ir:hlo",
        "//xla/service/gpu/tests:gpu_codegen_test",  # fixdeps: keep
        "//xla/tests:hlo_test_base",
        "//xla/tests:test_macros_header",
        "@local_tsl//tsl/platform:test",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Library built from variadic_op_splitter.{cc,h}; depends on the HLO IR and
# the HLO pass interface targets.
cc_library(
    name = "variadic_op_splitter",
    srcs = ["variadic_op_splitter.cc"],
    hdrs = ["variadic_op_splitter.h"],
    deps = [
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gpu_scatter_expander.{cc,h}; depends on
# //xla/service:scatter_expander.
cc_library(
    name = "gpu_scatter_expander",
    srcs = ["gpu_scatter_expander.cc"],
    hdrs = ["gpu_scatter_expander.h"],
    deps = [
        "//xla:shape_util",
        "//xla:statusor",
        "//xla/hlo/ir:hlo",
        "//xla/service:scatter_expander",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Test target for :variadic_op_splitter, built from
# variadic_op_splitter_test.cc. Carries the "nomsan" tag.
xla_cc_test(
    name = "variadic_op_splitter_test",
    srcs = ["variadic_op_splitter_test.cc"],
    tags = [
        "nomsan",
    ],
    deps = [
        ":variadic_op_splitter",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/service:pattern_matcher",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest_main",
    ],
)

# Proto library for gpu_autotuning.proto. Other targets in this file refer to
# the C++ form as :gpu_autotuning_proto_cc.
tf_proto_library(
    name = "gpu_autotuning_proto",
    srcs = ["gpu_autotuning.proto"],
    cc_api_version = 2,
    protodeps = [
        "//xla:xla_data_proto",
        "//xla/service:hlo_proto",
        "//xla:autotuning_proto",
    ],
)

# Library built from hlo_algorithm_denylist.{cc,h}; depends on the
# :gpu_autotuning_proto_cc and //xla:autotuning_proto_cc proto targets.
cc_library(
    name = "hlo_algorithm_denylist",
    srcs = ["hlo_algorithm_denylist.cc"],
    hdrs = ["hlo_algorithm_denylist.h"],
    deps = [
        ":gpu_autotuning_proto_cc",
        "//xla:autotuning_proto_cc",
        "//xla:debug_options_flags",
        "//xla/stream_executor",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log:check",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:status",
    ],
)

# Test target for :hlo_algorithm_denylist. The text-proto fixture
# data/hlo_algorithm_denylist.pbtxt is supplied through `data`.
xla_cc_test(
    name = "hlo_algorithm_denylist_test",
    srcs = ["hlo_algorithm_denylist_test.cc"],
    data = ["data/hlo_algorithm_denylist.pbtxt"],
    deps = [
        ":hlo_algorithm_denylist",
        "//xla/stream_executor:dnn",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:test",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Library built from alias_passthrough_params.{cc,h}; depends on the HLO IR
# and the HLO pass interface targets.
cc_library(
    name = "alias_passthrough_params",
    srcs = ["alias_passthrough_params.cc"],
    hdrs = ["alias_passthrough_params.h"],
    deps = [
        "//xla:shape_util",
        "//xla:statusor",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Test target for :alias_passthrough_params. Carries the "nomsan" tag.
xla_cc_test(
    name = "alias_passthrough_params_test",
    srcs = ["alias_passthrough_params_test.cc"],
    tags = [
        "nomsan",
    ],
    deps = [
        ":alias_passthrough_params",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library built from horizontal_loop_fusion.{cc,h}; depends on :gpu_fusible
# and //xla/service:sub_byte_normalization among others.
cc_library(
    name = "horizontal_loop_fusion",
    srcs = ["horizontal_loop_fusion.cc"],
    hdrs = ["horizontal_loop_fusion.h"],
    deps = [
        ":gpu_fusible",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "//xla/service:sub_byte_normalization",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :horizontal_loop_fusion on the "gpu" backend; also links
# :instruction_fusion and :gpu_device_info_for_tests.
xla_test(
    name = "horizontal_loop_fusion_test",
    srcs = ["horizontal_loop_fusion_test.cc"],
    backends = ["gpu"],
    deps = [
        ":gpu_device_info_for_tests",
        ":horizontal_loop_fusion",
        ":instruction_fusion",
        "//xla:error_spec",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:test",
        "//xla:test_helpers",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_dce",
        "//xla/service:hlo_parser",
        "//xla/service:hlo_pass",
        "//xla/service:hlo_pass_pipeline",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service:tuple_simplifier",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/log",
        "@local_tsl//tsl/lib/core:status_test_util",
    ],
)

# Library built from horizontal_input_fusion.{cc,h}; depends on :gpu_fusible
# and //xla/stream_executor:device_description among others.
cc_library(
    name = "horizontal_input_fusion",
    srcs = ["horizontal_input_fusion.cc"],
    hdrs = ["horizontal_input_fusion.h"],
    deps = [
        ":gpu_fusible",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :horizontal_input_fusion on the "gpu" backend.
xla_test(
    name = "horizontal_input_fusion_test",
    srcs = ["horizontal_input_fusion_test.cc"],
    backends = ["gpu"],
    deps = [
        ":gpu_device_info_for_tests",
        ":horizontal_input_fusion",
        "//xla:error_spec",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:test",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/service/gpu/tests:gpu_codegen_test",
        "//xla/stream_executor:device_description",
        "//xla/tests:xla_internal_test_main",
    ],
)

# Library built from reduction_degenerate_dim_remover.{cc,h}; depends on the
# HLO IR and the HLO pass interface targets.
cc_library(
    name = "reduction_degenerate_dim_remover",
    srcs = ["reduction_degenerate_dim_remover.cc"],
    hdrs = ["reduction_degenerate_dim_remover.h"],
    deps = [
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from reduction_dimension_grouper.{cc,h}; depends on the HLO IR
# and the HLO pass interface targets.
cc_library(
    name = "reduction_dimension_grouper",
    srcs = ["reduction_dimension_grouper.cc"],
    hdrs = ["reduction_dimension_grouper.h"],
    deps = [
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from reduction_splitter.{cc,h}; depends on :reduction_utils in
# addition to the HLO IR and the HLO pass interface targets.
cc_library(
    name = "reduction_splitter",
    srcs = ["reduction_splitter.cc"],
    hdrs = ["reduction_splitter.h"],
    deps = [
        ":reduction_utils",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :reduction_splitter, built from reduction_splitter_test.cc.
xla_cc_test(
    name = "reduction_splitter_test",
    srcs = ["reduction_splitter_test.cc"],
    deps = [
        ":reduction_splitter",
        "//xla:shape_util",
        "//xla:test",
        "//xla:test_helpers",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
    ],
)

# Library built from reduction_layout_normalizer.{cc,h}; depends on the HLO IR
# and the HLO pass interface targets.
cc_library(
    name = "reduction_layout_normalizer",
    srcs = ["reduction_layout_normalizer.cc"],
    hdrs = ["reduction_layout_normalizer.h"],
    deps = [
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from tree_reduction_rewriter.{cc,h}; depends on
# :reduction_utils and //xla/stream_executor:device_description among others.
cc_library(
    name = "tree_reduction_rewriter",
    srcs = ["tree_reduction_rewriter.cc"],
    hdrs = ["tree_reduction_rewriter.h"],
    deps = [
        ":reduction_utils",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:collective_ops_utils",
        "//xla/service:hlo_pass",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/numeric:bits",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gemm_broadcast_folding_rewriter.{cc,h}; depends on
# :backend_configs_cc and :cublas_cudnn among others.
cc_library(
    name = "gemm_broadcast_folding_rewriter",
    srcs = ["gemm_broadcast_folding_rewriter.cc"],
    hdrs = ["gemm_broadcast_folding_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service:pattern_matcher",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from metrics.{cc,h}; its only deps are the TSL monitoring
# counter, gauge and sampler targets.
cc_library(
    name = "metrics",
    srcs = ["metrics.cc"],
    hdrs = ["metrics.h"],
    deps = [
        "@local_tsl//tsl/lib/monitoring:counter",
        "@local_tsl//tsl/lib/monitoring:gauge",
        "@local_tsl//tsl/lib/monitoring:sampler",
    ],
)

# Library built from dot_operand_converter.{cc,h}; depends on
# //xla/service:op_expander_pass rather than the plain hlo_pass target.
cc_library(
    name = "dot_operand_converter",
    srcs = ["dot_operand_converter.cc"],
    hdrs = ["dot_operand_converter.h"],
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:op_expander_pass",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
    ],
)

# Test target for :dot_operand_converter on the listed NVIDIA (a100/p100/v100)
# and AMD backends. Sources and most deps are wrapped in
# if_gpu_is_configured(...); the second argument to that macro supplies
# tsl test_main instead (presumably for builds without a GPU configured - see
# the bug referenced inline). //xla:xla_data_proto_cc is always a dep.
xla_test(
    name = "dot_operand_converter_test",
    srcs = if_gpu_is_configured(["dot_operand_converter_test.cc"]),
    backends = [
        "gpu_a100",
        "gpu_p100",
        "gpu_v100",
        "gpu_amd_any",
    ],
    deps = if_gpu_is_configured(
        [
            ":dot_operand_converter",
            "@com_google_googletest//:gtest",
            "@com_google_absl//absl/strings",
            "@com_google_absl//absl/strings:string_view",
            "//xla:shape_util",
            "//xla/hlo/ir:hlo",
            "//xla/hlo/utils:hlo_matchers",
            "//xla/service:pattern_matcher",
            "//xla/tests:hlo_test_base",
            "//xla/tests:xla_internal_test_main",
            "@local_tsl//tsl/platform:statusor",
        ],
        ["@local_tsl//tsl/platform:test_main"],  # b/317293391
    ) + ["//xla:xla_data_proto_cc"],
)

# Library for make_batch_pointers.{cc,h}; both srcs and hdrs are wrapped in
# if_gpu_is_configured(...). The CUDA kernel target
# (:make_batch_pointers_kernel) is added through if_cuda_is_configured(...) and
# the ROCm helpers through if_rocm_is_configured(...).
cc_library(
    name = "make_batch_pointers",
    srcs = if_gpu_is_configured(["make_batch_pointers.cc"]),
    hdrs = if_gpu_is_configured(["make_batch_pointers.h"]),
    deps = [
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla/stream_executor",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:typed_kernel_factory",
        "//xla/stream_executor/gpu:gpu_stream_header",
        "@com_google_absl//absl/status",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ] + if_cuda_is_configured([
        ":make_batch_pointers_kernel",
    ]) + if_rocm_is_configured([
        "//xla/stream_executor/rocm:rocm_helpers",
    ]),
)

# CUDA library for make_batch_pointers.cu.cc; the source is wrapped in
# if_cuda_is_configured(...). Consumed by :make_batch_pointers.
cuda_library(
    name = "make_batch_pointers_kernel",
    srcs = if_cuda_is_configured(["make_batch_pointers.cu.cc"]),
    deps = [
        "@local_config_cuda//cuda:cuda_headers",  # build_cleaner: keep
    ],
)

# Library built from triangular_solve_rewriter.{cc,h}; depends on
# :cublas_cudnn and the HLO pass interface among others.
cc_library(
    name = "triangular_solve_rewriter",
    srcs = ["triangular_solve_rewriter.cc"],
    hdrs = ["triangular_solve_rewriter.h"],
    deps = [
        ":cublas_cudnn",
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# GPU library (tsl_gpu_library macro) built from runtime_intrinsics.{cc,h}.
# alwayslink = 1 keeps the object code in the final binary even when no symbol
# is referenced directly; presumably needed because the library registers
# itself via //xla/service:custom_call_target_registry - confirm.
tsl_gpu_library(
    name = "runtime_intrinsics",
    srcs = ["runtime_intrinsics.cc"],
    hdrs = ["runtime_intrinsics.h"],
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/service:collective_ops_utils",
        "//xla/service:custom_call_status",
        "//xla/service:custom_call_target_registry",
        "//xla/service:platform_util",
        "//xla/stream_executor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
    alwayslink = 1,
)

# Test target for :runtime_intrinsics on the "gpu" backend.
xla_test(
    name = "runtime_intrinsics_test",
    srcs = ["runtime_intrinsics_test.cc"],
    backends = ["gpu"],
    deps = [
        ":runtime_intrinsics",
        "//xla/hlo/ir:hlo",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from hlo_fusion_stats.{cc,h}; depends only on the HLO IR,
# absl status/strings and tsl errors.
cc_library(
    name = "hlo_fusion_stats",
    srcs = ["hlo_fusion_stats.cc"],
    hdrs = ["hlo_fusion_stats.h"],
    deps = [
        "//xla/hlo/ir:hlo",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
    ],
)

# Test target for :hlo_fusion_stats. Carries the "nomsan" tag.
xla_cc_test(
    name = "hlo_fusion_stats_test",
    srcs = ["hlo_fusion_stats_test.cc"],
    tags = [
        "nomsan",
    ],
    deps = [
        ":hlo_fusion_stats",
        "//xla/service:hlo_parser",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/lib/core:status_test_util",
    ],
)

# Library built from scatter_slice_simplifier.{cc,h}; depends on the HLO IR,
# hlo_creation_utils and the HLO pass interface among others.
cc_library(
    name = "scatter_slice_simplifier",
    srcs = ["scatter_slice_simplifier.cc"],
    hdrs = ["scatter_slice_simplifier.h"],
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :scatter_slice_simplifier, built from
# scatter_slice_simplifier_test.cc.
xla_cc_test(
    name = "scatter_slice_simplifier_test",
    srcs = ["scatter_slice_simplifier_test.cc"],
    deps = [
        ":scatter_slice_simplifier",
        "//xla:shape_util",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest_main",
    ],
)

# Library built from conv_layout_normalization.{cc,h}; depends on
# :cublas_cudnn and //xla/service:hlo_creation_utils among others.
cc_library(
    name = "conv_layout_normalization",
    srcs = ["conv_layout_normalization.cc"],
    hdrs = ["conv_layout_normalization.h"],
    deps = [
        ":cublas_cudnn",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from topk_specializer.{cc,h}; depends on the HLO IR, the HLO
# pass interface and //xla/service:tuple_util among others.
cc_library(
    name = "topk_specializer",
    srcs = ["topk_specializer.cc"],
    hdrs = ["topk_specializer.h"],
    deps = [
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service:hlo_proto_cc",
        "//xla/service:tuple_util",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
    ],
)

# Library built from topk_splitter.{cc,h}; depends on the HLO IR,
# hlo_creation_utils and the HLO pass interface among others.
cc_library(
    name = "topk_splitter",
    srcs = ["topk_splitter.cc"],
    hdrs = ["topk_splitter.h"],
    deps = [
        "//xla:shape_util",
        "//xla:statusor",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/numeric:bits",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :topk_splitter; also links //xla/service:topk_rewriter and
# //xla/service:hlo_dce.
xla_cc_test(
    name = "topk_splitter_test",
    srcs = ["topk_splitter_test.cc"],
    deps = [
        ":topk_splitter",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_dce",
        "//xla/service:pattern_matcher",
        "//xla/service:topk_rewriter",
        "//xla/tests:hlo_test_base",
        "//xla/tests:verified_hlo_module",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ],
)

# Test target built from topk_test.cc for the "gpu" backend; links
# :topk_specializer and //xla/service:topk_rewriter.
# NOTE(review): deps list //xla/tests:xla_internal_test_main (pinned with
# "fixdeps: keep"), @com_google_googletest//:gtest_main and
# @local_tsl//tsl/platform:test_main together - confirm all three are needed.
xla_test(
    name = "topk_test",
    srcs = ["topk_test.cc"],
    backends = ["gpu"],
    deps = [
        ":topk_specializer",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service:platform_util",
        "//xla/service:topk_rewriter",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Library built from copy_fusion.{cc,h}; depends on :gpu_fusible,
# :hlo_traversal, :ir_emission_utils and :reduction_utils among others.
cc_library(
    name = "copy_fusion",
    srcs = ["copy_fusion.cc"],
    hdrs = ["copy_fusion.h"],
    deps = [
        ":gpu_fusible",
        ":hlo_traversal",
        ":ir_emission_utils",
        ":reduction_utils",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Library built from algorithm_checker.{cc,h}; depends on
# //xla/service:algorithm_util and
# //xla/stream_executor:device_description among others.
cc_library(
    name = "algorithm_checker",
    srcs = ["algorithm_checker.cc"],
    hdrs = ["algorithm_checker.h"],
    deps = [
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:algorithm_util",
        "//xla/service:hlo_pass",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Test target for dot_algorithm_support_test.cc on the v100, a100 and AMD
# backends. Only the source is wrapped in if_gpu_is_configured(...); the deps
# are unconditional. Carries the "nomac" tag, and the
# xla_internal_test_main dep is pinned with "fixdeps: keep".
xla_test(
    name = "dot_algorithm_support_test",
    srcs = if_gpu_is_configured(["dot_algorithm_support_test.cc"]),
    backends = [
        "gpu_v100",
        "gpu_a100",
        "gpu_amd_any",
    ],
    tags = [
        "nomac",
    ],
    deps = [
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_googletest//:gtest",
    ],
)

# Library built from kernel_reuse_cache.{cc,h}; depends on :kernel_arguments
# and :launch_dimensions among others.
cc_library(
    name = "kernel_reuse_cache",
    srcs = ["kernel_reuse_cache.cc"],
    hdrs = ["kernel_reuse_cache.h"],
    deps = [
        ":kernel_arguments",
        ":launch_dimensions",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:launch_dim",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Library built from kernel_arguments.{cc,h}; depends on :gpu_constants and
# //xla/service:buffer_assignment among others.
cc_library(
    name = "kernel_arguments",
    srcs = ["kernel_arguments.cc"],
    hdrs = ["kernel_arguments.h"],
    deps = [
        ":gpu_constants",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from hlo_traversal.{cc,h}. The only library in this stretch of
# the file that sets compatible_with = get_compatible_with_portable(); its
# deps are limited to shape_util, the HLO IR and absl.
cc_library(
    name = "hlo_traversal",
    srcs = ["hlo_traversal.cc"],
    hdrs = ["hlo_traversal.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
    ],
)

# Test target for :hlo_traversal, built from hlo_traversal_test.cc.
xla_cc_test(
    name = "hlo_traversal_test",
    srcs = ["hlo_traversal_test.cc"],
    deps = [
        ":hlo_traversal",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
    ],
)

# Library built from fusion_wrapper.{cc,h}; depends on :gpu_fusible, the HLO
# IR and the HLO pass interface.
cc_library(
    name = "fusion_wrapper",
    srcs = ["fusion_wrapper.cc"],
    hdrs = ["fusion_wrapper.h"],
    deps = [
        ":gpu_fusible",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
    ],
)

# Test target for :fusion_wrapper, built from fusion_wrapper_test.cc.
xla_cc_test(
    name = "fusion_wrapper_test",
    srcs = ["fusion_wrapper_test.cc"],
    deps = [
        ":fusion_wrapper",
        "//xla/tests:hlo_test_base",
        "@com_google_googletest//:gtest_main",
    ],
)

# Test target for :copy_fusion, built from copy_fusion_test.cc.
xla_cc_test(
    name = "copy_fusion_test",
    srcs = ["copy_fusion_test.cc"],
    deps = [
        ":copy_fusion",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
    ],
)

# Test target for :autotuner_util. The source and most deps are wrapped in
# if_cuda_is_configured(...); xla_internal_test_main and absl/status are added
# unconditionally, outside that guard.
xla_cc_test(
    name = "autotuner_util_test",
    srcs = if_cuda_is_configured(["autotuner_util_test.cc"]),
    deps = if_cuda_is_configured([
        ":autotuner_util",
        "@com_google_googletest//:gtest",
        "@com_google_absl//absl/base:log_severity",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:scoped_mock_log",
        "@com_google_absl//absl/strings",
        "//xla:autotune_results_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:platform_manager",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/stream_executor/host:host_platform",
        "//xla/tests:hlo_test_base",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:protobuf",
    ]) + [
        "//xla/tests:xla_internal_test_main",  # Keep outside GPU guard
        "@com_google_absl//absl/status",
    ],
)

# Library built from double_buffer_loop_unrolling.{cc,h}; depends on
# //xla/service:flatten_call_graph and //xla/service:collective_ops_utils
# among others.
cc_library(
    name = "double_buffer_loop_unrolling",
    srcs = ["double_buffer_loop_unrolling.cc"],
    hdrs = ["double_buffer_loop_unrolling.h"],
    deps = [
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:collective_ops_utils",
        "//xla/service:flatten_call_graph",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :double_buffer_loop_unrolling, built from
# double_buffer_loop_unrolling_test.cc. It has no GPU/CUDA guard on srcs or
# deps and links //xla/tests:xla_internal_test_main.
xla_cc_test(
    name = "double_buffer_loop_unrolling_test",
    srcs = ["double_buffer_loop_unrolling_test.cc"],
    deps = [
        ":double_buffer_loop_unrolling",
        "//xla:test",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:tuple_simplifier",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/container:flat_hash_set",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Backend-parameterized test declared for the "gpu_a100" and "gpu_amd_any"
# backends. The source file and the first dependency list are passed through
# if_gpu_is_configured; the second list handed to that macro contains only
# tsl's test_main.
xla_test(
    name = "determinism_test",
    srcs = if_gpu_is_configured(["determinism_test.cc"]),
    backends = [
        "gpu_a100",
        "gpu_amd_any",
    ],
    # GOOGLE_CUDA / TENSORFLOW_USE_ROCM are defined according to which of the
    # two toolchain guards is active.
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([
        "TENSORFLOW_USE_ROCM=1",
    ]),
    deps = if_gpu_is_configured(
        [
            # Package-local labels first, then workspace labels, then
            # external repositories.
            ":autotuner_util",
            "//xla:literal",
            "//xla:xla_proto_cc",
            "//xla/hlo/ir:hlo",
            "//xla/service/gpu/tests:gpu_codegen_test",
            "//xla/stream_executor:device_description",
            "//xla/stream_executor/gpu:gpu_timer",
            "//xla/tests:hlo_test_base",
            "//xla/tests:literal_test_util",
            "//xla/tests:test_utils",
            "@com_google_absl//absl/strings",
            "@com_google_googletest//:gtest_main",
            "@local_tsl//tsl/platform:statusor",
        ],
        ["@local_tsl//tsl/platform:test_main"],  # b/317293391
    ),
)

# Header-only target: it declares gpu_symbol_repository.h in hdrs and has no
# srcs attribute.
cc_library(
    name = "gpu_symbol_repository",
    hdrs = ["gpu_symbol_repository.h"],
    deps = [
        "//xla:autotune_results_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/service:symbol_repository",
    ],
)

# Library built from collective_permute_cycle_decomposer.cc with
# collective_permute_cycle_decomposer.h as its public header. Depends on the
# package-local :backend_configs_cc proto library.
cc_library(
    name = "collective_permute_cycle_decomposer",
    srcs = ["collective_permute_cycle_decomposer.cc"],
    hdrs = ["collective_permute_cycle_decomposer.h"],
    deps = [
        ":backend_configs_cc",
        "//xla:comparison_util",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:collective_ops_utils",
        "//xla/service:hlo_parser",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
    ],
)

# Test target for :collective_permute_cycle_decomposer. It links
# @com_google_googletest//:gtest together with tsl's test_main, rather than
# gtest_main or //xla/tests:xla_internal_test_main.
xla_cc_test(
    name = "collective_permute_cycle_decomposer_test",
    srcs = ["collective_permute_cycle_decomposer_test.cc"],
    deps = [
        ":collective_permute_cycle_decomposer",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_parser",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Library built from stream_attribute_annotator.cc with
# stream_attribute_annotator.h as its public header. Package-local
# dependencies are :backend_configs_cc and :gpu_fusible.
cc_library(
    name = "stream_attribute_annotator",
    srcs = ["stream_attribute_annotator.cc"],
    hdrs = ["stream_attribute_annotator.h"],
    deps = [
        ":backend_configs_cc",
        ":gpu_fusible",
        "//xla:comparison_util",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:hlo_pass",
        "//xla/service/gpu/runtime:thunk",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :stream_attribute_annotator, built from
# stream_attribute_annotator_test.cc and linked against
# @com_google_googletest//:gtest_main.
xla_cc_test(
    name = "stream_attribute_annotator_test",
    srcs = ["stream_attribute_annotator_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":stream_attribute_annotator",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from stream_attribute_async_wrapper.cc with
# stream_attribute_async_wrapper.h as its public header. Its only
# package-local dependency is :backend_configs_cc.
cc_library(
    name = "stream_attribute_async_wrapper",
    srcs = ["stream_attribute_async_wrapper.cc"],
    hdrs = ["stream_attribute_async_wrapper.h"],
    deps = [
        ":backend_configs_cc",
        "//xla:comparison_util",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_pass",
        "//xla/service/gpu/runtime:thunk",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :stream_attribute_async_wrapper, built from
# stream_attribute_async_wrapper_test.cc and linked against
# @com_google_googletest//:gtest_main.
xla_cc_test(
    name = "stream_attribute_async_wrapper_test",
    srcs = ["stream_attribute_async_wrapper_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":stream_attribute_async_wrapper",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gpu_windowed_einsum_handler.cc with
# gpu_windowed_einsum_handler.h as its public header. Its only package-local
# dependency is :backend_configs_cc.
cc_library(
    name = "gpu_windowed_einsum_handler",
    srcs = ["gpu_windowed_einsum_handler.cc"],
    hdrs = ["gpu_windowed_einsum_handler.h"],
    deps = [
        ":backend_configs_cc",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:hlo_pass",
        "//xla/service:pattern_matcher",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :gpu_windowed_einsum_handler, built from
# gpu_windowed_einsum_handler_test.cc and linked against
# @com_google_googletest//:gtest_main.
# NOTE(review): the llvm/mlir labels below are not dependencies of the library
# under test; confirm the test source actually includes them.
xla_cc_test(
    name = "gpu_windowed_einsum_handler_test",
    srcs = ["gpu_windowed_einsum_handler_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":gpu_windowed_einsum_handler",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:pattern_matcher",
        "//xla/service:pattern_matcher_gmock",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library whose source, header and most dependencies are all wrapped in
# if_gpu_is_configured. The absl status label is the one dependency added
# outside that guard.
cc_library(
    name = "triton_fusion_numerics_verifier",
    srcs = if_gpu_is_configured(["triton_fusion_numerics_verifier.cc"]),
    hdrs = if_gpu_is_configured(["triton_fusion_numerics_verifier.h"]),
    deps = if_gpu_is_configured([
        # Package-local labels first, then workspace labels, then external
        # repositories.
        ":autotuner_compile_util",
        ":autotuner_util",
        ":backend_configs_cc",
        ":buffer_comparator",
        ":ir_emission_utils",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:executable",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_pass",
        "//xla/service:shaped_buffer",
        "//xla/stream_executor:stream_executor_headers",
        "//xla/tools:hlo_decomposer_lib",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/functional:any_invocable",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ]) + ["@com_google_absl//absl/status"],
)

# Backend-parameterized test for :triton_fusion_numerics_verifier, declared
# for the "gpu" backend only and tagged "requires-gpu-sm80" on that backend.
# NOTE(review): srcs are wrapped in if_gpu_is_configured but deps are not,
# unlike determinism_test in this file, which guards both. Confirm this is
# intentional.
xla_test(
    name = "triton_fusion_numerics_verifier_test",
    srcs = if_gpu_is_configured(["triton_fusion_numerics_verifier_test.cc"]),
    backend_tags = {"gpu": [
        "requires-gpu-sm80",
    ]},
    backends = ["gpu"],
    deps = [
        ":autotuner_compile_util",
        ":autotuner_util",
        ":triton_fusion_numerics_verifier",
        "//xla:shape_util",
        "//xla:test_helpers",
        "//xla/hlo/ir:hlo",
        "//xla/service:platform_util",
        "//xla/stream_executor:platform",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/lib/core:status_test_util",
    ],
)

# Library built from pipelined_p2p_rewriter.cc with pipelined_p2p_rewriter.h
# as its public header. It has no package-local dependencies.
cc_library(
    name = "pipelined_p2p_rewriter",
    srcs = ["pipelined_p2p_rewriter.cc"],
    hdrs = ["pipelined_p2p_rewriter.h"],
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:collective_ops_utils",
        "//xla/service:hlo_pass",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :pipelined_p2p_rewriter. It depends on //xla/tests:filecheck
# and links @com_google_googletest//:gtest together with tsl's test_main.
xla_cc_test(
    name = "pipelined_p2p_rewriter_test",
    srcs = ["pipelined_p2p_rewriter_test.cc"],
    deps = [
        ":pipelined_p2p_rewriter",
        "//xla/hlo/ir:hlo",
        "//xla/tests:filecheck",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Library built from execution_stream_assignment.cc with
# execution_stream_assignment.h as its public header. It has no package-local
# dependencies.
cc_library(
    name = "execution_stream_assignment",
    srcs = ["execution_stream_assignment.cc"],
    hdrs = ["execution_stream_assignment.h"],
    deps = [
        "//xla/hlo/ir:hlo",
        "//xla/service:call_graph",
        "//xla/service/gpu/runtime:thunk",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
    ],
)

# Test target for :execution_stream_assignment, built from
# execution_stream_assignment_test.cc and linked against
# @com_google_googletest//:gtest_main.
xla_cc_test(
    name = "execution_stream_assignment_test",
    srcs = ["execution_stream_assignment_test.cc"],
    deps = [
        ":execution_stream_assignment",
        "//xla/hlo/ir:hlo",
        "//xla/service/gpu/runtime:thunk",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)
