load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm")
load("@local_tsl//tsl/platform:build_config.bzl", "tf_proto_library")
load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library")
load("//xla:xla.bzl", "xla_cc_test")
load("//xla/stream_executor:build_defs.bzl", "if_cuda_or_rocm", "if_gpu_is_configured")
load("//xla/tests:build_defs.bzl", "xla_test")
load("//xla/tsl:tsl.bzl", "if_google", "internal_visibility")

# Package-level defaults applied to every target declared in this BUILD file.
# Targets that do not set `visibility` themselves inherit `default_visibility`
# below. internal_visibility() is loaded from //xla/tsl:tsl.bzl; what it expands
# to is not visible from this file.
# NOTE: the commented-out `default_applicable_licenses` line carries a copybara
# tooling marker and should be kept verbatim.
package(
    # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
    default_visibility = internal_visibility(["//xla:internal"]),
    licenses = ["notice"],
)

# Library built from gpu_helpers.cc / gpu_helpers.h.
# Consumed within this package by :se_gpu_pjrt_client.
cc_library(
    name = "gpu_helpers",
    srcs = ["gpu_helpers.cc"],
    hdrs = ["gpu_helpers.h"],
    # Overrides the package-level default_visibility for this target.
    visibility = internal_visibility(["//xla/pjrt:friends"]),
    deps = [
        "//xla:statusor",
        "//xla:types",
        "//xla:util",
        "//xla/client:client_library",
        "//xla/client:local_client",
        "//xla/service:platform_util",
        "//xla/stream_executor",
        "//xla/stream_executor/integrations:device_mem_allocator",
        "//xla/tsl/util:env_var",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/framework:bfc_allocator",
        "@local_tsl//tsl/framework:device_id_impl",
    ],
)

# Library built from se_gpu_pjrt_client.cc / se_gpu_pjrt_client.h.
#
# The dependency list has four parts: an unconditional list, followed by three
# groups wrapped in if_cuda_or_rocm(), if_cuda() and if_rocm(). Those macros are
# loaded from other .bzl files; presumably each yields its argument only for
# the matching GPU platform -- confirm in the respective build_defs.bzl.
cc_library(
    name = "se_gpu_pjrt_client",
    srcs = ["se_gpu_pjrt_client.cc"],
    hdrs = ["se_gpu_pjrt_client.h"],
    # Preprocessor macros gated on the GPU platform. NOTE(review): with standard
    # cc_library semantics `defines` (unlike `local_defines`) is also applied to
    # every target that depends on this one -- assumes the cc_library wrapper
    # loaded from tsl keeps that behavior.
    defines = if_cuda(["GOOGLE_CUDA=1"]) + if_rocm(["TENSORFLOW_USE_ROCM=1"]),
    # Overrides the package-level default_visibility for this target.
    visibility = internal_visibility(["//xla/pjrt:friends"]),
    deps = [
        ":gpu_helpers",
        ":gpu_metrics",
        ":gpu_topology",
        ":gpu_topology_proto_cc",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:util",
        "//xla:xla_proto_cc",
        "//xla/client:client_library",
        "//xla/client:local_client",
        "//xla/client:xla_computation",
        "//xla/pjrt:compile_options_proto_cc",
        "//xla/pjrt:event_pool",
        "//xla/pjrt:local_device_state",
        "//xla/pjrt:mlir_to_hlo",
        "//xla/pjrt:pjrt_client",
        "//xla/pjrt:pjrt_compiler",
        "//xla/pjrt:pjrt_device_description",
        "//xla/pjrt:pjrt_executable",
        "//xla/pjrt:pjrt_future",
        "//xla/pjrt:pjrt_stream_executor_client",
        "//xla/pjrt:stream_executor_executable",
        "//xla/pjrt:stream_executor_executable_proto_cc",
        "//xla/pjrt:tracked_device_buffer",
        "//xla/pjrt:utils",
        "//xla/pjrt/distributed:client",
        "//xla/pjrt/distributed:in_memory_key_value_store",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/pjrt/distributed:topology_util",
        "//xla/service:compiler",
        "//xla/service:computation_placer_hdr",
        "//xla/service:executable",
        "//xla/service:global_device_id",
        "//xla/service:platform_util",
        "//xla/service:shaped_buffer",
        "//xla/service:transfer_manager",
        "//xla/service/gpu:gpu_executable_run_options",
        "//xla/stream_executor",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:device_memory_allocator",
        "//xla/stream_executor:platform",
        "//xla/stream_executor/integrations:device_mem_allocator",
        "//xla/stream_executor/integrations:tf_allocator_adapter",
        "//xla/tsl/util:env_var",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/functional:any_invocable",
        "@com_google_absl//absl/functional:bind_front",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/framework:allocator",
        "@local_tsl//tsl/framework:bfc_allocator",
        "@local_tsl//tsl/framework:device_id",
        "@local_tsl//tsl/framework:device_id_impl",
        "@local_tsl//tsl/lib/strings:proto_serialization",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:fingerprint",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/profiler/lib:connected_traceme",
        "@local_tsl//tsl/profiler/lib:traceme",
    ] + if_cuda_or_rocm([
        # Group gated by if_cuda_or_rocm().
        ":nccl_id_store",
        "//xla/service/gpu:gpu_compiler",
    ]) + if_cuda([
        # Group gated by if_cuda().
        "@local_config_cuda//cuda:cuda_headers",
        "//xla/stream_executor/gpu:gpu_cudamallocasync_allocator",
    ]) + if_rocm([
        # Group gated by if_rocm().
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

# Test target for :se_gpu_pjrt_client, declared through the xla_cc_test macro
# loaded from //xla:xla.bzl.
xla_cc_test(
    name = "se_gpu_pjrt_client_test",
    # The source list is passed through if_gpu_is_configured(); presumably it is
    # empty when no GPU toolchain is configured -- confirm in
    # //xla/stream_executor:build_defs.bzl.
    srcs = if_gpu_is_configured(["se_gpu_pjrt_client_test.cc"]),
    # NOTE(review): tag meanings are defined by the test infrastructure, not by
    # this file. "requires-gpu-nvidia:2" presumably requests two NVIDIA GPUs;
    # "noasan"/"nomsan" presumably exclude the test from sanitizer runs.
    tags = [
        "gpu",
        "no_oss",
        "noasan",
        "nomsan",
        "requires-gpu-nvidia:2",
    ],
    deps = [
        ":gpu_topology",
        ":se_gpu_pjrt_client",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:statusor",
        "//xla:test",
        "//xla:xla_data_proto_cc",
        "//xla/client:xla_computation",
        "//xla/ffi",
        "//xla/ffi:ffi_api",
        "//xla/pjrt:pjrt_client",
        "//xla/pjrt:pjrt_executable",
        "//xla/pjrt:pjrt_future",
        "//xla/pjrt:pjrt_stream_executor_client",
        "//xla/pjrt/distributed:in_memory_key_value_store",
        "//xla/service:gpu_plugin",
        "//xla/service:hlo_parser",
        "//xla/service:platform_util",
        "//xla/stream_executor",
        "//xla/tests:literal_test_util",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@local_tsl//tsl/lib/core:status_test_util",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Library built from nccl_id_store.cc / nccl_id_store.h.
# No explicit `visibility`, so the package-level default applies. Within this
# file it is only referenced from the if_cuda_or_rocm() dependency groups of
# :se_gpu_pjrt_client and :se_gpu_pjrt_compiler.
cc_library(
    name = "nccl_id_store",
    srcs = ["nccl_id_store.cc"],
    hdrs = ["nccl_id_store.h"],
    deps = [
        "//xla:status_macros",
        "//xla:statusor",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/service:global_device_id",
        "//xla/service/gpu/runtime:nccl_api",
        "//xla/service/gpu/runtime:nccl_clique_key",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test declared through the xla_test macro (loaded from
# //xla/tests:build_defs.bzl) and restricted to the "gpu" backend. It links
# //xla/pjrt:pjrt_client_test_common against :se_gpu_pjrt_client.
xla_test(
    name = "pjrt_client_test_se_gpu",
    srcs = ["pjrt_client_test_se_gpu.cc"],
    # Extra tags applied only to the "gpu" backend variant of the test.
    backend_tags = {
        "gpu": ["multi_gpu"],
    },
    backends = ["gpu"],
    # NOTE(review): tag meanings come from the test infrastructure; presumably
    # "no_oss" and "notap" exclude the test from certain automated runs.
    tags = [
        "no_oss",
        "notap",
    ],
    deps = [
        ":se_gpu_pjrt_client",
        "//xla/pjrt:pjrt_client_test_common",
        "@local_tsl//tsl/platform:test_main",
    ],
)

# Proto library for gpu_topology.proto, declared through the tf_proto_library
# macro. Other targets in this file depend on ":gpu_topology_proto_cc", which
# is not declared explicitly here -- presumably it is generated by this macro.
tf_proto_library(
    name = "gpu_topology_proto",
    srcs = ["gpu_topology.proto"],
    cc_api_version = 2,
    # Unlike the other targets in this package, this one is publicly visible.
    visibility = ["//visibility:public"],
)

# Library built from gpu_topology.cc / gpu_topology.h; depends on the C++
# target for gpu_topology.proto. No explicit `visibility`, so the package-level
# default applies.
cc_library(
    name = "gpu_topology",
    srcs = ["gpu_topology.cc"],
    hdrs = ["gpu_topology.h"],
    deps = [
        ":gpu_topology_proto_cc",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Library built from se_gpu_pjrt_compiler.cc / se_gpu_pjrt_compiler.h, layered
# on top of :se_gpu_pjrt_client.
#
# As with :se_gpu_pjrt_client, platform-specific dependencies are appended via
# if_cuda_or_rocm(), if_cuda() and if_rocm(); presumably each macro yields its
# argument only for the matching GPU platform -- confirm in the respective
# build_defs.bzl.
cc_library(
    name = "se_gpu_pjrt_compiler",
    srcs = ["se_gpu_pjrt_compiler.cc"],
    hdrs = ["se_gpu_pjrt_compiler.h"],
    # Same platform macros as :se_gpu_pjrt_client. NOTE(review): with standard
    # cc_library semantics `defines` also propagates to dependents.
    defines = if_cuda(["GOOGLE_CUDA=1"]) + if_rocm(["TENSORFLOW_USE_ROCM=1"]),
    deps = [
        ":se_gpu_pjrt_client",
        "//xla:status_macros",
        "//xla/client:local_client",
        "//xla/client:xla_computation",
        "//xla/pjrt:mlir_to_hlo",
        "//xla/pjrt:pjrt_client",
        "//xla/pjrt:pjrt_compiler",
        "//xla/pjrt:pjrt_executable",
        "//xla/pjrt:pjrt_stream_executor_client",
        "//xla/pjrt:stream_executor_executable",
        "//xla/pjrt:utils",
        "//xla/service:compiler",
        "//xla/service:dump",
        "//xla/service:hlo_module_util",
        "//xla/service:hlo_proto_cc",
        "//xla/service:hlo_proto_util",
        "//xla/service:local_service",
        "//xla/service:local_service_utils",
        "//xla/service/gpu:executable_proto_cc",
        "//xla/stream_executor/platform",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ] + if_cuda_or_rocm([
        # Group gated by if_cuda_or_rocm().
        ":nccl_id_store",
        "//xla/service/gpu:gpu_compiler",
    ]) + if_cuda([
        # Group gated by if_cuda().
        "@local_config_cuda//cuda:cuda_headers",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/cuda:cuda_activation_header",
        "//xla/stream_executor/gpu:gpu_cudamallocasync_allocator",
        "//xla/service/gpu:nvptx_compiler_impl",
    ]) + if_rocm([
        # Group gated by if_rocm().
        "@local_config_rocm//rocm:rocm_headers",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "//xla/service/gpu:amdgpu_compiler_impl",
    ]),
    # With standard cc_library semantics this keeps the library's object files
    # in any binary that depends on it even when no symbol is referenced.
    # NOTE(review): presumably needed because the .cc performs static
    # registration -- confirm in se_gpu_pjrt_compiler.cc.
    alwayslink = True,
)

# Library built from gpu_metrics.cc / gpu_metrics.h; depends on the tsl
# monitoring gauge library. Consumed within this package by
# :se_gpu_pjrt_client. No explicit `visibility`, so the package-level default
# applies.
cc_library(
    name = "gpu_metrics",
    srcs = ["gpu_metrics.cc"],
    hdrs = ["gpu_metrics.h"],
    deps = [
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/lib/monitoring:gauge",
    ],
)

# Test target for :se_gpu_pjrt_compiler, declared through the xla_cc_test macro
# loaded from //xla:xla.bzl.
#
# The BLAS plugin dependency is selected per GPU platform, using the same
# if_cuda()/if_rocm() gating as se_gpu_pjrt_compiler_aot_test, so that a
# non-CUDA configuration does not pull in the CUDA-only cuBLAS plugin.
xla_cc_test(
    name = "se_gpu_pjrt_compiler_test",
    # The source list is passed through if_gpu_is_configured(); presumably it is
    # empty when no GPU toolchain is configured -- confirm in
    # //xla/stream_executor:build_defs.bzl.
    srcs = if_gpu_is_configured(["se_gpu_pjrt_compiler_test.cc"]),
    # NOTE(review): tag meanings come from the test infrastructure. The
    # "config-cuda-only" tag is only added through if_google().
    tags = [
        "gpu",
        "no_oss",
        "requires-gpu-nvidia",
    ] + if_google(["config-cuda-only"]),
    deps = [
        ":gpu_topology",
        ":se_gpu_pjrt_client",
        ":se_gpu_pjrt_compiler",
        "//xla:test",
        "//xla/client:xla_computation",
        "//xla/mlir_hlo",
        "//xla/pjrt:pjrt_client",
        "//xla/pjrt:pjrt_compiler",
        "//xla/service:gpu_plugin",
        "//xla/service:hlo_parser",
        "//xla/tests:literal_test_util",
        "@com_google_absl//absl/status",
        "@com_google_googletest//:gtest",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:Parser",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test_main",
    ] + if_cuda([
        # CUDA builds: same dependency the target had unconditionally before.
        "//xla/stream_executor/cuda:cublas_plugin",
    ]) + if_rocm([
        # ROCm builds: counterpart plugin, as in se_gpu_pjrt_compiler_aot_test.
        "//xla/stream_executor/rocm:rocblas_plugin",
    ]),
)

# Test target built from se_gpu_pjrt_compiler_aot_test.cc, declared through the
# xla_cc_test macro loaded from //xla:xla.bzl. Depends on both
# :se_gpu_pjrt_client and :se_gpu_pjrt_compiler.
xla_cc_test(
    name = "se_gpu_pjrt_compiler_aot_test",
    # The source list is passed through if_gpu_is_configured(); presumably it is
    # empty when no GPU toolchain is configured -- confirm in
    # //xla/stream_executor:build_defs.bzl.
    srcs = if_gpu_is_configured(["se_gpu_pjrt_compiler_aot_test.cc"]),
    # NOTE(review): tag meanings come from the test infrastructure. The
    # "config-cuda-only" tag is only added through if_google().
    tags = [
        "gpu",
        "no_oss",
        "requires-gpu-nvidia",
    ] + if_google(["config-cuda-only"]),
    deps = [
        ":se_gpu_pjrt_client",
        ":se_gpu_pjrt_compiler",
        "//xla:literal",
        "//xla:literal_util",
        "//xla/client:xla_computation",
        "//xla/mlir_hlo",
        "//xla/pjrt:pjrt_client",
        "//xla/pjrt:pjrt_compiler",
        "//xla/pjrt:pjrt_executable",
        "//xla/service:compiler",
        "//xla/service:gpu_plugin",
        "//xla/service:hlo_parser",
        "//xla/tests:literal_test_util",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Parser",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test_main",
    ] + if_cuda([
        # Group gated by if_cuda(): compiler implementation and BLAS plugin.
        "//xla/service/gpu:nvptx_compiler_impl",
        "//xla/stream_executor/cuda:cublas_plugin",
    ]) + if_rocm([
        # Group gated by if_rocm(): compiler implementation and BLAS plugin.
        "//xla/service/gpu:amdgpu_compiler_impl",
        "//xla/stream_executor/rocm:rocblas_plugin",
    ]),
)
