You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
109 lines
3.1 KiB
109 lines
3.1 KiB
# NVIDIA NCCL 2
# A package of optimized primitives for collective multi-GPU communication.
|
|
|
|
# Package-level license declaration and exported license file.
licenses(["notice"])

exports_files(["LICENSE.txt"])

# Build macros used by the targets in this package.
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
load("@local_config_nccl//:build_defs.bzl", "cuda_rdc_library", "gen_device_srcs")
|
|
|
|
# Headers exposed relative to the src/ directory: with the "src" prefix
# stripped they are includable as "include/collectives.h" and "nccl.h".
cc_library(
    name = "src_hdrs",
    hdrs = ["src/include/collectives.h", "src/nccl.h"],
    strip_include_prefix = "src",
)
|
|
|
|
# Every file under src/include/, includable without the "src/include" prefix.
# Depends on the CUDA headers target from the local CUDA configuration.
cc_library(
    name = "include_hdrs",
    hdrs = glob(include = ["src/include/**"]),
    strip_include_prefix = "src/include",
    deps = [
        "@local_config_cuda//cuda:cuda_headers",
    ],
)
|
|
|
|
# Headers located directly in src/collectives/device/, includable without
# that directory prefix.
cc_library(
    name = "device_hdrs",
    hdrs = glob(include = ["src/collectives/device/*.h"]),
    strip_include_prefix = "src/collectives/device",
)
|
|
|
|
# The same NCCL source files are compiled several times, each time with
# different NCCL_OP/NCCL_TYPE defines. RDC compilation requires every compiled
# module to carry a unique ID. Clang derives that ID from the file path alone,
# so the files have to be copied to obtain distinct IDs for the different
# parts of the compilation. NVCC is unaffected because it generates IDs from
# the preprocessed content.
gen_device_srcs(
    name = "device_srcs",
    srcs = [
        "src/collectives/device/%s.cu.cc" % collective
        for collective in [
            "all_gather",
            "all_reduce",
            "broadcast",
            "reduce",
            "reduce_scatter",
            "sendrecv",
        ]
    ],
)
|
|
|
|
# Device-side library: functions.cu.cc plus the sources produced by
# :device_srcs, built through the cuda_rdc_library macro.
cuda_rdc_library(
    name = "device",
    srcs = [
        "src/collectives/device/functions.cu.cc",
        ":device_srcs",
    ] + glob(
        include = [
            # These headers are listed as sources because header inclusion
            # checking requires it.
            "src/collectives/device/*.h",
            "src/nccl.h",
        ],
    ),
    deps = [
        ":device_hdrs",
        ":include_hdrs",
        ":src_hdrs",
        "@local_config_cuda//cuda:cuda_headers",
    ],
)
|
|
|
|
# Primary NCCL target.
#
# cuda_library is used here rather than cc_library so that clang picks the
# correct name for kernel host stubs (the function pointers that initialize
# ncclKerns in enqueue.cc) after https://reviews.llvm.org/D68578.
cuda_library(
    name = "nccl",
    srcs = glob(
        include = [
            "src/**/*.cc",
            # Needed for header inclusion checking; details are given next to
            # the explicitly listed headers further down in this attribute.
            "src/graph/*.h",
        ],
        # Device-library code is built by the :device target, not here.
        exclude = ["src/collectives/device/**"],
    ) + [
        # Needed for header inclusion checking (see
        # http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs).
        # Files in src/ that #include "nccl.h" pick it up from src/ itself
        # rather than from the virtual includes directory.
        "src/include/collectives.h",
        "src/nccl.h",
    ],
    hdrs = ["src/nccl.h"],
    include_prefix = "third_party/nccl",
    # Link against librt on every platform except macOS.
    linkopts = select({
        "@org_tensorflow//tensorflow:macos": [],
        "//conditions:default": ["-lrt"],
    }),
    strip_include_prefix = "src",
    visibility = ["//visibility:public"],
    deps = [
        ":device",
        ":include_hdrs",
        ":src_hdrs",
    ],
)
|