ncclMemAlloc Interface

interface

Allocate a GPU buffer of at least `alloc_bytes` bytes. The head address of the allocated buffer is returned in `ptr`. The actual allocated size may be larger than requested because of the buffer granularity requirements of the various NCCL optimizations.


Called by

Called-by graph for ncclMemAlloc (each entry reads caller -> callee):

- alloc_mem -> ncclMemAlloc
- alloc_and_set_aux -> alloc_mem
- execute_autotune -> alloc_mem
- execute_autotune -> alloc_and_set_aux
- reshape_plan_base%mem_alloc -> alloc_mem
- autotune_adaptive -> execute_autotune
- autotune_reshape_plan -> execute_autotune
- autotune_reshape_plan -> autotune_adaptive
- execute_many -> execute_autotune
- execute_many -> execute_single
- run_execute_single -> execute_autotune
- reshape_plan%create -> autotune_reshape_plan
- execute_single_transpose_modes -> run_execute_single
- run_autotune_backend -> execute_many
- run_autotune_backend -> execute_single
- autotune_grid -> run_autotune_backend
- transpose_plan%create -> run_autotune_backend
- transpose_plan%create -> autotune_grid_decomposition
- execute_single -> execute_single_transpose_modes
- autotune_grid_decomposition -> autotune_grid

public function ncclMemAlloc(ptr, alloc_bytes) result(ncclResult_t) bind(C, name="ncclMemAlloc")

Arguments

Type Intent Optional Attributes Name
type(c_ptr), intent(out) :: ptr

Buffer address

integer(kind=c_size_t), intent(in), value :: alloc_bytes

Number of bytes to allocate

Return Value integer(kind=c_int32_t)

Completion status