!------------------------------------------------------------------------------------------------ ! Copyright (c) 2021 - 2025, Oleg Shatrov ! All rights reserved. ! This file is part of dtFFT library. ! dtFFT is free software: you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by ! the Free Software Foundation, either version 3 of the License, or ! (at your option) any later version. ! dtFFT is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU General Public License for more details. ! You should have received a copy of the GNU General Public License ! along with this program. If not, see <https://www.gnu.org/licenses/>. !------------------------------------------------------------------------------------------------ module dtfft_interface_cuda !! CUDA Driver Interfaces !! !! CUDA Driver is loaded at runtime via dynamic loading. use iso_c_binding use iso_fortran_env, only: int32 use dtfft_errors, only: DTFFT_SUCCESS use dtfft_parameters, only: dtfft_stream_t use dtfft_utils, only: string, dynamic_load, destroy_strings implicit none private #include "_dtfft_private.h" public :: load_cuda public :: cuLaunchKernel public :: dim3 type, bind(C) :: dim3 !! Dimension specification type integer(c_int) :: x,y,z end type public :: kernelArgs type, bind(C) :: kernelArgs !! Arguments passed to nvrtc-compiled kernels ! integer(c_int) :: n_longs = 0 ! integer(c_size_t) :: longs(1) integer(c_int) :: n_ints = 0 !! Number of integers provided integer(c_int) :: ints(5) !! Integer array integer(c_int) :: n_ptrs = 0 !! Number of pointers provided type(c_ptr) :: ptrs(3) !! Pointer array end type kernelArgs public :: CUmodule type, bind(C) :: CUmodule !! CUDA module type(c_ptr) :: ptr !! Actual pointer end type CUmodule public :: CUfunction type, bind(C) :: CUfunction !! CUDA function type(c_ptr) :: ptr !! Actual pointer end type CUfunction abstract interface function cuModuleLoadData_interface(mod, image) & result(cuResult) !! Load a module's data with options. !! !! Takes a pointer image and loads the corresponding module module into the current context. !! The image may be a cubin or fatbin as output by nvcc, or a NULL-terminated PTX, either as output by nvcc or hand-written. import type(CUmodule) :: mod !! Returned module character(c_char) :: image(*) !! Module data to load integer(c_int) :: cuResult !! Driver result code end function cuModuleLoadData_interface function cuModuleUnload_interface(hmod) & result(cuResult) !! Unloads a module. !! !! Unloads a module ``hmod`` from the current context. !! Attempting to unload a module which was obtained from the Library Management API !! such as ``cuLibraryGetModule`` will return ``CUDA_ERROR_NOT_PERMITTED``. import type(CUmodule), value :: hmod !! Module to unload integer(c_int) :: cuResult !! Driver result code end function cuModuleUnload_interface function cuModuleGetFunction_interface(hfunc, hmod, name) & result(cuResult) !! Returns a function handle. !! !! Returns in ``hfunc`` the handle of the function of name name located in module hmod. !! If no function of that name exists, ``cuModuleGetFunction`` returns ``CUDA_ERROR_NOT_FOUND``. import type(CUfunction) :: hfunc !! Returns a function handle. type(CUmodule), value :: hmod !! Module to retrieve function from character(c_char) :: name(*) !! Name of function to retrieve integer(c_int) :: cuResult !! Driver result code end function cuModuleGetFunction_interface function cuLaunchKernel_interface(func, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, stream, kernelParams, extra) & result(cuResult) !! Launches a CUDA function CUfunction. import type(CUfunction), value :: func !! CUDA function to launch integer(c_int), value :: gridDimX !! Grid dimensions in X integer(c_int), value :: gridDimY !! Grid dimensions in Y integer(c_int), value :: gridDimZ !! Grid dimensions in Z integer(c_int), value :: blockDimX !! Block dimensions in X integer(c_int), value :: blockDimY !! Block dimensions in Y integer(c_int), value :: blockDimZ !! Block dimensions in Z integer(c_int), value :: sharedMemBytes !! Dynamic shared memory size type(dtfft_stream_t), value :: stream !! Stream identifier type(c_ptr) :: kernelParams(*) !! Array of pointers to kernel parameters type(c_ptr) :: extra !! Dynamic shared-memory size per thread block in bytes integer(c_int) :: cuResult !! Driver result code end function cuLaunchKernel_interface end interface logical, save :: is_loaded = .false. !! Flag indicating whether the library is loaded type(c_ptr), save :: libcuda !! Handle to the loaded library type(c_funptr), save :: cuFunctions(4) !! Array of pointers to the CUDA functions procedure(cuModuleLoadData_interface), pointer, public :: cuModuleLoadData !! Fortran pointer to the cuModuleLoadData function procedure(cuModuleUnload_interface), pointer, public :: cuModuleUnload !! Fortran pointer to the cuModuleUnload function procedure(cuModuleGetFunction_interface), pointer, public :: cuModuleGetFunction !! Fortran pointer to the cuModuleGetFunction function procedure(cuLaunchKernel_interface), pointer :: cuLaunchKernel_ !! Fortran pointer to the cuLaunchKernel function contains function load_cuda() result(error_code) !! Loads the CUDA Driver library and needed symbols integer(int32) :: error_code !! Error code type(string), allocatable :: func_names(:) error_code = DTFFT_SUCCESS if ( is_loaded ) return allocate(func_names(4)) func_names(1) = string("cuModuleLoadData") func_names(2) = string("cuModuleUnload") func_names(3) = string("cuModuleGetFunction") func_names(4) = string("cuLaunchKernel") error_code = dynamic_load("libcuda.so", func_names, libcuda, cuFunctions) call destroy_strings(func_names) if ( error_code /= DTFFT_SUCCESS ) return call c_f_procpointer(cuFunctions(1), cuModuleLoadData) call c_f_procpointer(cuFunctions(2), cuModuleUnload) call c_f_procpointer(cuFunctions(3), cuModuleGetFunction) call c_f_procpointer(cuFunctions(4), cuLaunchKernel_) is_loaded = .true. end function load_cuda function cuLaunchKernel(func, in, out, blocks, threads, stream, kernelParams) result(cuResult) !! Launches a CUDA kernel type(CUfunction), intent(in) :: func !! Function CUfunction or Kernel CUkernel to launch type(c_ptr), target, intent(in) :: in !! Input pointer type(c_ptr), target, intent(in) :: out !! Output pointer type(dim3), intent(in) :: blocks !! Grid in blocks type(dim3), intent(in) :: threads !! Thread block type(dtfft_stream_t), intent(in) :: stream !! Stream identifier type(kernelArgs), target, intent(in) :: kernelParams !! Input parameters of kernel `func` integer(c_int) :: cuResult !! Driver result code type(c_ptr) :: args(10) ! integer(int32), pointer :: param integer(int32) :: i, temp args(:) = c_null_ptr ! Addresses of pointers are required, not the pointers themselves args(1) = c_loc(out) args(2) = c_loc(in) temp = 2 ! do i = 1, kernelParams%n_longs ! args(temp + i) = c_loc(kernelParams%longs(i)) ! enddo ! temp = temp + kernelParams%n_longs do i = 1, kernelParams%n_ints args(temp + i) = c_loc(kernelParams%ints(i)) enddo temp = temp + kernelParams%n_ints do i = 1, kernelParams%n_ptrs args(temp + i) = c_loc(kernelParams%ptrs(i)) enddo cuResult = cuLaunchKernel_(func, blocks%x, blocks%y, blocks%z, threads%x, threads%y, threads%z, 0, stream, args, c_null_ptr) end function cuLaunchKernel end module dtfft_interface_cuda