!------------------------------------------------------------------------------------------------ ! Copyright (c) 2021 - 2025, Oleg Shatrov ! All rights reserved. ! This file is part of dtFFT library. ! dtFFT is free software: you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by ! the Free Software Foundation, either version 3 of the License, or ! (at your option) any later version. ! dtFFT is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU General Public License for more details. ! You should have received a copy of the GNU General Public License ! along with this program. If not, see <https://www.gnu.org/licenses/>. !------------------------------------------------------------------------------------------------ module dtfft_interface_cuda !! CUDA Driver Interfaces !! !! CUDA Driver is loaded at runtime via dynamic loading. use iso_c_binding use iso_fortran_env, only: int32 use dtfft_errors, only: DTFFT_SUCCESS use dtfft_parameters, only: dtfft_stream_t use dtfft_utils, only: string, dynamic_load, destroy_strings implicit none private #include "_dtfft_private.h" public :: load_cuda public :: cuLaunchKernel public :: dim3 type, bind(C) :: dim3 !! Dimension specification type integer(c_int) :: x,y,z end type integer(int32), parameter, public :: MAX_KERNEL_ARGS = 9 public :: CUmodule type, bind(C) :: CUmodule !! CUDA module type(c_ptr) :: ptr !! Actual pointer end type CUmodule public :: CUfunction type, bind(C) :: CUfunction !! CUDA function type(c_ptr) :: ptr !! Actual pointer end type CUfunction abstract interface function cuModuleLoadData_interface(mod, image) & result(cuResult) !! Load a module's data with options. !! !! Takes a pointer image and loads the corresponding module module into the current context. !! The image may be a cubin or fatbin as output by nvcc, or a NULL-terminated PTX, either as output by nvcc or hand-written. import type(CUmodule) :: mod !! Returned module type(c_ptr), value :: image !! Module data to load integer(c_int) :: cuResult !! Driver result code end function cuModuleLoadData_interface function cuModuleUnload_interface(hmod) & result(cuResult) !! Unloads a module. !! !! Unloads a module ``hmod`` from the current context. !! Attempting to unload a module which was obtained from the Library Management API !! such as ``cuLibraryGetModule`` will return ``CUDA_ERROR_NOT_PERMITTED``. import type(CUmodule), value :: hmod !! Module to unload integer(c_int) :: cuResult !! Driver result code end function cuModuleUnload_interface function cuModuleGetFunction_interface(hfunc, hmod, name) & result(cuResult) !! Returns a function handle. !! !! Returns in ``hfunc`` the handle of the function of name name located in module hmod. !! If no function of that name exists, ``cuModuleGetFunction`` returns ``CUDA_ERROR_NOT_FOUND``. import type(CUfunction) :: hfunc !! Returns a function handle. type(CUmodule), value :: hmod !! Module to retrieve function from type(c_ptr), value :: name !! Name of function to retrieve integer(c_int) :: cuResult !! Driver result code end function cuModuleGetFunction_interface function cuLaunchKernel_interface(func, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, stream, kernelParams, extra) & result(cuResult) !! Launches a CUDA function CUfunction. import type(CUfunction), value :: func !! CUDA function to launch integer(c_int), value :: gridDimX !! Grid dimensions in X integer(c_int), value :: gridDimY !! Grid dimensions in Y integer(c_int), value :: gridDimZ !! Grid dimensions in Z integer(c_int), value :: blockDimX !! Block dimensions in X integer(c_int), value :: blockDimY !! Block dimensions in Y integer(c_int), value :: blockDimZ !! Block dimensions in Z integer(c_int), value :: sharedMemBytes !! Dynamic shared memory size type(dtfft_stream_t), value :: stream !! Stream identifier type(c_ptr) :: kernelParams(*) !! Array of pointers to kernel parameters type(c_ptr) :: extra !! Dynamic shared-memory size per thread block in bytes integer(c_int) :: cuResult !! Driver result code end function cuLaunchKernel_interface end interface logical, save :: is_loaded = .false. !! Flag indicating whether the library is loaded type(c_ptr), save :: libcuda !! Handle to the loaded library type(c_funptr), save :: cuFunctions(4) !! Array of pointers to the CUDA functions procedure(cuModuleLoadData_interface), pointer, public :: cuModuleLoadData !! Fortran pointer to the cuModuleLoadData function procedure(cuModuleUnload_interface), pointer, public :: cuModuleUnload !! Fortran pointer to the cuModuleUnload function procedure(cuModuleGetFunction_interface), pointer, public :: cuModuleGetFunction !! Fortran pointer to the cuModuleGetFunction function procedure(cuLaunchKernel_interface), pointer :: cuLaunchKernel_ !! Fortran pointer to the cuLaunchKernel function contains function load_cuda() result(error_code) !! Loads the CUDA Driver library and needed symbols integer(int32) :: error_code !! Error code type(string), allocatable :: func_names(:) error_code = DTFFT_SUCCESS if ( is_loaded ) return allocate(func_names(4)) func_names(1) = string("cuModuleLoadData") func_names(2) = string("cuModuleUnload") func_names(3) = string("cuModuleGetFunction") func_names(4) = string("cuLaunchKernel") error_code = dynamic_load("libcuda.so", func_names, libcuda, cuFunctions) call destroy_strings(func_names) if ( error_code /= DTFFT_SUCCESS ) return call c_f_procpointer(cuFunctions(1), cuModuleLoadData) call c_f_procpointer(cuFunctions(2), cuModuleUnload) call c_f_procpointer(cuFunctions(3), cuModuleGetFunction) call c_f_procpointer(cuFunctions(4), cuLaunchKernel_) is_loaded = .true. end function load_cuda function cuLaunchKernel(func, in, out, blocks, threads, stream, nargs, args) result(cuResult) !! Launches a CUDA kernel type(CUfunction), intent(in) :: func !! Function CUfunction or Kernel CUkernel to launch type(c_ptr), target, intent(in) :: in !! Input pointer type(c_ptr), target, intent(in) :: out !! Output pointer type(dim3), intent(in) :: blocks !! Grid in blocks type(dim3), intent(in) :: threads !! Thread block type(dtfft_stream_t), intent(in) :: stream !! Stream identifier integer(int32), intent(in) :: nargs integer(int32), target, intent(in) :: args(MAX_KERNEL_ARGS) !! Input parameters of kernel `func` integer(c_int) :: cuResult !! Driver result code type(c_ptr) :: params(15) integer(int32) :: i, temp params(:) = c_null_ptr ! Addresses of pointers are required, not the pointers themselves params(1) = c_loc(out) params(2) = c_loc(in) temp = 2 do i = 1, nargs params(temp + i) = c_loc(args(i)) enddo cuResult = cuLaunchKernel_(func, blocks%x, blocks%y, blocks%z, threads%x, threads%y, threads%z, 0, stream, params, c_null_ptr) end function cuLaunchKernel end module dtfft_interface_cuda