This module describes the nvRTC kernel class `nvrtc_kernel`.
Type | Visibility | Attributes | Name | Initial | |||
---|---|---|---|---|---|---|---|
integer(kind=int32), | public, | parameter | :: | DEF_TILE_SIZE | = | 32 |
Default tile size |
character(len=*), | private, | parameter | :: | DEFAULT_KERNEL_NAME | = | "dtfft_kernel" |
Basic kernel name |
integer(kind=int32), | private, | parameter | :: | TARGET_THREADS_PER_BLOCK | = | 256 |
Target number of threads per block for unpacked kernels |
nvRTC Compiled kernel class
Type | Visibility | Attributes | Name | Initial | |||
---|---|---|---|---|---|---|---|
logical, | private | :: | is_created | = | .false. |
Kernel is created flag. |
|
logical, | private | :: | is_dummy | = | .false. |
If kernel should do anything or not. |
|
type(CUfunction), | private | :: | cuda_kernel |
Pointer to CUDA kernel. |
|||
type(dim3), | private | :: | blocks |
Grid of blocks. |
|||
type(dim3), | private | :: | threads |
Thread block. |
|||
type(kernel_type_t), | private | :: | kernel_type |
Type of kernel to execute. |
|||
type(kernelArgs), | private | :: | kernelParams |
Kernel arguments. |
|||
integer(kind=int32), | private, | allocatable | :: | pointers(:,:) |
Optional pointers that hold info about counts and displacements
for unpack kernels |
||
type(c_ptr), | private | :: | device_pointers(3) |
Device pointers for kernel arguments. |
|||
logical, | private | :: | has_device_pointers |
Flag indicating if device pointers are present |
|||
integer(kind=int64), | private | :: | copy_bytes |
Number of bytes to copy |
procedure, public, pass(self) :: create | Creates kernel |
procedure, public, pass(self) :: execute | Executes kernel |
procedure, public, pass(self) :: destroy | Destroys kernel |
Compiles kernel stored in code
and caches pointer to CUfunction
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(MPI_Comm), | intent(in) | :: | comm |
MPI Communicator |
||
character(len=*), | intent(in) | :: | kernel_name |
Kernel name |
||
type(kernel_type_t), | intent(in) | :: | kernel_type |
Type of kernel to build |
||
type(dtfft_transpose_t), | intent(in) | :: | transpose_type |
Type of transposition to perform |
||
type(kernel_codegen), | intent(in) | :: | code |
Kernel code to compile |
||
type(device_props), | intent(in) | :: | props |
GPU architecture properties |
||
integer(kind=int64), | intent(in) | :: | base_storage |
Number of bytes needed to store single element |
||
integer(kind=int32), | intent(in) | :: | tile_size |
Tile size to use in shared memory |
||
integer(kind=int32), | intent(in) | :: | padding |
Padding to use in shared memory |
Compiled kernel to return
Populates kernel arguments based on kernel type
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(MPI_Comm), | intent(in) | :: | comm | |||
integer(kind=int32), | intent(in) | :: | dims(:) |
Local dimensions to process |
||
type(dtfft_transpose_t), | intent(in) | :: | transpose_type |
Type of transposition to perform |
||
type(kernel_type_t), | intent(in) | :: | kernel_type |
Type of kernel to build |
||
integer(kind=int32), | intent(in) | :: | block_rows |
Number of rows in each block |
||
type(c_ptr), | intent(in) | :: | ptrs(3) | |||
type(kernelArgs), | intent(out) | :: | params |
Kernel arguments |
Compiles kernel and caches it. Returns compiled kernel.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(MPI_Comm), | intent(in) | :: | comm |
MPI Communicator |
||
integer(kind=int32), | intent(in) | :: | dims(:) |
Local dimensions to process |
||
type(dtfft_transpose_t), | intent(in) | :: | transpose_type |
Type of transposition to perform |
||
type(kernel_type_t), | intent(in) | :: | kernel_type |
Type of kernel to build |
||
type(dtfft_effort_t), | intent(in) | :: | effort |
Effort level: how thoroughly to search when generating transpose kernels |
||
integer(kind=int64), | intent(in) | :: | base_storage |
Number of bytes needed to store single element |
||
type(device_props), | intent(in) | :: | props |
GPU architecture properties |
||
type(c_ptr), | intent(in) | :: | ptrs(3) |
Array of device pointers required by certain kernels |
||
type(dim3), | intent(out) | :: | blocks |
Selected grid of blocks |
||
type(dim3), | intent(out) | :: | threads |
Selected thread configuration |
||
type(CUfunction), | intent(out) | :: | kernel |
Compiled kernel to return |
||
logical, | intent(in), | optional | :: | force_effort |
Should effort be forced or not |
Creates kernel
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
class(nvrtc_kernel), | intent(inout) | :: | self |
nvRTC Compiled kernel class |
||
type(MPI_Comm), | intent(in) | :: | comm |
MPI Communicator |
||
integer(kind=int32), | intent(in) | :: | dims(:) |
Local dimensions to process |
||
type(dtfft_effort_t), | intent(in) | :: | effort |
Effort level for generating transpose kernels |
||
integer(kind=int64), | intent(in) | :: | base_storage |
Number of bytes needed to store single element |
||
type(dtfft_transpose_t), | intent(in) | :: | transpose_type |
Type of transposition to perform |
||
type(kernel_type_t), | intent(in) | :: | kernel_type |
Type of kernel to build |
||
integer(kind=int32), | intent(in), | optional | :: | pointers(:,:) |
Optional pointers to unpack kernels |
|
logical, | intent(in), | optional | :: | force_effort |
Should effort be forced or not |
Executes kernel on stream
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
class(nvrtc_kernel), | intent(inout) | :: | self |
nvRTC Compiled kernel class |
||
real(kind=real32), | intent(in), | target | :: | in(:) |
Source pointer |
|
real(kind=real32), | intent(in), | target | :: | out(:) |
Target pointer |
|
type(dtfft_stream_t), | intent(in) | :: | stream |
CUDA Stream |
||
integer(kind=int32), | intent(in), | optional | :: | source |
Source rank for pipelined unpacking |
Destroys kernel
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
class(nvrtc_kernel), | intent(inout) | :: | self |
nvRTC Compiled kernel class |
Allocates memory on a device and copies values
to it.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(c_ptr), | intent(inout) | :: | ptr |
Device pointer |
||
integer(kind=c_int), | intent(in), | target | :: | values(:) |
Values to copy |
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(MPI_Comm), | intent(in) | :: | comm |
MPI Communicator |
||
character(len=*), | intent(in) | :: | kernel_name | |||
integer(kind=int32), | intent(in) | :: | dims(:) |
Local dimensions to process |
||
type(dtfft_transpose_t), | intent(in) | :: | transpose_type |
Type of transposition to perform |
||
type(kernel_type_t), | intent(in) | :: | kernel_type |
Type of kernel to build |
||
integer(kind=int64), | intent(in) | :: | base_storage |
Number of bytes needed to store single element |
||
type(device_props), | intent(in) | :: | props | |||
type(kernel_config), | intent(in) | :: | config | |||
type(dim3), | intent(out) | :: | blocks | |||
type(dim3), | intent(out) | :: | threads | |||
type(CUfunction), | intent(out) | :: | kernel |
Compiled kernel to return |