dtfft_pencil.F90 Source File


This file depends on

sourcefile~~dtfft_pencil.f90~~EfferentGraph sourcefile~dtfft_pencil.f90 dtfft_pencil.F90 sourcefile~dtfft_interface_cuda_runtime.f90 dtfft_interface_cuda_runtime.F90 sourcefile~dtfft_pencil.f90->sourcefile~dtfft_interface_cuda_runtime.f90 sourcefile~dtfft_parameters.f90 dtfft_parameters.F90 sourcefile~dtfft_pencil.f90->sourcefile~dtfft_parameters.f90 sourcefile~dtfft_utils.f90 dtfft_utils.F90 sourcefile~dtfft_pencil.f90->sourcefile~dtfft_utils.f90 sourcefile~dtfft_interface_cuda_runtime.f90->sourcefile~dtfft_parameters.f90 sourcefile~dtfft_interface_cuda_runtime.f90->sourcefile~dtfft_utils.f90 sourcefile~dtfft_utils.f90->sourcefile~dtfft_parameters.f90

Files dependent on this one

sourcefile~~dtfft_pencil.f90~~AfferentGraph sourcefile~dtfft_pencil.f90 dtfft_pencil.F90 sourcefile~dtfft.f90 dtfft.F90 sourcefile~dtfft.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_plan.f90 dtfft_plan.F90 sourcefile~dtfft.f90->sourcefile~dtfft_plan.f90 sourcefile~dtfft_abstract_backend.f90 dtfft_abstract_backend.F90 sourcefile~dtfft_abstract_backend.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_abstract_executor.f90 dtfft_abstract_executor.F90 sourcefile~dtfft_abstract_executor.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_abstract_transpose_plan.f90 dtfft_abstract_transpose_plan.F90 sourcefile~dtfft_abstract_transpose_plan.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_abstract_transpose_plan.f90->sourcefile~dtfft_abstract_backend.f90 sourcefile~dtfft_api.f90 dtfft_api.F90 sourcefile~dtfft_api.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_api.f90->sourcefile~dtfft_plan.f90 sourcefile~dtfft_backend_cufftmp.f90 dtfft_backend_cufftmp.F90 sourcefile~dtfft_backend_cufftmp.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_backend_cufftmp.f90->sourcefile~dtfft_abstract_backend.f90 sourcefile~dtfft_executor_fftw_m.f90 dtfft_executor_fftw_m.F90 sourcefile~dtfft_executor_fftw_m.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_executor_fftw_m.f90->sourcefile~dtfft_abstract_executor.f90 sourcefile~dtfft_plan.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_plan.f90->sourcefile~dtfft_abstract_executor.f90 sourcefile~dtfft_plan.f90->sourcefile~dtfft_abstract_transpose_plan.f90 sourcefile~dtfft_plan.f90->sourcefile~dtfft_executor_fftw_m.f90 sourcefile~dtfft_transpose_plan_cuda.f90 dtfft_transpose_plan_cuda.F90 sourcefile~dtfft_plan.f90->sourcefile~dtfft_transpose_plan_cuda.f90 sourcefile~dtfft_transpose_plan_host.f90 dtfft_transpose_plan_host.F90 sourcefile~dtfft_plan.f90->sourcefile~dtfft_transpose_plan_host.f90 sourcefile~dtfft_executor_cufft_m.f90 dtfft_executor_cufft_m.F90 sourcefile~dtfft_plan.f90->sourcefile~dtfft_executor_cufft_m.f90 sourcefile~dtfft_executor_mkl_m.f90 dtfft_executor_mkl_m.F90 sourcefile~dtfft_plan.f90->sourcefile~dtfft_executor_mkl_m.f90 sourcefile~dtfft_executor_vkfft_m.f90 dtfft_executor_vkfft_m.F90 sourcefile~dtfft_plan.f90->sourcefile~dtfft_executor_vkfft_m.f90 sourcefile~dtfft_transpose_handle_cuda.f90 dtfft_transpose_handle_cuda.F90 sourcefile~dtfft_transpose_handle_cuda.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_transpose_handle_cuda.f90->sourcefile~dtfft_abstract_backend.f90 sourcefile~dtfft_transpose_handle_cuda.f90->sourcefile~dtfft_backend_cufftmp.f90 sourcefile~dtfft_backend_mpi.f90 dtfft_backend_mpi.F90 sourcefile~dtfft_transpose_handle_cuda.f90->sourcefile~dtfft_backend_mpi.f90 sourcefile~dtfft_backend_nccl.f90 dtfft_backend_nccl.F90 sourcefile~dtfft_transpose_handle_cuda.f90->sourcefile~dtfft_backend_nccl.f90 sourcefile~dtfft_transpose_handle_host.f90 dtfft_transpose_handle_host.F90 sourcefile~dtfft_transpose_handle_host.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_transpose_plan_cuda.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_transpose_plan_cuda.f90->sourcefile~dtfft_abstract_backend.f90 sourcefile~dtfft_transpose_plan_cuda.f90->sourcefile~dtfft_abstract_transpose_plan.f90 sourcefile~dtfft_transpose_plan_cuda.f90->sourcefile~dtfft_transpose_handle_cuda.f90 sourcefile~dtfft_transpose_plan_host.f90->sourcefile~dtfft_pencil.f90 sourcefile~dtfft_transpose_plan_host.f90->sourcefile~dtfft_abstract_transpose_plan.f90 sourcefile~dtfft_transpose_plan_host.f90->sourcefile~dtfft_transpose_handle_host.f90 sourcefile~dtfft_backend_mpi.f90->sourcefile~dtfft_abstract_backend.f90 sourcefile~dtfft_backend_nccl.f90->sourcefile~dtfft_abstract_backend.f90 sourcefile~dtfft_executor_cufft_m.f90->sourcefile~dtfft_abstract_executor.f90 sourcefile~dtfft_executor_mkl_m.f90->sourcefile~dtfft_abstract_executor.f90 sourcefile~dtfft_executor_vkfft_m.f90->sourcefile~dtfft_abstract_executor.f90

Source Code

!------------------------------------------------------------------------------------------------
! Copyright (c) 2021, Oleg Shatrov
! All rights reserved.
! This file is part of dtFFT library.

! dtFFT is free software: you can redistribute it and/or modify
! it under the terms of the GNU General Public License as published by
! the Free Software Foundation, either version 3 of the License, or
! (at your option) any later version.

! dtFFT is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
! GNU General Public License for more details.

! You should have received a copy of the GNU General Public License
! along with this program.  If not, see <https://www.gnu.org/licenses/>.
!------------------------------------------------------------------------------------------------
#include "dtfft_config.h"
module dtfft_pencil
!! This module describes private [[pencil]] and public [[dtfft_pencil_t]] classes
use iso_fortran_env
use iso_c_binding
use dtfft_parameters
use dtfft_utils
#include "dtfft_mpi.h"
#ifdef DTFFT_WITH_CUDA
use dtfft_interface_cuda_runtime
#include "dtfft_cuda.h"
#endif
implicit none
private
public :: pencil
public :: dtfft_pencil_t
public :: get_local_sizes
public :: get_transpose_type

  type :: dtfft_pencil_t
  !! Structure to hold pencil decomposition info
    integer(int8)               :: dim
      !! Aligned dimension id
    integer(int8)               :: ndims
      !! Number of dimensions
    integer(int32), allocatable :: starts(:)
      !! Local starts, starting from 0 for both C and Fortran
    integer(int32), allocatable :: counts(:)
      !! Local counts of data, in elements
    integer(int64)              :: size
      !! Total number of elements in a pencil
  contains
    final :: destroy_pencil_t
  end type dtfft_pencil_t

  type :: pencil
  !! Class that describes information about data layout
    integer(int8)                :: aligned_dim       !! Position of aligned dimension. For example: X pencil aligned_dim = 1, Z pencil aligned_dim = 3
    integer(int8)                :: rank              !! Rank of buffer: 2 or 3
    integer(int32), allocatable  :: starts(:)         !! Local starts, starting from 0 for both C and Fortran
    integer(int32), allocatable  :: counts(:)         !! Local counts of data, in elements
    logical                      :: is_even           !! Is data evenly distributed across processes
  contains
  private
    procedure, pass(self),  public  :: create         !! Creates pencil
    procedure, pass(self),  public  :: destroy        !! Destroys pencil
    ! procedure, pass(self),  public  :: output         !! Writes pencil data to stdout
    !                                                   !! Used only for debugging purposes
    procedure, pass(self),  public  :: make_public    !! Creates public object that users can use to create own FFT backends
  end type pencil

contains
  subroutine create(self, rank, aligned_dim, counts, comms)
  !! Creates pencil
    class(pencil),      intent(inout) :: self             !! Pencil
    integer(int8),      intent(in)    :: rank             !! Rank of buffer
    integer(int8),      intent(in)    :: aligned_dim      !! Position of aligned dimension
    integer(int32),     intent(in)    :: counts(:)        !! Global counts
    TYPE_MPI_COMM,      intent(in)    :: comms(:)         !! Grid communicators
    integer(int8)                     :: d                !! Counter
    logical, allocatable              :: is_even(:)       !! Even distribution flag

    call self%destroy()
    allocate(self%counts(rank))
    allocate(self%starts(rank))
    allocate(is_even(rank))
    self%aligned_dim = aligned_dim
    self%rank = rank
    do d = 1, rank
      call get_local_size(counts(d), comms(d), self%starts(d), self%counts(d), is_even(d))
    enddo
    self%is_even = all(is_even)
    deallocate(is_even)
  end subroutine create

  subroutine destroy(self)
  !! Destroys pencil
    class(pencil),      intent(inout) :: self             !! Pencil

    if ( allocated(self%counts) ) deallocate(self%counts)
    if ( allocated(self%starts) ) deallocate(self%starts)
  end subroutine destroy

  subroutine get_local_size(n_global, comm, start, count, is_even)
  !! Computes local portions of data based on global count and position inside grid communicator
    integer(int32), intent(in)    :: n_global             !! Global number of points
    TYPE_MPI_COMM,  intent(in)    :: comm                 !! Grid communicator
    integer(int32), intent(out)   :: start                !! Local start
    integer(int32), intent(out)   :: count                !! Local count
    logical,        intent(out)   :: is_even              !! Is data evenly distributed across processes
    integer(int32), allocatable   :: shift(:)             !! Work buffer
    integer(int32)                :: comm_dim             !! Number of MPI processes along n_global
    integer(int32)                :: comm_rank            !! Rank of current MPI process
    integer(int32)                :: res                  !! Residual from n_global / comm_dim
    integer(int32)                :: i                    !! Counter
    integer(int32)                :: ierr                 !! Error code

    call MPI_Comm_size(comm, comm_dim, ierr)
    call MPI_Comm_rank(comm, comm_rank, ierr)

    res = mod(n_global, comm_dim)
    start = 0

    if ( comm_dim == 1 ) then
      count = n_global
      is_even = .true.
      return
    elseif ( comm_rank >= comm_dim - res ) then
      count = int(n_global / comm_dim, int32) + 1
    else
      count = int(n_global / comm_dim, int32)
    endif
    allocate( shift(comm_dim) )
    call MPI_Allgather(count, 1, MPI_INTEGER, shift, 1, MPI_INTEGER, comm, ierr)

    do i = 0, comm_rank - 1
      start = start + shift(i + 1)
    end do
    is_even = all(shift == shift(1))
    deallocate(shift)
  end subroutine get_local_size

!   subroutine output(self, name, vec)
!   !! Writes pencil data to stdout
!     class(pencil),                intent(in)  :: self                 !! Pencil
!     character(len=*),             intent(in)  :: name                 !! Name of pencil
!     type(c_ptr),                  intent(in)  :: vec               !! Device pointer to data
!     integer(int32)                            :: iter                 !! Iteration counter
!     integer(int32)                            :: i,j,k,ijk            !! Counters
!     integer(int32)                            :: comm_size            !! Number of MPI processes
!     integer(int32)                            :: comm_rank            !! Rank of current MPI process
!     integer(int32)                            :: ierr                 !! Error code
! #ifdef DTFFT_WITH_CUDA
!     real(real32),    target,      allocatable :: buf(:)               !! Host buffer
! #endif

!     call MPI_Comm_rank(MPI_COMM_WORLD, comm_rank, ierr)
!     call MPI_Comm_size(MPI_COMM_WORLD, comm_size, ierr)

!     allocate( buf( product(self%counts) ) )

! #ifdef DTFFT_WITH_CUDA
!     if ( is_device_ptr(vec) ) then
!       CUDA_CALL( "cudaDeviceSynchronize", cudaDeviceSynchronize())
!       CUDA_CALL( "cudaMemcpy", cudaMemcpy(c_loc(buf), vec, int(real32, int64) * product(self%counts), cudaMemcpyDeviceToHost) )
!     endif
! #endif

!     do iter = 0, comm_size - 1
!       call MPI_Barrier(MPI_COMM_WORLD, ierr)
!       if ( iter == comm_rank ) then
!         write(output_unit,'(a)') name
!         do k = 0, self%counts(3) - 1
!           do j = 0, self%counts(2) - 1
!             ijk = k * self%counts(2) * self%counts(1) + j * self%counts(1)
! #ifdef DTFFT_WITH_CUDA
!             write(output_unit,'(2i5, *(f9.2))') j, k, (buf(ijk + i + 1), i=0,self%counts(1) - 1)
! #else
!             write(output_unit,'(2i5, *(f9.2))') j, k, (vec(ijk + i + 1), i=0,self%counts(1) - 1)
! #endif
!           enddo
!           write(output_unit, '(a)') ' '
!           flush(output_unit)
!         enddo
!         write(output_unit, '(a)') ' '
!         write(output_unit, '(a)') ' '
!         flush(output_unit)
!       endif
!       call MPI_Barrier(MPI_COMM_WORLD, ierr)
!     enddo

! #ifdef DTFFT_WITH_CUDA
!     deallocate(buf)
! #endif
!   end subroutine output

  type(dtfft_pencil_t) function make_public(self)
  !! Creates public object that users can use to create own FFT backends
    class(pencil),  intent(in)  :: self                 !! Pencil
    integer(int8) :: i  !! Counter

    make_public%dim = self%aligned_dim
    make_public%ndims = self%rank
    if ( allocated(make_public%counts) ) deallocate( make_public%counts )
    if ( allocated(make_public%starts) ) deallocate( make_public%starts )
    allocate(make_public%counts(1:self%rank), source=self%counts)
    allocate(make_public%starts(1:self%rank), source=self%starts)
    make_public%size = 1_int64
    do i = 1, make_public%ndims
      make_public%size = make_public%size * int(make_public%counts(i), int64)
    enddo
  end function make_public

  subroutine destroy_pencil_t(self)
  !! Destroys pencil
    type(dtfft_pencil_t), intent(inout) :: self !! Public pencil

    if ( allocated(self%counts) ) deallocate( self%counts )
    if ( allocated(self%starts) ) deallocate( self%starts )
    self%dim = -1
    self%ndims = -1
    self%size = -1
  end subroutine destroy_pencil_t

  subroutine get_local_sizes(pencils, in_starts, in_counts, out_starts, out_counts, alloc_size)
  !! Obtain local starts and counts in `real` and `fourier` spaces
    type(pencil),             intent(in)  :: pencils(:)             !! Array of pencils
    integer(int32), optional, intent(out) :: in_starts(:)           !! Start indexes in `real` space (0-based)
    integer(int32), optional, intent(out) :: in_counts(:)           !! Number of elements in `real` space
    integer(int32), optional, intent(out) :: out_starts(:)          !! Start indexes in `fourier` space (0-based)
    integer(int32), optional, intent(out) :: out_counts(:)          !! Number of elements in `fourier` space
    integer(int64), optional, intent(out) :: alloc_size             !! Minimal number of elements required to execute plan
    integer(int8)                         :: d                      !! Counter
    integer(int8)                         :: ndims                  !! Number of dimensions

    ndims = size(pencils, kind=int8)
    if ( present(in_starts) )  in_starts(1:ndims)   = pencils(1)%starts(1:ndims)
    if ( present(in_counts) )  in_counts(1:ndims)   = pencils(1)%counts(1:ndims)
    if ( present(out_starts) ) out_starts(1:ndims)  = pencils(ndims)%starts(1:ndims)
    if ( present(out_counts) ) out_counts(1:ndims)  = pencils(ndims)%counts(1:ndims)
    if ( present(alloc_size) ) alloc_size = maxval([(product(pencils(d)%counts), d=1,ndims)])
  end subroutine get_local_sizes

  pure function get_transpose_type(send, recv) result(transpose_type)
  !! Determines transpose ID based on pencils
    type(pencil),     intent(in)  :: send           !! Send pencil
    type(pencil),     intent(in)  :: recv           !! Receive pencil
    type(dtfft_transpose_t)       :: transpose_type !! Transpose ID

    transpose_type = dtfft_transpose_t(0)
    if (send%aligned_dim == 1 .and. recv%aligned_dim == 2) then
      transpose_type = DTFFT_TRANSPOSE_X_TO_Y
    else if (recv%aligned_dim == 1 .and. send%aligned_dim == 2) then
      transpose_type = DTFFT_TRANSPOSE_Y_TO_X
    else if (send%aligned_dim == 1 .and. recv%aligned_dim == 3) then
      transpose_type = DTFFT_TRANSPOSE_X_TO_Z
    else if (recv%aligned_dim == 1 .and. send%aligned_dim == 3) then
      transpose_type = DTFFT_TRANSPOSE_Z_TO_X
    else if (send%aligned_dim == 2 .and. recv%aligned_dim == 3) then
      transpose_type = DTFFT_TRANSPOSE_Y_TO_Z
    else if (recv%aligned_dim == 2 .and. send%aligned_dim == 3) then
      transpose_type = DTFFT_TRANSPOSE_Z_TO_Y
    endif
  end function get_transpose_type
end module dtfft_pencil