Commit 05ca1675 authored by Andreas Marek's avatar Andreas Marek

Disentangle 64bit integer blas from MPI

parent 043ddf39
......@@ -87,7 +87,7 @@ gnu-gnu-nompi-noopenmp-ilp64:
when: on_success
expire_in: 2 month
script:
- ./ci_test_scripts/run_ci_tests.sh -c "CC=\"gcc\" CFLAGS=\"-O3 -mavx\" FC=\"gfortran\" FCFLAGS=\"-O3 -mavx\" SCALAPACK_LDFLAGS=\"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_NOMPI_NOOMP_ILP64 \" SCALAPACK_FCFLAGS=\"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_NOMPI_NOOMP_ILP64 \" --enable-option-checking=fatal --with-mpi=no --disable-openmp --disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-support || { cat config.log; exit 1; }" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE -s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM
- ./ci_test_scripts/run_ci_tests.sh -c "CC=\"gcc\" CFLAGS=\"-O3 -mavx\" FC=\"gfortran\" FCFLAGS=\"-O3 -mavx\" SCALAPACK_LDFLAGS=\"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_NOMPI_NOOMP_ILP64 \" SCALAPACK_FCFLAGS=\"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_NOMPI_NOOMP_ILP64 \" --enable-option-checking=fatal --with-mpi=no --disable-openmp --disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE -s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM
# gnu-gnu-ilp64-nompi-openmp
......@@ -98,7 +98,7 @@ gnu-gnu-nompi-openmp-ilp64:
when: on_success
expire_in: 2 month
script:
- ./ci_test_scripts/run_ci_tests.sh -c "CC=\"gcc\" CFLAGS=\"-O3 -mavx\" FC=\"gfortran\" FCFLAGS=\"-O3 -mavx\" SCALAPACK_LDFLAGS=\"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_NOMPI_OMP_ILP64 \" SCALAPACK_FCFLAGS=\"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_NOMPI_OMP_ILP64 \" --enable-option-checking=fatal --with-mpi=no --enable-openmp --disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-support || { cat config.log; exit 1; }" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE -s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM
- ./ci_test_scripts/run_ci_tests.sh -c "CC=\"gcc\" CFLAGS=\"-O3 -mavx\" FC=\"gfortran\" FCFLAGS=\"-O3 -mavx\" SCALAPACK_LDFLAGS=\"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_NOMPI_OMP_ILP64 \" SCALAPACK_FCFLAGS=\"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_NOMPI_OMP_ILP64 \" --enable-option-checking=fatal --with-mpi=no --enable-openmp --disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE -s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM
# python tests
......
......@@ -348,7 +348,7 @@ ilp64_no_omp_tests = [
'SCALAPACK_LDFLAGS=\\"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_NOMPI_NOOMP_ILP64 \\" '
'SCALAPACK_FCFLAGS=\\"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_NOMPI_NOOMP_ILP64 \\" '
'--enable-option-checking=fatal --with-mpi=no --disable-openmp '
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-support || { cat config.log; exit 1; }'
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM',
"\n",
......@@ -366,7 +366,7 @@ ilp64_no_omp_tests = [
'SCALAPACK_LDFLAGS=\\"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_NOMPI_OMP_ILP64 \\" '
'SCALAPACK_FCFLAGS=\\"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_NOMPI_OMP_ILP64 \\" '
'--enable-option-checking=fatal --with-mpi=no --enable-openmp '
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-support || { cat config.log; exit 1; }'
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM',
"\n",
......
......@@ -213,21 +213,21 @@ dnl first long int
AC_CHECK_SIZEOF([long int])
size_of_long_int="${ac_cv_sizeof_long_int}"
dnl then 64bit
dnl then 64bit blas
AC_MSG_CHECKING(whether 64bit integers should be used for math libraries (BLAS/LAPACK/SCALAPACK))
AC_ARG_ENABLE([64bit-integer-support],
AS_HELP_STRING([--64bit-integer-support],
AC_ARG_ENABLE([64bit-integer-math-support],
AS_HELP_STRING([--64bit-integer-math-support],
[allows to link against the 64bit integer versions of the math libraries BLAS, LAPACK, and SCALAPACK]),
[
if test x"$enableval" = x"yes"; then
enable_64bit_integer_support=yes
enable_64bit_integer_math_support=yes
else
enable_64bit_integer_support=no
enable_64bit_integer_math_support=no
fi
],
[enable_64bit_integer_support="no"])
AC_MSG_RESULT([$enable_64bit_integer_support])
if test x"${enable_64bit_integer_support}" = x"yes"; then
[enable_64bit_integer_math_support="no"])
AC_MSG_RESULT([$enable_64bit_integer_math_support])
if test x"${enable_64bit_integer_math_support}" = x"yes"; then
if test x"${enable_legacy}" = x"yes"; then
AC_MSG_ERROR([You cannot both enable 64bit integer support and the legacy interface!])
fi
......@@ -242,9 +242,32 @@ if test x"${enable_64bit_integer_support}" = x"yes"; then
AC_MSG_ERROR([The C data-type "long int" is only ${size_of_long_int} bytes; Needed is 8 bytes])
fi
AC_DEFINE([HAVE_64BIT_INTEGER_SUPPORT], [1], [allow to link against the 64bit integer versions of math libraries])
AC_DEFINE([HAVE_64BIT_INTEGER_MATH_SUPPORT], [1], [allow to link against the 64bit integer versions of math libraries])
fi
AM_CONDITIONAL([HAVE_64BIT_INTEGER_SUPPORT],[test x"$enable_64bit_integer_support" = x"yes"])
AM_CONDITIONAL([HAVE_64BIT_INTEGER_MATH_SUPPORT],[test x"$enable_64bit_integer_math_support" = x"yes"])
dnl then 64bit blas
AC_MSG_CHECKING(whether 64bit integers should be used for the MPI library)
AC_ARG_ENABLE([64bit-integer-mpi-support],
AS_HELP_STRING([--64bit-integer-mpi-support],
[allows to link against the 64bit integer versions of the MPI library]),
[
if test x"$enableval" = x"yes"; then
enable_64bit_integer_mpi_support=yes
else
enable_64bit_integer_mpi_support=no
fi
],
[enable_64bit_integer_mpi_support="no"])
AC_MSG_RESULT([$enable_64bit_integer_mpi_support])
if test x"${enable_64bit_integer_mpi_support}" = x"yes"; then
if test x"${enable_legacy}" = x"yes"; then
AC_MSG_ERROR([You cannot both enable 64bit integer support and the legacy interface!])
fi
AC_DEFINE([HAVE_64BIT_INTEGER_MPI_SUPPORT], [1], [allow to link against the 64bit integer versions of the MPI library])
fi
AM_CONDITIONAL([HAVE_64BIT_INTEGER_MPI_SUPPORT],[test x"$enable_64bit_integer_mpi_support" = x"yes"])
AC_MSG_CHECKING(whether C compiler can use _Generic )
......
......@@ -171,8 +171,10 @@
#endif
integer(kind=ik) :: lr_end
integer(kind=ik) :: na_cols
integer(kind=BLAS_KIND) :: na_colsBLAS
#if COMPLEXCASE == 1
integer(kind=ik) :: na_rows
integer(kind=BLAS_KIND) :: na_rowsBLAS
#endif
logical, intent(in) :: wantDebug
......@@ -256,11 +258,13 @@
if (useGPU) then
#ifdef WITH_MPI
#if COMPLEXCASE == 1
na_rows = numroc(int(na,kind=MPI_KIND), int(nblk,kind=MPI_KIND), &
int(my_prow,kind=MPI_KIND), 0_MPI_KIND, int(np_rows,kind=MPI_KIND))
na_rowsBLAS = numroc(int(na,kind=BLAS_KIND), int(nblk,kind=BLAS_KIND), &
int(my_prow,kind=BLAS_KIND), 0_BLAS_KIND, int(np_rows,kind=BLAS_KIND))
na_rows = int(na_rowsBLAS,kind=c_int)
#endif
na_cols = numroc(int(na,kind=MPI_KIND), int(nblk,kind=MPI_KIND), &
int(my_pcol,kind=MPI_KIND), 0_MPI_KIND, int(np_cols,kind=MPI_KIND))
na_colsBLAS = numroc(int(na,kind=BLAS_KIND), int(nblk,kind=BLAS_KIND), &
int(my_pcol,kind=BLAS_KIND), 0_BLAS_KIND, int(np_cols,kind=BLAS_KIND))
na_cols = int(na_colsBLAS,kind=c_int)
#else
#if COMPLEXCASE == 1
na_rows = na
......
......@@ -64,7 +64,7 @@ subroutine qr_pdlarfb_1dcomm_&
! local scalars
integer(kind=ik) :: localsize,offset,baseoffset
integer(kind=ik) :: mpirank, mpiprocs
integer(kind=BLAS_KIND) :: mpirankMPI, mpiprocsMPI, mpierr
integer(kind=MPI_KIND) :: mpirankMPI, mpiprocsMPI, mpierr
if (idx .le. 1) return
......
......@@ -60,13 +60,26 @@
#include <math.h>
#include <complex.h>
#ifdef HAVE_64BIT_INTEGER_SUPPORT
#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT
#define C_INT_TYPE_PTR long int*
#define C_INT_TYPE long int
#define BLAS_KIND c_int64_t
#else
#define C_INT_TYPE_PTR int*
#define C_INT_TYPE int
#define BLAS_KIND c_int
#endif
#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT
#define C_INT_MPI_TYPE_PTR long int*
#define C_INT_MPI_TYPE long int
#define MPI_KIND c_int64_t
#else
#define C_INT_MPI_TYPE_PTR int*
#define C_INT_MPI_TYPE int
#define MPI_KIND c_int
#endif
// most of the file is not compiled if not using MPI
#ifdef WITH_MPI
......@@ -84,69 +97,37 @@
#undef REALCASE
/*
!f>#ifdef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_reduction_d(A, U, local_rowsCast, local_colsCast, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="cannons_reduction_c_d")
!f> use, intrinsic :: iso_c_binding
!f> use precision
!f> real(c_double) :: A(local_rowsCast, local_colsCast), U(local_rowsCast, local_colsCast)
!f> real(c_double) :: Res(local_rowsCast, local_colsCast)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int64_t) :: a_desc(9)
!f> integer(kind=BLAS_KIND) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int64_t),value :: row_comm, col_comm, ToStore
!f> integer(kind=MPI_KIND),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
!f>#endif
!f>#ifndef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_reduction_d(A, U, local_rowsCast, local_colsCast, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="cannons_reduction_c_d")
!f> use, intrinsic :: iso_c_binding
!f> real(c_double) :: A(local_rowsCast, local_colsCast), U(local_rowsCast, local_colsCast)
!f> real(c_double) :: Res(local_rowsCast, local_colsCast)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
!f>#endif
*/
void cannons_reduction_c_d(double* A, double* U, int local_rowsCast, int local_colsCast, C_INT_TYPE_PTR a_desc,
double *Res, C_INT_TYPE ToStore, C_INT_TYPE row_comm, C_INT_TYPE col_comm);
double *Res, C_INT_MPI_TYPE ToStore, C_INT_MPI_TYPE row_comm, C_INT_MPI_TYPE col_comm);
/*
!f>#ifdef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_triang_rectangular_d(U, B, local_rowsCast, local_colsCast, u_desc, b_desc, Res, row_comm, col_comm) &
!f> bind(C, name="cannons_triang_rectangular_c_d")
!f> use, intrinsic :: iso_c_binding
!f> use precision
!f> real(c_double) :: U(local_rowsCast, local_colsCast), B(local_rowsCast, local_colsCast)
!f> real(c_double) :: Res(local_rowsCast, local_colsCast)
!f> integer(kind=c_int64_t) :: u_desc(9), b_desc(9)
!f> integer(kind=BLAS_KIND) :: u_desc(9), b_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int64_t),value :: row_comm, col_comm
!f> end subroutine
!f> end interface
!f>#endif
!f>#ifndef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_triang_rectangular_d(U, B, local_rowsCast, local_colsCast, u_desc, b_desc, Res, row_comm, col_comm) &
!f> bind(C, name="cannons_triang_rectangular_c_d")
!f> use, intrinsic :: iso_c_binding
!f> real(c_double) :: U(local_rowsCast, local_colsCast), B(local_rowsCast, local_colsCast)
!f> real(c_double) :: Res(local_rowsCast, local_colsCast)
!f> integer(kind=c_int) :: u_desc(9), b_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int),value :: row_comm, col_comm
!f> integer(kind=MPI_KIND),value :: row_comm, col_comm
!f> end subroutine
!f> end interface
!f>#endif
*/
void cannons_triang_rectangular_c_d(double* U, double* B, int local_rowsCast, int local_colsCast,
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, double *Res, C_INT_TYPE row_comm, C_INT_TYPE col_comm);
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, double *Res, C_INT_MPI_TYPE row_comm, C_INT_MPI_TYPE col_comm);
//***********************************************************************************************************
......@@ -159,68 +140,37 @@ void cannons_triang_rectangular_c_d(double* U, double* B, int local_rowsCast, in
#undef REALCASE
/*
!f>#ifdef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_reduction_f(A, U, local_rowsCast, local_colsCast, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="cannons_reduction_c_f")
!f> use, intrinsic :: iso_c_binding
!f> use precision
!f> real(c_float) :: A(local_rowsCast, local_colsCast), U(local_rowsCast, local_colsCast)
!f> real(c_float) :: Res(local_rowsCast, local_colsCast)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int64_t) :: a_desc(9)
!f> integer(kind=BLAS_KIND) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int64_t),value :: row_comm, col_comm, ToStore
!f> integer(kind=MPI_KIND),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
!f>#endif
!f>#ifndef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_reduction_f(A, U, local_rowsCast, local_colsCast, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="cannons_reduction_c_f")
!f> use, intrinsic :: iso_c_binding
!f> real(c_float) :: A(local_rowsCast, local_colsCast), U(local_rowsCast, local_colsCast)
!f> real(c_float) :: Res(local_rowsCast, local_colsCast)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
!f>#endif
*/
void cannons_reduction_c_f(float* A, float* U, int local_rowsCast, int local_colsCast, C_INT_TYPE_PTR a_desc,
float *Res, C_INT_TYPE ToStore, C_INT_TYPE row_comm, C_INT_TYPE col_comm);
float *Res, C_INT_MPI_TYPE ToStore, C_INT_MPI_TYPE row_comm, C_INT_MPI_TYPE col_comm);
/*
!f>#ifdef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_triang_rectangular_f(U, B, local_rowsCast, local_colsCast, u_desc, b_desc, Res, row_comm, col_comm) &
!f> bind(C, name="cannons_triang_rectangular_c_f")
!f> use, intrinsic :: iso_c_binding
!f> real(c_float) :: U(local_rowsCast, local_colsCast), B(local_rowsCast, local_colsCast)
!f> real(c_float) :: Res(local_rowsCast, local_colsCast)
!f> integer(kind=c_int64_t) :: u_desc(9), b_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int64_t),value :: row_comm, col_comm
!f> end subroutine
!f> end interface
!f>#endif
!f>#ifndef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_triang_rectangular_f(U, B, local_rowsCast, local_colsCast, u_desc, b_desc, Res, row_comm, col_comm) &
!f> bind(C, name="cannons_triang_rectangular_c_f")
!f> use, intrinsic :: iso_c_binding
!f> use precision
!f> real(c_float) :: U(local_rowsCast, local_colsCast), B(local_rowsCast, local_colsCast)
!f> real(c_float) :: Res(local_rowsCast, local_colsCast)
!f> integer(kind=c_int) :: u_desc(9), b_desc(9)
!f> integer(kind=BLAS_KIND) :: u_desc(9), b_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int),value :: row_comm, col_comm
!f> integer(kind=MPI_KIND),value :: row_comm, col_comm
!f> end subroutine
!f> end interface
!f>#endif
*/
void cannons_triang_rectangular_c_f(float* U, float* B, int local_rowsCast, int local_colsCast,
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, float *Res, C_INT_TYPE row_comm, C_INT_TYPE col_comm);
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, float *Res, C_INT_MPI_TYPE row_comm, C_INT_MPI_TYPE col_comm);
//***********************************************************************************************************
......@@ -233,68 +183,36 @@ void cannons_triang_rectangular_c_f(float* U, float* B, int local_rowsCast, int
#undef COMPLEXCASE
/*
!f>#ifdef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_reduction_dc(A, U, local_rowsCast, local_colsCast, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="cannons_reduction_c_dc")
!f> use, intrinsic :: iso_c_binding
!f> use precision
!f> complex(c_double) :: A(local_rowsCast, local_colsCast), U(local_rowsCast, local_colsCast)
!f> complex(c_double) :: Res(local_rowsCast, local_colsCast)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int64_t) :: a_desc(9)
!f> integer(kind=BLAS_KIND) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int64_t),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
!f>#endif
!f>#ifndef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_reduction_dc(A, U, local_rowsCast, local_colsCast, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="cannons_reduction_c_dc")
!f> use, intrinsic :: iso_c_binding
!f> complex(c_double) :: A(local_rowsCast, local_colsCast), U(local_rowsCast, local_colsCast)
!f> complex(c_double) :: Res(local_rowsCast, local_colsCast)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int),value :: row_comm, col_comm, ToStore
!f> integer(kind=MPI_KIND),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
!f>#endif
*/
void cannons_reduction_c_dc(double complex* A, double complex* U, int local_rowsCast, int local_colsCasr, C_INT_TYPE_PTR a_desc,
double complex *Res, C_INT_TYPE ToStore, C_INT_TYPE row_comm, C_INT_TYPE col_comm);
double complex *Res, C_INT_MPI_TYPE ToStore, C_INT_MPI_TYPE row_comm, C_INT_MPI_TYPE col_comm);
/*
!f>#ifdef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_triang_rectangular_dc(U, B, local_rowsCast, local_colsCast, u_desc, b_desc, Res, row_comm, col_comm) &
!f> bind(C, name="cannons_triang_rectangular_c_dc")
!f> use, intrinsic :: iso_c_binding
!f> complex(c_double) :: U(local_rowsCast, local_colsCast), B(local_rowsCast, local_colsCast)
!f> complex(c_double) :: Res(local_rowsCast, local_colsCast)
!f> integer(kind=c_int64_t) :: u_desc(9), b_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int64_t),value :: row_comm, col_comm
!f> end subroutine
!f> end interface
!f>#endif
!f>#ifndef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_triang_rectangular_dc(U, B, local_rowsCast, local_colsCast, u_desc, b_desc, Res, row_comm, col_comm) &
!f> bind(C, name="cannons_triang_rectangular_c_dc")
!f> use, intrinsic :: iso_c_binding
!f> use precision
!f> complex(c_double) :: U(local_rowsCast, local_colsCast), B(local_rowsCast, local_colsCast)
!f> complex(c_double) :: Res(local_rowsCast, local_colsCast)
!f> integer(kind=c_int) :: u_desc(9), b_desc(9)
!f> integer(kind=BLAS_KIND) :: u_desc(9), b_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int),value :: row_comm, col_comm
!f> integer(kind=MPI_KIND),value :: row_comm, col_comm
!f> end subroutine
!f> end interface
!f>#endif
*/
void cannons_triang_rectangular_c_dc(double complex* U, double complex* B, int local_rowsCast, int local_colsCast,
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, double complex *Res, C_INT_TYPE row_comm, C_INT_TYPE col_comm);
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, double complex *Res, C_INT_MPI_TYPE row_comm, C_INT_MPI_TYPE col_comm);
//***********************************************************************************************************
#define COMPLEXCASE 1
......@@ -306,66 +224,36 @@ void cannons_triang_rectangular_c_dc(double complex* U, double complex* B, int l
#undef COMPLEXCASE
/*
!f>#ifdef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_reduction_fc(A, U, local_rowsCast, local_colsCast, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="cannons_reduction_c_fc")
!f> use, intrinsic :: iso_c_binding
!f> use precision
!f> complex(c_float) :: A(local_rowsCast, local_colsCast), U(local_rowsCast, local_colsCast)
!f> complex(c_float) :: Res(local_rowsCast, local_colsCast)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int64_t) :: a_desc(9)
!f> integer(kind=BLAS_KIND) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int64_t),value :: row_comm, col_comm, ToStore
!f> integer(kind=MPI_KIND),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
!f>#endif
!f>#ifndef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_reduction_fc(A, U, local_rowsCast, local_colsCast, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="cannons_reduction_c_fc")
!f> use, intrinsic :: iso_c_binding
!f> complex(c_float) :: A(local_rowsCast, local_colsCast), U(local_rowsCast, local_colsCast)
!f> complex(c_float) :: Res(local_rowsCast, local_colsCast)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
!f>#endif
*/
void cannons_reduction_c_fc(float complex* A, float complex* U, int local_rowsCast, int local_colsCast, C_INT_TYPE_PTR a_desc,
float complex *Res, C_INT_TYPE ToStore, C_INT_TYPE row_comm, C_INT_TYPE col_comm);
float complex *Res, C_INT_MPI_TYPE ToStore, C_INT_MPI_TYPE row_comm, C_INT_MPI_TYPE col_comm);
/*
!f>#ifdef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_triang_rectangular_fc(U, B, local_rowsCast, local_colsCast, u_desc, b_desc, Res, row_comm, col_comm) &
!f> bind(C, name="cannons_triang_rectangular_c_fc")
!f> use, intrinsic :: iso_c_binding
!f> use precision
!f> complex(c_float) :: U(local_rowsCast, local_colsCast), B(local_rowsCast, local_colsCast)
!f> complex(c_float) :: Res(local_rowsCast, local_colsCast)
!f> integer(kind=c_int64_t) :: u_desc(9), b_desc(9)
!f> integer(kind=BLAS_KIND) :: u_desc(9), b_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int64_t),value :: row_comm, col_comm
!f> end subroutine
!f> end interface
!f>#endif
!f>#ifndef HAVE_64BIT_INTEGER_SUPPORT
!f> interface
!f> subroutine cannons_triang_rectangular_fc(U, B, local_rowsCast, local_colsCast, u_desc, b_desc, Res, row_comm, col_comm) &
!f> bind(C, name="cannons_triang_rectangular_c_fc")
!f> use, intrinsic :: iso_c_binding
!f> complex(c_float) :: U(local_rowsCast, local_colsCast), B(local_rowsCast, local_colsCast)
!f> complex(c_float) :: Res(local_rowsCast, local_colsCast)
!f> integer(kind=c_int) :: u_desc(9), b_desc(9)
!f> integer(kind=c_int),value :: local_rowsCast, local_colsCast
!f> integer(kind=c_int),value :: row_comm, col_comm
!f> integer(kind=MPI_KIND),value :: row_comm, col_comm
!f> end subroutine
!f> end interface
!f>#endif
*/
void cannons_triang_rectangular_c_fc(float complex* U, float complex* B, int local_rowsCast, int local_colsCast,
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, float complex *Res, C_INT_TYPE row_comm, C_INT_TYPE col_comm);
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, float complex *Res, C_INT_MPI_TYPE row_comm, C_INT_MPI_TYPE col_comm);
#endif
......@@ -54,16 +54,24 @@
// Author: Valeriy Manin (Bergische Universität Wuppertal)
// integreated into the ELPA library Pavel Kus, Andeas Marek (MPCDF)
#ifdef HAVE_64BIT_INTEGER_SUPPORT
#ifdef HAVE_64BIT_INTEGER_MATH_SUPPORT
#define C_INT_TYPE_PTR long int*
#define C_INT_TYPE long int
#define FORTRAN_INT_TYPE c_int64_t
#define BLAS_KIND c_int64_t
#else
#define C_INT_TYPE_PTR int*
#define C_INT_TYPE int
#define FORTRAN_INT_TYPE c_int
#define BLAS_KIND c_int
#endif
#ifdef HAVE_64BIT_INTEGER_MPI_SUPPORT
#define C_INT_MPI_TYPE_PTR long int*
#define C_INT_MPI_TYPE long int
#define MPI_KIND c_int64_t
#else
#define C_INT_MPI_TYPE_PTR int*
#define C_INT_MPI_TYPE int
#define MPI_KIND c_int
#endif
// it seems, that we need those two levels of indirection to correctly expand macros
#define cannons_triang_rectangular_impl_expand2(SUFFIX) cannons_triang_rectangular_##SUFFIX
......@@ -88,6 +96,7 @@ void cannons_triang_rectangular_impl(math_type* U, math_type* B, C_INT_TYPE np_r
C_INT_TYPE na, nb, nblk, width, na_rows, na_cols, nb_cols, cols_in_buffer_U_my_initial, cols_in_buffer_U, rows_in_buffer_U, Size_receive_U_now, rows_in_buffer_U_now, cols_in_buffer_U_now, rows_in_buffer_U_my_initial;
C_INT_MPI_TYPE Size_receive_U_nowMPI, Size_receive_UMPI, Size_receive_BMPI;
C_INT_TYPE i, j, Size_send_U, Size_receive_U, Size_send_B, Size_receive_B, intNumber, Buf_rows, Buf_cols_U, Buf_cols_B, curr_rows, num_of_iters, cols_in_buffer, rows_in_block, curr_col_loc, cols_in_block, num_of_blocks_in_U_buffer, col_of_origin_U, b_rows_mult, b_cols_mult;
math_type *Buf_to_send_U, *Buf_to_receive_U, *Buf_to_send_B, *Buf_to_receive_B, *Buf_U, *PosBuff;
......@@ -241,9 +250,10 @@ void cannons_triang_rectangular_impl(math_type* U, math_type* B, C_INT_TYPE np_r
{
if(where_to_send_U != my_pcol) // if I need to send and receive on this step
{
MPI_Sendrecv(Buf_to_send_U, Size_send_U, MPI_MATH_DATATYPE_PRECISION_C, where_to_send_U, 0, Buf_U, Size_U_stored, MPI_MATH_DATATYPE_PRECISION_C, from_where_to_receive_U, 0, row_comm, &status);
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_U_now);
Size_receive_U = Size_receive_U + Size_receive_U_now - 2; // we need only number of elements, so exclude information about cols_in_buffer_U and rows_in_buffer_U
MPI_Sendrecv(Buf_to_send_U, (C_INT_MPI_TYPE) Size_send_U, MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) where_to_send_U, 0, Buf_U, (C_INT_MPI_TYPE) Size_U_stored, MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) from_where_to_receive_U, 0, row_comm, &status);
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_U_nowMPI);
Size_receive_U_now = (C_INT_TYPE) Size_receive_U_nowMPI;
Size_receive_U = Size_receive_U + Size_receive_U_now - 2; // we need only number of elements, so exclude information about cols_in_buffer_U and rows_in_buffer_
cols_in_buffer_U_now = Buf_U[Size_receive_U_now - 2];
cols_in_buffer_U = cols_in_buffer_U + cols_in_buffer_U_now;
......@@ -309,8 +319,10 @@ void cannons_triang_rectangular_impl(math_type* U, math_type* B, C_INT_TYPE np_r
{
if(my_prow > 0)
{
MPI_Sendrecv(Buf_to_send_U, Size_send_U, MPI_MATH_DATATYPE_PRECISION_C, where_to_send_U, 0, Buf_to_receive_U, Size_U_stored, MPI_MATH_DATATYPE_PRECISION_C, from_where_to_receive_U, 0, row_comm, &status);
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_U);
MPI_Sendrecv(Buf_to_send_U, (C_INT_MPI_TYPE) Size_send_U, MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) where_to_send_U, 0, Buf_to_receive_U, (C_INT_MPI_TYPE) Size_U_stored, MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) from_where_to_receive_U, 0, row_comm, &status);
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_UMPI);
Size_receive_U = (C_INT_TYPE) Size_receive_UMPI;
cols_in_buffer_U = (C_INT_TYPE)Buf_to_receive_U[Size_receive_U-2];
rows_in_buffer_U = (C_INT_TYPE)Buf_to_receive_U[Size_receive_U-1];
}
......@@ -341,9 +353,11 @@ void cannons_triang_rectangular_impl(math_type* U, math_type* B, C_INT_TYPE np_r
{
// form array to send
C_LACPY("A", &na_rows, &nb_cols, B, &na_rows, Buf_to_send_B, &na_rows);
MPI_Sendrecv(Buf_to_send_B, nb_cols*na_rows, MPI_MATH_DATATYPE_PRECISION_C, where_to_send_B, 0, Buf_to_receive_B, nb_cols*Buf_rows, MPI_MATH_DATATYPE_PRECISION_C, from_where_to_receive_B, 0, col_comm, &status);
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_B); // find out how many elements I have received
MPI_Sendrecv(Buf_to_send_B, (C_INT_MPI_TYPE) (nb_cols*na_rows), MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) where_to_send_B, 0, Buf_to_receive_B, (C_INT_MPI_TYPE) (nb_cols*Buf_rows), MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) from_where_to_receive_B, 0, col_comm, &status);
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_BMPI); // find out how many elements I have received
Size_receive_B = (C_INT_TYPE) Size_receive_BMPI;
Size_receive_B = Size_receive_B/nb_cols; // how many rows I have received
}
else
{
......@@ -378,13 +392,11 @@ void cannons_triang_rectangular_impl(math_type* U, math_type* B, C_INT_TYPE np_r
Size_send_B = Size_receive_B;
///// shift for U ////////////////////////////////////////////////////////////
MPI_Isend(Buf_to_send_U, Size_send_U, MPI_MATH_DATATYPE_PRECISION_C, where_to_send_U, 0, row_comm, &request_U_Send);
MPI_Irecv(Buf_to_receive_U, ratio*Size_U_stored, MPI_MATH_DATATYPE_PRECISION_C, from_where_to_receive_U, 0, row_comm, &request_U_Recv);
MPI_Isend(Buf_to_send_U, (C_INT_MPI_TYPE) Size_send_U, MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) where_to_send_U, 0, row_comm, &request_U_Send);
MPI_Irecv(Buf_to_receive_U, (C_INT_MPI_TYPE) (ratio*Size_U_stored), MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) from_where_to_receive_U, 0, row_comm, &request_U_Recv);
///// shift for B /////////////////////////////////////////////
MPI_Isend(Buf_to_send_B, Size_send_B*nb_cols, MPI_MATH_DATATYPE_PRECISION_C, where_to_send_B, 0, col_comm, &request_B_Send);
MPI_Irecv(Buf_to_receive_B, Buf_rows*nb_cols, MPI_MATH_DATATYPE_PRECISION_C, from_where_to_receive_B, 0, col_comm, &request_B_Recv);
MPI_Isend(Buf_to_send_B, (C_INT_MPI_TYPE) (Size_send_B*nb_cols), MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) where_to_send_B, 0, col_comm, &request_B_Send);
MPI_Irecv(Buf_to_receive_B, (C_INT_MPI_TYPE) (Buf_rows*nb_cols), MPI_MATH_DATATYPE_PRECISION_C, (C_INT_MPI_TYPE) from_where_to_receive_B, 0, col_comm, &request_B_Recv);
///// multiplication ////////////////////////////////////////////////////////////////////////////////////////////
cols_in_buffer_U = (C_INT_TYPE)Buf_to_send_U[Size_receive_U-2];
rows_in_buffer_U = (C_INT_TYPE)Buf_to_send_U[Size_receive_U-1];
......@@ -426,12 +438,15 @@ void cannons_triang_rectangular_impl(math_type* U, math_type* B, C_INT_TYPE np_r
MPI_Wait(&request_U_Send, &status);
MPI_Wait(&request_U_Recv, &status);
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_U); // find out how many elements I have received
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_UMPI); // find out how many elements I have received
Size_receive_U = (C_INT_TYPE) Size_receive_UMPI;
MPI_Wait(&request_B_Send, &status);
MPI_Wait(&request_B_Recv, &status);
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_B); // find out how many elements I have received
Size_receive_B = Size_receive_B/nb_cols; // how many rows I have received
MPI_Get_count(&status, MPI_MATH_DATATYPE_PRECISION_C, &Size_receive_BMPI); // find out how many elements I have received
Size_receive_B = (C_INT_TYPE) Size_receive_BMPI;
Size_receive_B = (C_INT_TYPE) Size_receive_B / nb_cols; // how many rows I have received
}
// last iteration
......@@ -483,7 +498,7 @@ void cannons_triang_rectangular_impl(math_type* U, math_type* B, C_INT_TYPE np_r
void cannons_triang_rectangular_c_impl(math_type* U, math_type* B, int local_rowsCast, int local_colsCast,
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, math_type *Res, C_INT_TYPE row_comm, C_INT_TYPE col_comm)
C_INT_TYPE_PTR u_desc, C_INT_TYPE_PTR b_desc, math_type *Res, C_INT_MPI_TYPE row_comm, C_INT_MPI_TYPE col_comm)
{
C_INT_TYPE local_rows, local_cols;
......@@ -494,11 +509,17 @@ void cannons_triang_rectangular_c_impl(math_type* U, math_type* B, int local_row
MPI_Comm c_col_comm = MPI_Comm_f2c(col_comm);
C_INT_TYPE my_prow, my_pcol, np_rows, np_cols;
MPI_Comm_rank(c_row_comm, &my_prow);
MPI_Comm_size(c_row_comm, &np_rows);
MPI_Comm_rank(c_col_comm, &my_pcol);