Commit f20ce4de authored by Pavel Kus's avatar Pavel Kus

introducing a template for cannon_forw

parent 01e7863c
......@@ -60,7 +60,7 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa2/qr/elpa_pdgeqrf.F90 \
src/elpa1/elpa1.F90 \
src/elpa2/elpa2.F90 \
src/elpa_generalized/cannon_forw_real_double.c \
src/elpa_generalized/cannon_forw.c \
src/elpa_generalized/cannon_back_real_double.c \
#src/elpa_generalized/test_c_bindings.c \
src/helpers/matrix_plot.F90 \
......@@ -744,6 +744,7 @@ EXTRA_DIST = \
src/elpa2/qr/elpa_qrkernels_template.F90 \
src/elpa2/qr/qr_utils_template.F90 \
src/elpa2/redist_band.F90 \
src/elpa_generalized/cannon_forw_template.c \
src/elpa_index.h \
src/fortran_constants.h \
src/general/map_global_to_local.F90 \
......
#include "config-f90.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// most of the file is not compiled if not using MPI
#ifdef WITH_MPI
#include <mpi.h>
//#include <elpa/elpa.h>
//#include <elpa/elpa_generated.h>
//#include <elpa/elpa_constants.h>
//#include <elpa/elpa_generated_legacy.h>
//#include <elpa/elpa_generic.h>
//#include <elpa/elpa_legacy.h>
//
// Prototypes of the external linear-algebra routines referenced by the Cannon's-algorithm code.
// Every argument is passed by pointer and the names carry a trailing underscore.
// NOTE(review): presumably these are the Fortran BLAS/LAPACK (dlacpy_, dgemm_) and
// PBLAS/ScaLAPACK (pdlacpy_, pdtran_, numroc_) symbols resolved at link time -- confirm against the build setup.
// The commented-out prototypes are kept disabled.
void pdlacpy_(char*, int*, int*, double*, int*, int*, int*, double*, int*, int*, int*);
void dlacpy_(char*, int*, int*, double*, int*, double*, int*);
void dgemm_(char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*);
void pdtran_(int*, int*, double*, double*, int*, int*, int*, double*, double*, int*, int*, int*);
//void pdelset_(double*, int*, int*, int*, double*);
//void pdsymm_(char*, char*, int*, int*, double*, double*, int*, int*, int*, double*, int*, int*, int*, double*, double*, int*, int*, int*);
//void pdpotrf_(char*, int*, double*, int*, int*, int*, int*);
//void pdsyngst_(int*, char*, int*, double*, int*, int*, int*, double*, int*, int*, int*, double*, double*, int*, int*);
//void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*);
int numroc_(int*, int*, int*, int*, int*);
//void set_up_blacsgrid_f1(int, int*, int*, int*, int*, int*, int*, int*);
//void pdtrtrs_(char*, char*, char*, int*, int*, double*, int*, int*, int*, double*, int*, int*, int*, int*);
//void pdsyevr_(char*, char*, char*, int*, double*, int*, int*, int*, int*, int*, int*, int*, int*, int*, double*, double*, int*, int*, int*, double*, int*, int*, int*, int*);
// Instantiate the Cannon's-algorithm template for the real, double-precision case.
// The two selector macros are defined only around the includes and removed again afterwards,
// so they do not leak into the rest of this translation unit.
// NOTE(review): precision_macros.h presumably derives ELPA_IMPL_SUFFIX (which the template uses
// to build its function names) from these selectors -- confirm.
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "../general/precision_macros.h"
#include "cannon_forw_template.c"
#undef DOUBLE_PRECISION
#undef REALCASE
//***********************************************************************************************************
/*
!f> interface
!f> subroutine cannons_reduction(A, U, local_rows, local_cols, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="cannons_reduction_c_d")
!f> use, intrinsic :: iso_c_binding
!f> real(c_double) :: A(local_rows, local_cols), U(local_rows, local_cols), Res(local_rows, local_cols)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rows, local_cols
!f> integer(kind=c_int),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
*/
void cannons_reduction_c_d(double* A, double* U, int local_rows, int local_cols, int* a_desc,
double *Res, int ToStore, int row_comm, int col_comm);
#else
/*
 * Stand-in for cannons_reduction_c_d used when WITH_MPI is not defined.
 * It has the same signature as the MPI build but performs no work: none of the
 * arguments is read or written, and the function returns immediately.
 * NOTE(review): the wrapper's own non-MPI branch prints an internal error and exits,
 * whereas this stub returns silently -- confirm which behavior is intended.
 */
void cannons_reduction_c_d(double* A, double* U, int local_rows, int local_cols, int* a_desc,
double *Res, int ToStore, int row_comm, int col_comm)
{
  /* Mark every parameter as intentionally unused. */
  (void) A;
  (void) U;
  (void) local_rows;
  (void) local_cols;
  (void) a_desc;
  (void) Res;
  (void) ToStore;
  (void) row_comm;
  (void) col_comm;
  return;
}
#endif
#include "config-f90.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// most of the file is not compiled if not using MPI
#ifdef WITH_MPI
#include <mpi.h>
//#include <elpa/elpa.h>
//#include <elpa/elpa_generated.h>
//#include <elpa/elpa_constants.h>
//#include <elpa/elpa_generated_legacy.h>
//#include <elpa/elpa_generic.h>
//#include <elpa/elpa_legacy.h>
//
// Prototypes of the external linear-algebra routines referenced by the Cannon's-algorithm code.
// Every argument is passed by pointer and the names carry a trailing underscore.
// NOTE(review): presumably these are the Fortran BLAS/LAPACK (dlacpy_, dgemm_) and
// PBLAS/ScaLAPACK (pdlacpy_, pdtran_, numroc_) symbols resolved at link time -- confirm against the build setup.
// The commented-out prototypes are kept disabled.
void pdlacpy_(char*, int*, int*, double*, int*, int*, int*, double*, int*, int*, int*);
void dlacpy_(char*, int*, int*, double*, int*, double*, int*);
void dgemm_(char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*);
void pdtran_(int*, int*, double*, double*, int*, int*, int*, double*, double*, int*, int*, int*);
//void pdelset_(double*, int*, int*, int*, double*);
//void pdsymm_(char*, char*, int*, int*, double*, double*, int*, int*, int*, double*, int*, int*, int*, double*, double*, int*, int*, int*);
//void pdpotrf_(char*, int*, double*, int*, int*, int*, int*);
//void pdsyngst_(int*, char*, int*, double*, int*, int*, int*, double*, int*, int*, int*, double*, double*, int*, int*);
//void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*);
int numroc_(int*, int*, int*, int*, int*);
//void set_up_blacsgrid_f1(int, int*, int*, int*, int*, int*, int*, int*);
//void pdtrtrs_(char*, char*, char*, int*, int*, double*, int*, int*, int*, double*, int*, int*, int*, int*);
//void pdsyevr_(char*, char*, char*, int*, double*, int*, int*, int*, int*, int*, int*, int*, int*, int*, double*, double*, int*, int*, int*, double*, int*, int*, int*, int*);
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////// My function for reduction //////////////////////////////////////////////////////////
// Name-building helpers. The ## operator pastes its operands without macro-expanding them,
// so ELPA_IMPL_SUFFIX has to be expanded one level above the paste: *_expand1 forwards its
// (already expanded) argument to *_expand2, which performs the actual concatenation.
#define cannons_reduction_impl_expand2(SUFFIX) cannons_reduction_##SUFFIX
#define cannons_reduction_impl_expand1(SUFFIX) cannons_reduction_impl_expand2(SUFFIX)
// Expands to cannons_reduction_<suffix>, with <suffix> taken from ELPA_IMPL_SUFFIX.
#define cannons_reduction_impl cannons_reduction_impl_expand1(ELPA_IMPL_SUFFIX)
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Same two-level scheme for the "_c_" name: expands to cannons_reduction_c_<suffix>.
#define cannons_reduction_c_impl_expand2(SUFFIX) cannons_reduction_c_##SUFFIX
#define cannons_reduction_c_impl_expand1(SUFFIX) cannons_reduction_c_impl_expand2(SUFFIX)
#define cannons_reduction_c_impl cannons_reduction_c_impl_expand1(ELPA_IMPL_SUFFIX)
void d_cannons_reduction(double* A, double* U, int np_rows, int np_cols, int my_prow, int my_pcol, int* a_desc,
double *Res, int ToStore, MPI_Comm row_comm, MPI_Comm col_comm)
void cannons_reduction_impl(double* A, double* U, int np_rows, int np_cols, int my_prow, int my_pcol,
int* a_desc, double *Res, int ToStore, MPI_Comm row_comm, MPI_Comm col_comm)
{
// Input matrices:
// - A: full matrix
......@@ -900,29 +874,14 @@ void d_cannons_reduction(double* A, double* U, int np_rows, int np_cols, int my_
free(U_stored);
free(SizesU);
}
#endif
//***********************************************************************************************************
/*
!f> interface
!f> subroutine cannons_reduction(A, U, local_rows, local_cols, a_desc, Res, toStore, row_comm, col_comm) &
!f> bind(C, name="d_cannons_reduction_c")
!f> use, intrinsic :: iso_c_binding
!f> real(c_double) :: A(local_rows, local_cols), U(local_rows, local_cols), Res(local_rows, local_cols)
!f> !type(c_ptr), value :: A, U, Res
!f> integer(kind=c_int) :: a_desc(9)
!f> integer(kind=c_int),value :: local_rows, local_cols
!f> integer(kind=c_int),value :: row_comm, col_comm, ToStore
!f> end subroutine
!f> end interface
*/
void d_cannons_reduction_c(double* A, double* U, int local_rows, int local_cols, int* a_desc,
double *Res, int ToStore, int row_comm, int col_comm)
void cannons_reduction_c_impl(double* A, double* U, int local_rows, int local_cols,
int* a_desc, double *Res, int ToStore, int row_comm, int col_comm)
{
#ifdef WITH_MPI
MPI_Comm c_row_comm = MPI_Comm_f2c(row_comm);
MPI_Comm c_col_comm = MPI_Comm_f2c(col_comm);
int my_prow, my_pcol, np_rows, np_cols;
MPI_Comm_rank(c_row_comm, &my_prow);
MPI_Comm_size(c_row_comm, &np_rows);
......@@ -934,7 +893,7 @@ void d_cannons_reduction_c(double* A, double* U, int local_rows, int local_cols,
// What ELPA usually calls row_comm is therefore passed as the col_comm parameter of the function, and vice versa
// (the order is swapped in the following call).
// This is a bit unfortunate; maybe the Cannon algorithm should be changed to comply with the standard ELPA notation?
d_cannons_reduction(A, U, np_rows, np_cols, my_prow, my_pcol, a_desc, Res, ToStore, c_col_comm, c_row_comm);
cannons_reduction_impl(A, U, np_rows, np_cols, my_prow, my_pcol, a_desc, Res, ToStore, c_col_comm, c_row_comm);
#else
printf("Internal error: Cannons algorithm should not be called without MPI, stopping...\n");
exit(1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment