Commit 94d1babe authored by Andreas Marek's avatar Andreas Marek

C-interfaces for autotuning routines and test program

parent 0891ae1d
noinst_PROGRAMS += \
real_2stage_c_version@SUFFIX@ \
autotune_c_version@SUFFIX@ \
legacy_real_1stage@SUFFIX@ \
legacy_complex_1stage@SUFFIX@ \
legacy_real_2stage@SUFFIX@ \
......@@ -76,7 +77,10 @@ endif
real_2stage_c_version@SUFFIX@_SOURCES = test/C/real_2stage_c_version.c
real_2stage_c_version@SUFFIX@_LDADD = $(test_program_ldadd) $(FCLIBS)
real_2stage_c_version@SUFFIX@_FCFLAGS = $(test_program_fcflags)
EXTRA_real_2stage_c_version@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.F90
autotune_c_version@SUFFIX@_SOURCES = test/C/autotune_c_version.c
autotune_c_version@SUFFIX@_LDADD = $(test_program_ldadd) $(FCLIBS)
autotune_c_version@SUFFIX@_FCFLAGS = $(test_program_fcflags)
legacy_real_1stage_c_version@SUFFIX@_SOURCES = test/C/elpa1/legacy_interface/legacy_real_1stage_c_version.c
legacy_real_1stage_c_version@SUFFIX@_LDADD = $(test_program_ldadd) $(FCLIBS)
......
......@@ -223,7 +223,7 @@ module elpa
!> \brief function to deallocate an ELPA autotune instance
!> Parameters
!> \details
!> \param obj class(elpa_autotune_t), pointer : pointer to then autotune object to be destroyed and deallocated
!> \param obj class(elpa_autotune_t), pointer : pointer to the autotune object to be destroyed and deallocated
subroutine elpa_autotune_deallocate(obj)
class(elpa_autotune_t), pointer :: obj
call obj%destroy()
......
......@@ -179,7 +179,7 @@ module elpa_api
end type elpa_t
!> \brief Abstract definition of the elpa_autotunet type
!> \brief Abstract definition of the elpa_autotune type
type, abstract :: elpa_autotune_t
private
contains
......
......@@ -129,7 +129,7 @@ module elpa_impl
procedure, public :: associate_int => elpa_associate_int !< public method to set some pointers
procedure, public :: autotune_setup => elpa_autotune_impl_setup
procedure, public :: autotune_setup => elpa_autotune_setup
procedure, public :: autotune_step => elpa_autotune_step
procedure, public :: autotune_set_best => elpa_autotune_set_best
......@@ -208,12 +208,13 @@ module elpa_impl
!c> * \param elpa_autotune_impl_t handle of ELPA autotune object to be deallocated
!c> * \result void
!c> */
!c> void elpa_autotune_deallocate(elpa_t handle);
subroutine elpa_autotune_impl_deallocate_c(handle) bind(C, name="elpa_autotune_deallocate")
type(c_ptr), value :: handle
type(elpa_impl_t), pointer :: self
!c> void elpa_autotune_deallocate(elpa_autotune_t handle);
subroutine elpa_autotune_impl_deallocate_c( autotune_handle) bind(C, name="elpa_autotune_deallocate")
type(c_ptr), value :: autotune_handle
call c_f_pointer(handle, self)
type(elpa_autotune_impl_t), pointer :: self
call c_f_pointer(autotune_handle, self)
call self%destroy()
deallocate(self)
end subroutine
......@@ -2602,40 +2603,13 @@ module elpa_impl
end subroutine
!!> \brief function to setup the ELPA autotuning and create the autotune object
!!> Parameters
!!> \param self class(elpa_impl_t) the allocated ELPA object
!!> \param level integer: the "thoroughness" of the planed autotuning
!!> \param domain integer: the domain (real/complex) which should be tuned
!!> \result tune_state class(elpa_autotune_t): the created autotuning object
!function elpa_autotune_setup(self, level, domain) result(tune_state)
! class(elpa_impl_t), intent(inout), target :: self
! integer, intent(in) :: level, domain
! type(elpa_autotune_impl_t), pointer :: ts_impl
! class(elpa_autotune_t), pointer :: tune_state
! allocate(ts_impl)
! ts_impl%parent => self
! ts_impl%level = level
! ts_impl%domain = domain
! ts_impl%i = -1
! ts_impl%min_loc = -1
! ts_impl%N = elpa_index_autotune_cardinality_c(self%index, level, domain)
! tune_state => ts_impl
! call self%autotune_timer%enable()
!end function
!> \brief function to setup the ELPA autotuning and create the autotune object
!> Parameters
!> \param self class(elpa_impl_t) the allocated ELPA object
!> \param level integer: the "thoroughness" of the planed autotuning
!> \param domain integer: the domain (real/complex) which should be tuned
!> \result tune_state class(elpa_autotune_t): the created autotuning object
function elpa_autotune_impl_setup(self, level, domain) result(tune_state)
function elpa_autotune_setup(self, level, domain) result(tune_state)
class(elpa_impl_t), intent(inout), target :: self
integer, intent(in) :: level, domain
type(elpa_autotune_impl_t), pointer :: ts_impl
......@@ -2657,27 +2631,40 @@ module elpa_impl
!!c> /*! \brief C interface for the implementation of the elpa_autotune_setup method
!!c> *
!!c> * \param elpa_t handle: of the ELPA object which should be tuned
!!c> * \param int level: "thoroughness" of autotuning
!!c> * \param int domain: real/complex autotuning
!!c> * \result elpa_autotune_t handle: on the autotune object
!!c> */
!!c> elpa_autotune_t elpa_autotune_setup(elpa_t handle, int level, int domain);
!function elpa_autotune_impl_setup_c(handle ,level, domain) result(ptr) bind(C, name="elpa_autotune_setup")
! type(c_ptr), intent(in), value :: handle
! type(elpa_impl_t), pointer :: self
! type(elpa_autotune_impl_t), pointer :: obj
! integer(kind=c_int), intent(in) :: level
! integer(kind=c_int), intent(in) :: domain
! type(c_ptr) :: ptr
!c> /*! \brief C interface for the implementation of the elpa_autotune_setup method
!c> *
!c> * \param elpa_t handle: of the ELPA object which should be tuned
!c> * \param int level: "thoroughness" of autotuning
!c> * \param int domain: real/complex autotuning
!c> * \result elpa_autotune_t handle: on the autotune object
!c> */
!c> elpa_autotune_t elpa_autotune_setup(elpa_t handle, int level, int domain);
function elpa_autotune_setup_c(handle ,level, domain) result(ptr) bind(C, name="elpa_autotune_setup")
type(c_ptr), intent(in), value :: handle
type(elpa_impl_t), pointer :: self
class(elpa_autotune_t), pointer :: tune_state
type(elpa_autotune_impl_t), pointer :: obj
integer(kind=c_int), intent(in), value :: level
integer(kind=c_int), intent(in), value :: domain
type(c_ptr) :: ptr
print *,"Calling c_f_pointer handle"
call c_f_pointer(handle, self)
! call c_f_pointer(handle, self)
! obj => self%autotune_setup(level, domain)
! ptr = c_loc(obj)
print *,"Calling setup"
print *,level,domain
tune_state => self%autotune_setup(level, domain)
print *,"After setup"
select type(tune_state)
class is (elpa_autotune_impl_t)
obj => tune_state
class default
print *, "This should not happen"
end select
ptr = c_loc(obj)
!end function
end function
!> \brief function to do an autotunig step
......@@ -2705,7 +2692,7 @@ module elpa_impl
if (ts_impl%i >= 0) then
time_spent = self%autotune_timer%get("accumulator")
print *, time_spent
!print *, time_spent
if (ts_impl%min_loc == -1 .or. (time_spent < ts_impl%min_val)) then
ts_impl%min_val = time_spent
ts_impl%min_loc = ts_impl%i
......@@ -2724,6 +2711,36 @@ module elpa_impl
end function
!c> /*! \brief C interface for the implementation of the elpa_autotune_step method
!c> *
!c> * \param elpa_t handle: of the ELPA object which should be tuned
!c> * \param elpa_autotune_t autotune_handle: the autotuning object
!c> * \result int unfinished: describes whether autotuning finished (0) or not (1)
!c> */
!c> int elpa_autotune_step(elpa_t handle, elpa_autotune_t autotune_handle);
function elpa_autotune_step_c(handle, autotune_handle) result(unfinished) bind(C, name="elpa_autotune_step")
type(c_ptr), intent(in), value :: handle
type(c_ptr), intent(in), value :: autotune_handle
type(elpa_impl_t), pointer :: self
type(elpa_autotune_impl_t), pointer :: tune_state
logical :: unfinished_f
integer(kind=c_int) :: unfinished
call c_f_pointer(handle, self)
call c_f_pointer(autotune_handle, tune_state)
unfinished_f = self%autotune_step(tune_state)
if (unfinished_f) then
unfinished = 1
else
unfinished = 0
endif
end function
!> \brief function to set the up-to-know best options of the autotuning
!> Parameters
!> \param self class(elpa_impl_t) the allocated ELPA object
......@@ -2747,4 +2764,28 @@ module elpa_impl
endif
end subroutine
!c> /*! \brief C interface for the implementation of the elpa_autotune_set_best method
!c> *
!c> * \param elpa_t handle: of the ELPA object which should be tuned
!c> * \param elpa_autotune_t autotune_handle: the autotuning object
!c> * \result none
!c> */
!c> void elpa_autotune_set_best(elpa_t handle, elpa_autotune_t autotune_handle);
subroutine elpa_autotune_set_best_c(handle, autotune_handle) bind(C, name="elpa_autotune_set_best")
type(c_ptr), intent(in), value :: handle
type(c_ptr), intent(in), value :: autotune_handle
type(elpa_impl_t), pointer :: self
type(elpa_autotune_impl_t), pointer :: tune_state
call c_f_pointer(handle, self)
call c_f_pointer(autotune_handle, tune_state)
call self%autotune_set_best(tune_state)
end subroutine
end module
/* This file is part of ELPA. */
/* */
/* The ELPA library was originally created by the ELPA consortium, */
/* consisting of the following organizations: */
/* */
/* - Max Planck Computing and Data Facility (MPCDF), formerly known as */
/* Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), */
/* - Bergische Universität Wuppertal, Lehrstuhl für angewandte */
/* Informatik, */
/* - Technische Universität München, Lehrstuhl für Informatik mit */
/* Schwerpunkt Wissenschaftliches Rechnen , */
/* - Fritz-Haber-Institut, Berlin, Abt. Theorie, */
/* - Max-Plack-Institut für Mathematik in den Naturwissenschaften, */
/* Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, */
/* and */
/* - IBM Deutschland GmbH */
/* */
/* */
/* More information can be found here: */
/* http://elpa.mpcdf.mpg.de/ */
/* */
/* ELPA is free software: you can redistribute it and/or modify */
/* it under the terms of the version 3 of the license of the */
/* GNU Lesser General Public License as published by the Free */
/* Software Foundation. */
/* */
/* ELPA is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU Lesser General Public License for more details. */
/* */
/* You should have received a copy of the GNU Lesser General Public License */
/* along with ELPA. If not, see <http://www.gnu.org/licenses/> */
/* */
/* ELPA reflects a substantial effort on the part of the original */
/* ELPA consortium, and we ask you to respect the spirit of the */
/* license that we chose: i.e., please contribute any changes you */
/* may have back to the original ELPA library distribution, and keep */
/* any derivatives of ELPA under the same license that we chose for */
/* the original distribution, the GNU Lesser General Public License. */
/* */
/* */
#include "config-f90.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef WITH_MPI
#include <mpi.h>
#endif
#include <math.h>
#include <elpa/elpa.h>
#include <assert.h>
#include <string.h>
#include <test/shared/generated.h>
#define DOUBLE_PRECISION_REAL 1
#define assert_elpa_ok(x) assert(x == ELPA_OK)
int main(int argc, char** argv) {
int myid;
int nprocs;
#ifndef WITH_MPI
int MPI_COMM_WORLD;
#endif
int na, nev, nblk;
int status;
int np_cols, np_rows, np_colsStart;
int my_blacs_ctxt, my_prow, my_pcol;
int mpierr;
int my_mpi_comm_world;
int mpi_comm_rows, mpi_comm_cols;
int info, *sc_desc;
int na_rows, na_cols;
double startVal;
#ifdef DOUBLE_PRECISION_REAL
double *a, *z, *as, *ev;
#else
float *a, *z, *as, *ev;
#endif
int success;
elpa_t handle;
elpa_autotune_t autotune_handle;
int value, error, unfinished, i;
#ifdef WITH_MPI
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
#else
nprocs = 1;
myid=0;
MPI_COMM_WORLD=1;
#endif
na = 100;
nev = 50;
nblk = 16;
if (myid == 0) {
printf("This is the c version of an ELPA test-programm\n");
printf("\n");
printf("It will call the 2stage ELPA real solver for an\n");
printf("matrix of size %d. It will compute %d eigenvalues\n",na,nev);
printf("and uses a blocksize of %d\n",nblk);
printf("\n");
printf("This is an example program with much less functionality\n");
printf("as it's Fortran counterpart. It's only purpose is to show how \n");
printf("to evoke ELPA2 from a c programm\n");
printf("\n");
#ifdef DOUBLE_PRECISION_REAL
printf(" Double precision version of ELPA2 is used. \n");
#else
printf(" Single precision version of ELPA2 is used. \n");
#endif
}
status = 0;
startVal = sqrt((double) nprocs);
np_colsStart = (int) round(startVal);
for (np_cols=np_colsStart;np_cols>1;np_cols--){
if (nprocs %np_cols ==0){
break;
}
}
np_rows = nprocs/np_cols;
if (myid == 0) {
printf("\n");
printf("Number of processor rows %d, cols %d, total %d \n",np_rows,np_cols,nprocs);
}
/* set up blacs */
/* convert communicators before */
#ifdef WITH_MPI
my_mpi_comm_world = MPI_Comm_c2f(MPI_COMM_WORLD);
#else
my_mpi_comm_world = 1;
#endif
set_up_blacsgrid_f(my_mpi_comm_world, np_rows, np_cols, 'C', &my_blacs_ctxt, &my_prow, &my_pcol);
if (myid == 0) {
printf("\n");
printf("Past BLACS_Gridinfo...\n");
printf("\n");
}
sc_desc = malloc(9*sizeof(int));
set_up_blacs_descriptor_f(na, nblk, my_prow, my_pcol, np_rows, np_cols, &na_rows, &na_cols, sc_desc, my_blacs_ctxt, &info);
if (myid == 0) {
printf("\n");
printf("Past scalapack descriptor setup...\n");
printf("\n");
}
/* allocate the matrices needed for elpa */
if (myid == 0) {
printf("\n");
printf("Allocating matrices with na_rows=%d and na_cols=%d\n",na_rows, na_cols);
printf("\n");
}
#ifdef DOUBLE_PRECISION_REAL
a = malloc(na_rows*na_cols*sizeof(double));
z = malloc(na_rows*na_cols*sizeof(double));
as = malloc(na_rows*na_cols*sizeof(double));
ev = malloc(na*sizeof(double));
#else
a = malloc(na_rows*na_cols*sizeof(float));
z = malloc(na_rows*na_cols*sizeof(float));
as = malloc(na_rows*na_cols*sizeof(float));
ev = malloc(na*sizeof(float));
#endif
#ifdef DOUBLE_PRECISION_REAL
prepare_matrix_random_real_double_f(na, myid, na_rows, na_cols, sc_desc, a, z, as);
#else
prepare_matrix_random_real_single_f(na, myid, na_rows, na_cols, sc_desc, a, z, as);
#endif
if (elpa_init(CURRENT_API_VERSION) != ELPA_OK) {
fprintf(stderr, "Error: ELPA API version not supported");
exit(1);
}
handle = elpa_allocate(&error);
assert_elpa_ok(error);
/* Set parameters */
elpa_set(handle, "na", na, &error);
assert_elpa_ok(error);
elpa_set(handle, "nev", nev, &error);
assert_elpa_ok(error);
elpa_set(handle, "local_nrows", na_rows, &error);
assert_elpa_ok(error);
elpa_set(handle, "local_ncols", na_cols, &error);
assert_elpa_ok(error);
elpa_set(handle, "nblk", nblk, &error);
assert_elpa_ok(error);
#ifdef WITH_MPI
elpa_set(handle, "mpi_comm_parent", MPI_Comm_c2f(MPI_COMM_WORLD), &error);
assert_elpa_ok(error);
elpa_set(handle, "process_row", my_prow, &error);
assert_elpa_ok(error);
elpa_set(handle, "process_col", my_pcol, &error);
assert_elpa_ok(error);
#endif
/* Setup */
assert_elpa_ok(elpa_setup(handle));
/* Set tunables */
elpa_set(handle, "gpu", 0, &error);
assert_elpa_ok(error);
#ifdef WITH_MPI
mpierr = MPI_Barrier(MPI_COMM_WORLD);
#endif
autotune_handle = elpa_autotune_setup(handle, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL);
/* mimic 10 scf steps */
for (i=0; i < 20; i++) {
unfinished = elpa_autotune_step(handle, autotune_handle);
if (unfinished == 0) {
if (myid == 0) {
printf("ELPA autotuning finished in the %d th scf step \n",i);
}
break;
}
/* Solve EV problem */
elpa_eigenvectors(handle, a, ev, z, &error);
assert_elpa_ok(error);
/* check the results */
#ifdef DOUBLE_PRECISION_REAL
status = check_correctness_evp_numeric_residuals_real_double_f(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid);
memcpy(a, as, na_rows*na_cols*sizeof(double));
#else
status = check_correctness_evp_numeric_residuals_real_single_f(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid);
memcpy(a, as, na_rows*na_cols*sizeof(float));
#endif
if (status !=0){
printf("The computed EVs are not correct !\n");
break;
}
printf("hier %d \n",myid);
}
if (unfinished == 1) {
if (myid == 0) {
printf("ELPA autotuning did not finished during %d scf cycles\n",i);
}
}
elpa_autotune_set_best(handle, autotune_handle);
elpa_autotune_deallocate(autotune_handle);
elpa_deallocate(handle);
elpa_uninit();
if (myid == 0) {
printf("\n");
printf("2stage ELPA real solver complete\n");
printf("\n");
}
if (status ==0){
if (myid ==0) {
printf("All ok!\n");
}
}
free(sc_desc);
free(a);
free(z);
free(as);
free(ev);
#ifdef WITH_MPI
MPI_Finalize();
#endif
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment