Commit ebc9ed9b authored by Andreas Marek
Browse files

New keywords for GPU in ELPA index

- Rename keyword "gpu" -> "nvidia-gpu"
- Add keyword "amd-gpu"
parent 22b86859
...@@ -139,7 +139,7 @@ libelpa@SUFFIX@_private_la_SOURCES += \ ...@@ -139,7 +139,7 @@ libelpa@SUFFIX@_private_la_SOURCES += \
endif endif
if WITH_NVIDIA_GPU_VERSION if WITH_NVIDIA_GPU_VERSION
libelpa@SUFFIX@_private_la_SOURCES += src/GPU/CUDA/elpa_index_gpu.cu src/GPU/CUDA/cudaFunctions.cu src/GPU/CUDA/cuUtils.cu src/elpa2/GPU/CUDA/ev_tridi_band_gpu_real.cu src/elpa2/GPU/CUDA/ev_tridi_band_gpu_complex.cu libelpa@SUFFIX@_private_la_SOURCES += src/GPU/CUDA/elpa_index_nvidia_gpu.cu src/GPU/CUDA/cudaFunctions.cu src/GPU/CUDA/cuUtils.cu src/elpa2/GPU/CUDA/ev_tridi_band_gpu_real.cu src/elpa2/GPU/CUDA/ev_tridi_band_gpu_complex.cu
endif endif
if WITH_AMD_GPU_VERSION if WITH_AMD_GPU_VERSION
......
extern "C" { extern "C" {
int gpu_count() { int nvidia_gpu_count() {
int count; int count;
cudaError_t cuerr = cudaGetDeviceCount(&count); cudaError_t cuerr = cudaGetDeviceCount(&count);
if (cuerr != cudaSuccess) { if (cuerr != cudaSuccess) {
......
...@@ -294,7 +294,7 @@ function elpa_solve_evp_& ...@@ -294,7 +294,7 @@ function elpa_solve_evp_&
obj%eigenvalues_only = .true. obj%eigenvalues_only = .true.
endif endif
call obj%get("gpu",gpu,error) call obj%get("nvidia-gpu",gpu,error)
if (error .ne. ELPA_OK) then if (error .ne. ELPA_OK) then
print *,"Problem getting option for gpu. Aborting..." print *,"Problem getting option for gpu. Aborting..."
stop stop
......
...@@ -109,7 +109,7 @@ ...@@ -109,7 +109,7 @@
success = .true. success = .true.
! GPU settings ! GPU settings
call obj%get("gpu", gpu,error) call obj%get("nvidia-gpu", gpu,error)
if (error .ne. ELPA_OK) then if (error .ne. ELPA_OK) then
print *,"Problem getting option for gpu. Aborting..." print *,"Problem getting option for gpu. Aborting..."
stop stop
......
...@@ -365,7 +365,7 @@ ...@@ -365,7 +365,7 @@
wantDebug = debug == 1 wantDebug = debug == 1
! GPU settings ! GPU settings
call obj%get("gpu", gpu,error) call obj%get("nvidia-gpu", gpu,error)
if (error .ne. ELPA_OK) then if (error .ne. ELPA_OK) then
print *,"Problem getting option gpu settings. Aborting..." print *,"Problem getting option gpu settings. Aborting..."
stop stop
......
...@@ -113,7 +113,10 @@ static int min_tile_size_enumerate(elpa_index_t index, int i); ...@@ -113,7 +113,10 @@ static int min_tile_size_enumerate(elpa_index_t index, int i);
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value); static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value);
#ifdef WITH_NVIDIA_GPU_VERSION #ifdef WITH_NVIDIA_GPU_VERSION
int gpu_count(); int nvidia_gpu_count();
#endif
#ifdef WITH_AMD_GPU_VERSION
int amd_gpu_count();
#endif #endif
static int use_gpu_id_cardinality(elpa_index_t index); static int use_gpu_id_cardinality(elpa_index_t index);
...@@ -223,7 +226,9 @@ static const elpa_index_int_entry_t int_entries[] = { ...@@ -223,7 +226,9 @@ static const elpa_index_int_entry_t int_entries[] = {
number_of_matrix_layouts, matrix_layout_enumerate, matrix_layout_is_valid, elpa_matrix_layout_name, PRINT_YES), \ number_of_matrix_layouts, matrix_layout_enumerate, matrix_layout_is_valid, elpa_matrix_layout_name, PRINT_YES), \
INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \ INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES), number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \ INT_ENTRY("nvidia-gpu", "Use Nvidia GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
INT_ENTRY("amd-gpu", "Use AMD GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES), cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
INT_ENTRY("is_skewsymmetric", "Matrix is skewsymmetic", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, INT_ENTRY("is_skewsymmetric", "Matrix is skewsymmetic", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0,
cardinality_bool, enumerate_identity, skewsymmetric_is_valid, NULL, PRINT_YES), cardinality_bool, enumerate_identity, skewsymmetric_is_valid, NULL, PRINT_YES),
...@@ -765,7 +770,7 @@ static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) { ...@@ -765,7 +770,7 @@ static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
if (solver == ELPA_SOLVER_1STAGE) { if (solver == ELPA_SOLVER_1STAGE) {
return new_value == ELPA_2STAGE_REAL_DEFAULT; return new_value == ELPA_2STAGE_REAL_DEFAULT;
} }
int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL); int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL) || elpa_index_get_int_value(index, "amd-gpu", NULL));
switch(new_value) { switch(new_value) {
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE) ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
default: default:
...@@ -804,7 +809,7 @@ static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) { ...@@ -804,7 +809,7 @@ static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
if (solver == ELPA_SOLVER_1STAGE) { if (solver == ELPA_SOLVER_1STAGE) {
return new_value == ELPA_2STAGE_COMPLEX_DEFAULT; return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
} }
int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL); int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL) || elpa_index_get_int_value(index, "amd-gpu", NULL));
switch(new_value) { switch(new_value) {
ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE) ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
default: default:
...@@ -1038,7 +1043,7 @@ static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) { ...@@ -1038,7 +1043,7 @@ static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
static int valid_with_gpu(elpa_index_t index, int n, int new_value) { static int valid_with_gpu(elpa_index_t index, int n, int new_value) {
int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL); int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL) || elpa_index_get_int_value(index, "amd-gpu", NULL));
if (gpu_is_active == 1) { if (gpu_is_active == 1) {
return ((new_value == 0 ) || (new_value == 1)); return ((new_value == 0 ) || (new_value == 1));
} }
...@@ -1049,7 +1054,7 @@ static int valid_with_gpu(elpa_index_t index, int n, int new_value) { ...@@ -1049,7 +1054,7 @@ static int valid_with_gpu(elpa_index_t index, int n, int new_value) {
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) { static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) {
int solver = elpa_index_get_int_value(index, "solver", NULL); int solver = elpa_index_get_int_value(index, "solver", NULL);
int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL); int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL) || elpa_index_get_int_value(index, "amd-gpu", NULL));
if ((solver == ELPA_SOLVER_1STAGE) && (gpu_is_active == 1)) { if ((solver == ELPA_SOLVER_1STAGE) && (gpu_is_active == 1)) {
return ((new_value == 0 ) || (new_value == 1)); return ((new_value == 0 ) || (new_value == 1));
} }
...@@ -1060,7 +1065,7 @@ static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) { ...@@ -1060,7 +1065,7 @@ static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) {
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value) { static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value) {
int solver = elpa_index_get_int_value(index, "solver", NULL); int solver = elpa_index_get_int_value(index, "solver", NULL);
int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL); int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL) || elpa_index_get_int_value(index, "amd-gpu", NULL));
if ((solver == ELPA_SOLVER_2STAGE) && (gpu_is_active == 1)) { if ((solver == ELPA_SOLVER_2STAGE) && (gpu_is_active == 1)) {
return ((new_value == 0 ) || (new_value == 1)); return ((new_value == 0 ) || (new_value == 1));
} }
...@@ -1106,12 +1111,14 @@ static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) { ...@@ -1106,12 +1111,14 @@ static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) {
static int use_gpu_id_cardinality(elpa_index_t index) { static int use_gpu_id_cardinality(elpa_index_t index) {
#ifdef WITH_NVIDIA_GPU_VERSION #ifdef WITH_NVIDIA_GPU_VERSION
int count; int count;
count = gpu_count(); count = nvidia_gpu_count();
if (count == -1000) { if (count == -1000) {
fprintf(stderr, "Querrying GPUs failed! Set GPU count = 0\n"); fprintf(stderr, "Querrying GPUs failed! Set GPU count = 0\n");
return 0; return 0;
} }
return count; return count;
#elif WITH_AMD_GPU_VERION
return 0;
#else #else
return 0; return 0;
#endif #endif
...@@ -1125,13 +1132,15 @@ static int use_gpu_id_enumerate(elpa_index_t index, int i) { ...@@ -1125,13 +1132,15 @@ static int use_gpu_id_enumerate(elpa_index_t index, int i) {
static int use_gpu_id_is_valid(elpa_index_t index, int n, int new_value) { static int use_gpu_id_is_valid(elpa_index_t index, int n, int new_value) {
#ifdef WITH_NVIDIA_GPU_VERSION #ifdef WITH_NVIDIA_GPU_VERSION
int count; int count;
count = gpu_count(); count = nvidia_gpu_count();
if (count == -1000) { if (count == -1000) {
fprintf(stderr, "Querrying GPUs failed! Return with error\n"); fprintf(stderr, "Querrying GPUs failed! Return with error\n");
return 0 == 1 ; return 0 == 1 ;
} else { } else {
return (0 <= new_value) && (new_value <= count); return (0 <= new_value) && (new_value <= count);
} }
#elif WITH_AMD_GPU_VERION
return 0 == 1;
#else #else
return 0 == 0; return 0 == 0;
#endif #endif
......
...@@ -273,7 +273,7 @@ int main(int argc, char** argv) { ...@@ -273,7 +273,7 @@ int main(int argc, char** argv) {
#endif #endif
assert_elpa_ok(error_elpa); assert_elpa_ok(error_elpa);
elpa_set(handle, "gpu", TEST_GPU, &error_elpa); elpa_set(handle, "nvidia-gpu", TEST_GPU, &error_elpa);
assert_elpa_ok(error_elpa); assert_elpa_ok(error_elpa);
#if defined(TEST_SOLVE_2STAGE) && defined(TEST_KERNEL) #if defined(TEST_SOLVE_2STAGE) && defined(TEST_KERNEL)
......
...@@ -226,7 +226,7 @@ int main(int argc, char** argv) { ...@@ -226,7 +226,7 @@ int main(int argc, char** argv) {
/* Setup */ /* Setup */
assert_elpa_ok(elpa_setup(handle)); assert_elpa_ok(elpa_setup(handle));
elpa_set(handle, "gpu", 0, &error_elpa); elpa_set(handle, "nvidia-gpu", 0, &error_elpa);
assert_elpa_ok(error_elpa); assert_elpa_ok(error_elpa);
autotune_handle = elpa_autotune_setup(handle, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, &error_elpa); autotune_handle = elpa_autotune_setup(handle, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, &error_elpa);
......
...@@ -223,7 +223,7 @@ int main(int argc, char** argv) { ...@@ -223,7 +223,7 @@ int main(int argc, char** argv) {
/* Setup */ /* Setup */
assert_elpa_ok(elpa_setup(elpa_handle_1)); assert_elpa_ok(elpa_setup(elpa_handle_1));
elpa_set(elpa_handle_1, "gpu", 0, &error_elpa); elpa_set(elpa_handle_1, "nvidia-gpu", 0, &error_elpa);
assert_elpa_ok(error_elpa); assert_elpa_ok(error_elpa);
elpa_set(elpa_handle_1, "timings", 1, &error_elpa); elpa_set(elpa_handle_1, "timings", 1, &error_elpa);
...@@ -253,7 +253,7 @@ int main(int argc, char** argv) { ...@@ -253,7 +253,7 @@ int main(int argc, char** argv) {
elpa_load_settings(elpa_handle_2, "initial_parameters.txt", &error_elpa); elpa_load_settings(elpa_handle_2, "initial_parameters.txt", &error_elpa);
elpa_get(elpa_handle_2, "gpu", &gpu, &error_elpa); elpa_get(elpa_handle_2, "nvidia-gpu", &gpu, &error_elpa);
assert_elpa_ok(error_elpa); assert_elpa_ok(error_elpa);
elpa_get(elpa_handle_2, "timings", &timings, &error_elpa); elpa_get(elpa_handle_2, "timings", &timings, &error_elpa);
......
...@@ -640,7 +640,7 @@ program test ...@@ -640,7 +640,7 @@ program test
#endif #endif
assert_elpa_ok(error_elpa) assert_elpa_ok(error_elpa)
call e%set("gpu", TEST_GPU, error_elpa) call e%set("nvidia-gpu", TEST_GPU, error_elpa)
assert_elpa_ok(error_elpa) assert_elpa_ok(error_elpa)
#ifdef TEST_GPU_SET_ID #ifdef TEST_GPU_SET_ID
......
...@@ -228,7 +228,7 @@ program test ...@@ -228,7 +228,7 @@ program test
call e%set("debug",1, error_elpa) call e%set("debug",1, error_elpa)
assert_elpa_ok(error_elpa) assert_elpa_ok(error_elpa)
call e%set("gpu", 0, error_elpa) call e%set("nvidia-gpu", 0, error_elpa)
assert_elpa_ok(error_elpa) assert_elpa_ok(error_elpa)
!call e%set("max_stored_rows", 15, error_elpa) !call e%set("max_stored_rows", 15, error_elpa)
......
...@@ -209,7 +209,7 @@ program test ...@@ -209,7 +209,7 @@ program test
call e1%set("debug",1, error_elpa) call e1%set("debug",1, error_elpa)
assert_elpa_ok(error_elpa) assert_elpa_ok(error_elpa)
call e1%set("gpu", 0, error_elpa) call e1%set("nvidia-gpu", 0, error_elpa)
assert_elpa_ok(error_elpa) assert_elpa_ok(error_elpa)
!call e1%set("max_stored_rows", 15, error_elpa) !call e1%set("max_stored_rows", 15, error_elpa)
...@@ -238,7 +238,7 @@ program test ...@@ -238,7 +238,7 @@ program test
assert_elpa_ok(error_elpa) assert_elpa_ok(error_elpa)
call e2%get("debug", int(debug,kind=c_int), error_elpa) call e2%get("debug", int(debug,kind=c_int), error_elpa)
assert_elpa_ok(error_elpa) assert_elpa_ok(error_elpa)
call e2%get("gpu", int(gpu,kind=c_int), error_elpa) call e2%get("nvidia-gpu", int(gpu,kind=c_int), error_elpa)
assert_elpa_ok(error_elpa) assert_elpa_ok(error_elpa)
if ((timings .ne. 1) .or. (debug .ne. 1) .or. (gpu .ne. 0)) then if ((timings .ne. 1) .or. (debug .ne. 1) .or. (gpu .ne. 0)) then
......
...@@ -233,7 +233,7 @@ program test ...@@ -233,7 +233,7 @@ program test
call e_complex%set("timings",1, error_elpa) call e_complex%set("timings",1, error_elpa)
call e_complex%set("debug",1,error_elpa) call e_complex%set("debug",1,error_elpa)
call e_complex%set("gpu", 0,error_elpa) call e_complex%set("nvidia-gpu", 0,error_elpa)
call e_complex%set("omp_threads", 8, error_elpa) call e_complex%set("omp_threads", 8, error_elpa)
assert_elpa_ok(e_complex%setup()) assert_elpa_ok(e_complex%setup())
...@@ -271,7 +271,7 @@ program test ...@@ -271,7 +271,7 @@ program test
call e_skewsymmetric%set("timings",1, error_elpa) call e_skewsymmetric%set("timings",1, error_elpa)
call e_skewsymmetric%set("debug",1,error_elpa) call e_skewsymmetric%set("debug",1,error_elpa)
call e_skewsymmetric%set("gpu", 0,error_elpa) call e_skewsymmetric%set("nvidia-gpu", 0,error_elpa)
call e_skewsymmetric%set("omp_threads",8, error_elpa) call e_skewsymmetric%set("omp_threads",8, error_elpa)
assert_elpa_ok(e_skewsymmetric%setup()) assert_elpa_ok(e_skewsymmetric%setup())
......
...@@ -264,7 +264,7 @@ program test ...@@ -264,7 +264,7 @@ program test
call e%set("timings",1, error_elpa) call e%set("timings",1, error_elpa)
call e%set("debug",1, error_elpa) call e%set("debug",1, error_elpa)
call e%set("gpu", 0, error_elpa) call e%set("nvidia-gpu", 0, error_elpa)
!call e%set("max_stored_rows", 15, error_elpa) !call e%set("max_stored_rows", 15, error_elpa)
assert_elpa_ok(e%setup()) assert_elpa_ok(e%setup())
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment