Commit 460b754c authored by Lorenz Huedepohl's avatar Lorenz Huedepohl

Work-In-Progress commit for new ELPA interface

parent dbe258d9
......@@ -20,6 +20,7 @@ libelpa@SUFFIX@_public_la_SOURCES = \
src/elpa1/legacy_interface/elpa1_auxiliary_legacy.F90 \
src/elpa1/elpa1_auxiliary.F90 \
src/elpa_t.F90 \
src/elpa_constants.F90 \
src/general/elpa_utilities.F90
# internal parts
......@@ -53,8 +54,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa2/qr/elpa_pdgeqrf.F90 \
src/elpa1/elpa1.F90 \
src/elpa2/elpa2.F90 \
src/elpa_index.c \
src/elpa_options.c
src/elpa_constants.c \
src/elpa_index.c
EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa1/elpa_reduce_add_vectors.X90 \
......@@ -322,10 +323,9 @@ endif
include generated_headers.am
BUILT_SOURCES = $(generated_headers)
# install public Fortran modules files in the include/ dir
# install public headers and Fortran modules files in the include/ dir
elpa_includedir = $(includedir)/elpa@SUFFIX@-@PACKAGE_VERSION@
nobase_elpa_include_HEADERS = $(wildcard modules/*)
nobase_elpa_include_HEADERS += elpa/elpa.h elpa/elpa_kernel_constants.h elpa/elpa_solver_constants.h elpa/elpa_constants.h elpa/elpa_generated.h
nobase_elpa_include_HEADERS = $(wildcard modules/*) $(wildcard elpa/*)
dist_man_MANS = \
man/solve_evp_real.3 \
......
......@@ -8,14 +8,6 @@ AC_CONFIG_SRCDIR([src/elpa_t.F90])
AM_INIT_AUTOMAKE([foreign -Wall subdir-objects])
# Without this, automake tries to be smart and rebuild
# the autoconf-generated files such as configure, aclocal.m4, etc.,
# in case the timestamps of files such as configure.ac are newer.
#
# This only makes trouble for end users with out-of-date autoconf versions
# that cannot produce these files.
AM_MAINTAINER_MODE([disable])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS([config.h])
AM_SILENT_RULES([yes])
......@@ -75,7 +67,7 @@ if test x"${with_mpi}" = x"yes"; then
fi
# C
AC_LANG([C])
AC_LANG_PUSH([C])
AX_PROG_CC_MPI([test x"$with_mpi" = x"yes"],[found_mpi_c=yes],[found_mpi_c=no])
if test x"$with_mpi" = x"yes"; then
if test x"$found_mpi_c" = x"no"; then
......@@ -96,17 +88,10 @@ AM_PROG_AR
AM_PROG_AS
AC_PROG_CC_C99
AM_PROG_CC_C_O
AC_LANG_POP([C])
# Fortran
dnl check whether single precision is requested
AC_MSG_CHECKING(whether ELPA library should contain also single precision functions)
AC_ARG_ENABLE(single-precision,[AS_HELP_STRING([--enable-single-precision],
[build with single precision])],
want_single_precision="yes", want_single_precision="no")
AC_MSG_RESULT([${want_single_precision}])
AC_LANG([Fortran])
AC_LANG_PUSH([Fortran])
m4_include([m4/ax_prog_fc_mpi.m4])
AX_PROG_FC_MPI([test x"$with_mpi" = x"yes"],[found_mpi_f=yes],[found_mpi_f=no])
if test x"$with_mpi" = x"yes"; then
......@@ -116,6 +101,11 @@ if test x"$with_mpi" = x"yes"; then
fi
AC_FC_SRCEXT([F90])
AC_FC_FREEFORM
AC_FC_MODULE_FLAG
AC_FC_MODULE_OUTPUT_FLAG
AC_FC_LIBRARY_LDFLAGS
if test x"${enable_openmp}" = x"yes"; then
AX_ELPA_OPENMP
......@@ -133,54 +123,26 @@ if test x"$with_mpi" = x"yes"; then
fi
fi
## C++
#AC_LANG([C++])
#AC_PROG_CXX
#
#if test x"${enable_openmp}" = x"yes"; then
# AX_ELPA_OPENMP
# if test "$ac_cv_prog_cxx_openmp" = unsupported; then
# AC_MSG_ERROR([Could not compile a C++ program with OpenMP, adjust CXXFLAGS])
# fi
# CXXFLAGS="$OPENMP_CXXFLAGS $CXXFLAGS"
#fi
dnl variables needed for the tests
dnl do NOT remove any variables here, until
dnl 1. you know 100% what you are doing
dnl 2. you tested ALL configure functionality afterwards
dnl Otherwise, you most likely break some functionality
dnl as default always define the generic kernels to be build
dnl this is only unset if gpu_support_only is defined, or
dnl other specific real/complex kernels are wanted
install_real_generic=yes
install_real_generic_simple=yes
install_complex_generic=yes
install_complex_generic_simple=yes
#want_avx=yes
#want_avx2=yes
#want_sse=yes
#want_avx512=yes
AC_LANG([C])
dnl build with the possibility to redirect stdout and stderr
dnl per MPI task to a file
AC_MSG_CHECKING(whether stdout/stderr file redirect should be enabled)
AC_ARG_ENABLE([redirect],
AS_HELP_STRING([--enable-redirect],
[for test programs, allow redirection of stdout/stderr per MPI taks in a file (useful for timing), default no.]),
[AS_HELP_STRING([--enable-redirect],
[for test programs, allow redirection of stdout/stderr per MPI taks in a file (useful for timing), default no.])],
[],
[enable_redirect=no])
AC_MSG_RESULT([${enable_redirect}])
dnl check whether single precision is requested
AC_MSG_CHECKING(whether ELPA library should contain also single precision functions)
AC_ARG_ENABLE(single-precision,
[AS_HELP_STRING([--enable-single-precision],
[build with single precision])],
[want_single_precision="$enableval"],
[want_single_precision="no"])
AC_MSG_RESULT([${want_single_precision}])
dnl redirect
if test x"${enable_redirect}" = x"yes"; then
AC_DEFINE([HAVE_REDIRECT], [1], [Redirect stdout and stderr of test programs per MPI tasks to a file])
......@@ -190,26 +152,26 @@ AM_CONDITIONAL([HAVE_REDIRECT],[test x"$enable_redirect" = x"yes"])
dnl build with ftimings support
AC_MSG_CHECKING(whether ELPA should be build with more detailed timing support)
AC_ARG_ENABLE([timings],
AS_HELP_STRING([--enable-timings],
[more detailed timing, default no.]),
[enable_timings=yes],
[AS_HELP_STRING([--enable-timings],
[more detailed timing, default no.])],
[enable_timings="$enableval"],
[enable_timings=no])
AC_MSG_RESULT([${enable_timings}])
if test x"${enable_timings}" = x"yes"; then
AC_DEFINE([HAVE_DETAILED_TIMINGS], [1], [Enable more timing])
fi
AM_CONDITIONAL([HAVE_DETAILED_TIMINGS],[test x"$enable_timings" = x"yes"])
AM_CONDITIONAL([HAVE_DETAILED_TIMINGS], [test x"$enable_timings" = x"yes"])
dnl PAPI for ftimings
AC_ARG_WITH([papi],
[AS_HELP_STRING([--with-papi],[Use PAPI to also measure flop count in the detailed timing (--enable-timing), disabled by default])],
[AS_HELP_STRING([--with-papi],
[Use PAPI to also measure flop count in the detailed timing (--enable-timing), disabled by default])],
[],
[with_papi="no"])
if test x"${enable_timings}" = x"yes"; then
if test x"$with_papi" = x"yes" ; then
AC_SEARCH_LIBS([PAPI_library_init],[papi],
[papi_found=yes],[papi_found=no])
AC_SEARCH_LIBS([PAPI_library_init], [papi], [papi_found=yes], [papi_found=no])
if test x"$papi_found" = x"no" ; then
AC_MSG_ERROR(["Could not find usable PAPI installation, please install or adjust CFLAGS, LDFLAGS"])
fi
......@@ -217,385 +179,6 @@ if test x"${enable_timings}" = x"yes"; then
fi
fi
dnl Probe whether the x86_64 SSE assembly kernels can be assembled with CC.
dnl The overall outcome is recorded in can_compile_sse_assembly and mirrored
dnl into install_real_sse_assembly and install_complex_sse_assembly.
dnl
dnl The exit status of the compiler is tested directly with "if"; comparing
dnl the status with the "==" operator of test is not POSIX and is rejected by
dnl shells such as dash, which made the probe report "no" unconditionally.
AC_MSG_CHECKING(whether double-precision SSE assembly kernel can be compiled)
if $CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s -o test.o 2>/dev/null; then
  can_compile_sse_assembly=yes
  install_real_sse_assembly=yes
  install_complex_sse_assembly=yes
else
  can_compile_sse_assembly=no
  install_real_sse_assembly=no
  install_complex_sse_assembly=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse_assembly}])

dnl The single-precision kernel is only probed when single precision was
dnl requested.  A failure here switches the SSE assembly kernels off; a success
dnl leaves the double-precision verdict untouched, so a failed double-precision
dnl probe can no longer be overridden by a passing single-precision one.
if test x"${want_single_precision}" = x"yes" ; then
  AC_MSG_CHECKING(whether single-precision SSE assembly kernel can be compiled)
  if $CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s -o test.o 2>/dev/null; then
    can_compile_sse_assembly_single=yes
  else
    can_compile_sse_assembly_single=no
    can_compile_sse_assembly=no
    install_real_sse_assembly=no
    install_complex_sse_assembly=no
  fi
  rm -f ./test.o
  AC_MSG_RESULT([${can_compile_sse_assembly_single}])
  if test x"${can_compile_sse_assembly_single}" = x"no" ; then
    AC_MSG_WARN([Cannot compile single-precision SSE assembly kernel: disabling SSE assembly kernels alltogether])
  fi
fi
dnl check whether one can compile SSE code with gcc intrinsics
dnl Compile-only probe: can the current C compiler build a program that
dnl includes x86intrin.h and calls _mm_loaddup_pd?  The yes/no verdict is kept
dnl in can_compile_sse_intrinsics and then copied to every SSE intrinsics
dnl install switch.
AC_MSG_CHECKING(whether we can compile SSE3 with gcc intrinsics in C)
AC_COMPILE_IFELSE(
  [AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m128d h1 = _mm_loaddup_pd(q);
return 0;
}
])],
  [can_compile_sse_intrinsics=yes],
  [can_compile_sse_intrinsics=no])
AC_MSG_RESULT([${can_compile_sse_intrinsics}])

dnl Anything other than an explicit "yes" disables the whole kernel family.
case "${can_compile_sse_intrinsics}" in
  yes)
    install_real_sse_intrinsics=yes
    install_real_sse_block2=yes
    install_real_sse_block4=yes
    install_real_sse_block6=yes
    install_complex_sse_intrinsics=yes
    install_complex_sse_block1=yes
    install_complex_sse_block2=yes
    ;;
  *)
    install_real_sse_intrinsics=no
    install_real_sse_block2=no
    install_real_sse_block4=no
    install_real_sse_block6=no
    install_complex_sse_intrinsics=no
    install_complex_sse_block1=no
    install_complex_sse_block2=no
    ;;
esac
dnl Optional AVX kernels.
dnl
dnl The switch value is taken from enableval, so --disable-avx skips the
dnl compile probe while --enable-avx, or no option at all, runs it.  Hard-coding
dnl "no" in the action-if-given branch would turn the probe off for an
dnl explicit --enable-avx as well.
dnl
dnl The switch and the probe are declared exactly once; the probe uses the
dnl compiler flags as specified by the user and stores its verdict in
dnl can_compile_avx.
AC_MSG_CHECKING(whether --enable-avx is specified)
AC_ARG_ENABLE([avx],
              [AS_HELP_STRING([--enable-avx],
                              [check whether AVX kernels can be build, default yes])],
              [check_avx="$enableval"],
              [check_avx=yes])
AC_MSG_RESULT([$check_avx])
if test "${check_avx}" = "yes"; then
  dnl compile-only probe for the _mm256_load_pd intrinsic from x86intrin.h
  AC_MSG_CHECKING([whether we can compile AVX gcc intrinsics in C])
  AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m256d a1_1 = _mm256_load_pd(q);
return 0;
}
])],
    [can_compile_avx=yes],
    [can_compile_avx=no]
  )
  AC_MSG_RESULT([${can_compile_avx}])
else
  can_compile_avx=no
fi
dnl if test "${can_compile_avx}" = "yes" ; then
dnl AC_MSG_CHECKING([whether we can compile AVX intrinsics in C++])
dnl AC_LANG_PUSH([C++])
dnl AC_COMPILE_IFELSE([AC_LANG_SOURCE([
dnl #include <x86intrin.h>
dnl int main(int argc, char **argv){
dnl double* q;
dnl __m256d a1_1 = _mm256_load_pd(q);
dnl return 0;
dnl }
dnl ])],
dnl [can_compile_avx=yes],
dnl [can_compile_avx=no]
dnl )
dnl AC_LANG_POP([C++])
dnl AC_MSG_RESULT([${can_compile_avx}])
dnl if test "${can_compile_avx}" = "no" ; then
dnl AC_MSG_WARN([Cannot compile C++ with AVX: disabling AVX alltogether])
dnl fi
dnl fi
dnl Optional AVX2 kernels.
dnl
dnl The switch value is taken from enableval, so --disable-avx2 skips the
dnl compile probe while --enable-avx2, or no option at all, runs it.
dnl Hard-coding "no" in the action-if-given branch would turn the probe off
dnl for an explicit --enable-avx2 as well.
dnl
dnl The probe compiles a program using _mm256_load_pd and _mm256_fmadd_pd and
dnl stores its verdict in can_compile_avx2.
AC_MSG_CHECKING(whether --enable-avx2 is specified)
AC_ARG_ENABLE([avx2],
              [AS_HELP_STRING([--enable-avx2],
                              [check whether AVX2 kernels can be build, default yes])],
              [check_avx2="$enableval"],
              [check_avx2=yes])
AC_MSG_RESULT([$check_avx2])
if test "${check_avx2}" = "yes"; then
  AC_MSG_CHECKING([whether we can compile AVX2 gcc intrinsics in C])
  AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m256d q1 = _mm256_load_pd(q);
__m256d y1 = _mm256_fmadd_pd(q1, q1, q1);
return 0;
}
])],
    [can_compile_avx2=yes],
    [can_compile_avx2=no]
  )
  AC_MSG_RESULT([${can_compile_avx2}])
else
  can_compile_avx2=no
fi
#if test "${can_compile_avx2}" = "yes" ; then
# AC_MSG_CHECKING([whether we can compile AVX2 gcc intrinsics in C++])
# AC_LANG_PUSH([C++])
# AC_COMPILE_IFELSE([AC_LANG_SOURCE([
# #include <x86intrin.h>
# int main(int argc, char **argv){
# double* q;
# __m256d q1 = _mm256_load_pd(q);
# __m256d y1 = _mm256_fmadd_pd(q1, q1, q1);
# return 0;
# }
# ])],
# [can_compile_avx2=yes],
# [can_compile_avx2=no]
# )
# AC_LANG_POP([C++])
# AC_MSG_RESULT([${can_compile_avx2}])
# if test "${can_compile_avx2}" = "no" ; then
# AC_MSG_WARN([Cannot compile C++ with AVX2!])
# fi
#fi
dnl Optional AVX512 kernels.
dnl
dnl The switch value is taken from enableval, so --disable-avx512 skips the
dnl compile probe while --enable-avx512, or no option at all, runs it.
dnl Hard-coding "no" in the action-if-given branch would turn the probe off
dnl for an explicit --enable-avx512 as well.
dnl
dnl The probe compiles a program using _mm512_load_pd and _mm512_fmadd_pd and
dnl stores its verdict in can_compile_avx512.
AC_MSG_CHECKING(whether --enable-avx512 is specified)
AC_ARG_ENABLE([avx512],
              [AS_HELP_STRING([--enable-avx512],
                              [check whether AVX512 kernels can be build, default yes])],
              [check_avx512="$enableval"],
              [check_avx512=yes])
AC_MSG_RESULT([$check_avx512])
if test "${check_avx512}" = "yes"; then
  AC_MSG_CHECKING([whether we can compile AVX512 gcc intrinsics in C])
  AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m512d q1 = _mm512_load_pd(q);
__m512d y1 = _mm512_fmadd_pd(q1, q1, q1);
return 0;
}
])],
    [can_compile_avx512=yes],
    [can_compile_avx512=no]
  )
  AC_MSG_RESULT([${can_compile_avx512}])
else
  can_compile_avx512=no
fi
#if test "${can_compile_avx512}" = "yes" ; then
# AC_MSG_CHECKING([whether we can compile AVX512 gcc intrinsics in C++])
# AC_LANG_PUSH([C++])
# AC_COMPILE_IFELSE([AC_LANG_SOURCE([
# #include <x86intrin.h>
# int main(int argc, char **argv){
# double* q;
# __m512d q1 = _mm512_load_pd(q);
# __m512d y1 = _mm512_fmadd_pd(q1, q1, q1);
# return 0;
# }
# ])],
# [can_compile_avx512=yes],
# [can_compile_avx512=no]
# )
# AC_LANG_POP([C++])
# AC_MSG_RESULT([${can_compile_avx512}])
# if test "${can_compile_avx512}" = "no" ; then
# AC_MSG_WARN([Cannot compile C++ with AVX512!])
# fi
#fi
dnl if test "${can_compile_avx2}" = "yes" ; then
dnl AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C++])
dnl AC_LANG_PUSH([C++])
dnl AC_COMPILE_IFELSE([AC_LANG_SOURCE([
dnl #include <x86intrin.h>
dnl int main(int argc, char **argv){
dnl double* q;
dnl __m256d q1 = _mm256_load_pd(q);
dnl __m256d y1 = _mm256_fmadd_pd(q1, q1, q1);
dnl return 0;
dnl }
dnl ])],
dnl [can_compile_avx2=yes],
dnl [can_compile_avx2=no]
dnl )
dnl AC_LANG_POP([C++])
dnl AC_MSG_RESULT([${can_compile_avx2}])
dnl if test "${can_compile_avx2}" = "no" ; then
dnl AC_MSG_WARN([Cannot compile C++ with AVX2!])
dnl fi
dnl fi
dnl Derive the per-kernel install switches from the AVX, AVX2 and AVX512
dnl compile probes: every install_* variable of a family is "yes" exactly when
dnl the matching can_compile_* variable equals "yes", and "no" otherwise.

dnl AVX kernels
if test "${can_compile_avx}" != "yes" ; then
  install_real_avx_block2=no
  install_real_avx_block4=no
  install_real_avx_block6=no
  install_complex_avx_block1=no
  install_complex_avx_block2=no
else
  install_real_avx_block2=yes
  install_real_avx_block4=yes
  install_real_avx_block6=yes
  install_complex_avx_block1=yes
  install_complex_avx_block2=yes
fi

dnl AVX2 kernels
if test "${can_compile_avx2}" != "yes" ; then
  install_real_avx2_block2=no
  install_real_avx2_block4=no
  install_real_avx2_block6=no
  install_complex_avx2_block1=no
  install_complex_avx2_block2=no
else
  install_real_avx2_block2=yes
  install_real_avx2_block4=yes
  install_real_avx2_block6=yes
  install_complex_avx2_block1=yes
  install_complex_avx2_block2=yes
fi

dnl AVX512 kernels
if test "${can_compile_avx512}" != "yes" ; then
  install_real_avx512_block2=no
  install_real_avx512_block4=no
  install_real_avx512_block6=no
  install_complex_avx512_block1=no
  install_complex_avx512_block2=no
else
  install_real_avx512_block2=yes
  install_real_avx512_block4=yes
  install_real_avx512_block6=yes
  install_complex_avx512_block1=yes
  install_complex_avx512_block2=yes
fi
dnl Publish the outcome of each instruction-set probe in two forms: as an
dnl Automake conditional via AM_CONDITIONAL, and, when the probe succeeded,
dnl as a preprocessor symbol via AC_DEFINE.
dnl NOTE: every test below looks at a can_compile_* variable, i.e. at the
dnl result of a compile probe, although the AC_DEFINE descriptions speak of
dnl CPU support.

dnl SSE assembly kernels
AM_CONDITIONAL([HAVE_SSE_ASSEMBLY],[test x"$can_compile_sse_assembly" = x"yes"])
if test x"${can_compile_sse_assembly}" = x"yes" ; then
AC_DEFINE([HAVE_SSE_ASSEMBLY],[1],[assembly SSE is supported on this CPU])
fi
dnl SSE intrinsics kernels
AM_CONDITIONAL([HAVE_SSE_INTRINSICS],[test x"$can_compile_sse_intrinsics" = x"yes"])
if test x"${can_compile_sse_intrinsics}" = x"yes" ; then
AC_DEFINE([HAVE_SSE_INTRINSICS],[1],[gcc intrinsics SSE is supported on this CPU])
fi
dnl AVX kernels
AM_CONDITIONAL([HAVE_AVX],[test x"$can_compile_avx" = x"yes"])
if test x"${can_compile_avx}" = x"yes" ; then
AC_DEFINE([HAVE_AVX],[1],[AVX is supported on this CPU])
fi
dnl AVX2 kernels
AM_CONDITIONAL([HAVE_AVX2],[test x"$can_compile_avx2" = x"yes"])
if test x"${can_compile_avx2}" = x"yes" ; then
AC_DEFINE([HAVE_AVX2],[1],[AVX2 is supported on this CPU])
fi
dnl AVX512 kernels
AM_CONDITIONAL([HAVE_AVX512],[test x"$can_compile_avx512" = x"yes"])
if test x"${can_compile_avx512}" = x"yes" ; then
AC_DEFINE([HAVE_AVX512],[1],[AVX512 is supported on this CPU])
fi
dnl Optionally append the vectorization and unsafe-math flags listed below to
dnl CFLAGS and CXXFLAGS.
dnl
dnl The switch value is taken from withval, so --without-avx-optimization and
dnl --with-avx-optimization=no leave the flags untouched.  Hard-coding "yes"
dnl in the action-if-given branch would enable the flags for those as well.
dnl The default, when the option is absent, stays "no".
AC_MSG_CHECKING(whether AVX optimization flags should be set automatically)
AC_ARG_WITH([avx-optimization],
            [AS_HELP_STRING([--with-avx-optimization],
                            [use AVX optimization, default no.])],
            [with_avx_optimization="$withval"],
            [with_avx_optimization=no])
AC_MSG_RESULT([${with_avx_optimization}])
if test x"${with_avx_optimization}" = x"yes"; then
  CFLAGS="$CFLAGS -funsafe-loop-optimizations -funsafe-math-optimizations -ftree-vect-loop-version -ftree-vectorize"
  CXXFLAGS="$CXXFLAGS -funsafe-loop-optimizations -funsafe-math-optimizations -ftree-vect-loop-version -ftree-vectorize"
fi
AC_LANG([Fortran])
AC_FC_FREEFORM
AC_FC_MODULE_FLAG
AC_FC_MODULE_OUTPUT_FLAG
AC_FC_LIBRARY_LDFLAGS
save_FCFLAGS=$FCFLAGS
save_LDFLAGS=$LDFLAGS
......@@ -609,7 +192,7 @@ dnl check whether fortran error_unit is defined
AC_MSG_CHECKING([whether Fortran module iso_fortran_env is available])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
program test_error_unit
use ISO_FORTRAN_ENV, only : error_unit
use iso_fortran_env, only : error_unit
implicit none
write(error_unit,*) "error_unit is defined"
......@@ -619,11 +202,13 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
[can_use_iso_fortran_env=no]
)
AC_MSG_RESULT([${can_use_iso_fortran_env}])
if test x"${can_use_iso_fortran_env}" = x"yes" ; then
AC_DEFINE([HAVE_ISO_FORTRAN_ENV],[1],[can use module iso_fortran_env])
fi
dnl check whether one can link with specified MKL (desired method)
AC_MSG_CHECKING([whether we can compile a Fortran program using MKL])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
program test_mkl
use mkl_service
......@@ -742,6 +327,7 @@ dnl important: reset them again!
FCFLAGS=$save_FCFLAGS
LDFLAGS=$save_LDFLAGS
dnl check for intrinsic fortran function of 2003 standard
AC_MSG_CHECKING([whether we can use the intrinsic Fortran function "get_environment_variable"])
......@@ -760,6 +346,7 @@ if test x"${fortran_can_check_environment}" = x"yes" ; then
AC_DEFINE([HAVE_ENVIRONMENT_CHECKING],[1],[Fortran can query environment variables])
fi
dnl check whether BAND_TO_FULL_BLOCKING is set
use_band_to_full_blocking=yes
AC_MSG_CHECKING(whether BAND_TO_FLULL_BLOCKING is requested)
......@@ -777,17 +364,6 @@ if test x"${use_band_to_full_blocking}" = x"yes"; then
AC_DEFINE([BAND_TO_FULL_BLOCKING], [1], [use blocking in trans_ev_band_to_full])
fi
dnl check whether GPU version is requested
dnl
dnl The switch value is taken from enableval, so --disable-gpu-support and
dnl --enable-gpu-support=no yield want_gpu=no.  Hard-coding "yes" in the
dnl action-if-given branch would request GPU support for those as well.
dnl The default, when the option is absent, stays "no".
#CUDA_INSTALL_PATH="/usr/local/cuda/"
#CUDA_SDK_INSTALL_PATH="/usr/local/NVIDIA_GPU_Computing_SDK"
AC_MSG_CHECKING(whether GPU support is requested)
AC_ARG_ENABLE([gpu-support],
              [AS_HELP_STRING([--enable-gpu-support],
                              [build ELPA2 with GPU-support])],
              [want_gpu="$enableval"],
              [want_gpu="no"])
AC_MSG_RESULT([${want_gpu}])
AC_ARG_WITH([cuda-path],[AS_HELP_STRING([--with-cuda-path=PATH],[prefix where CUDA is installed @<:@default=auto@:>@])],
[CUDA_INSTALL_PATH=$withval], [with_cuda=auto])
......@@ -795,15 +371,17 @@ AC_ARG_WITH([cuda-path],[AS_HELP_STRING([--with-cuda-path=PATH],[prefix where CU
AC_ARG_WITH([cuda-sdk-path],[AS_HELP_STRING([--with-cuda-sdk-path=PATH],[prefix where CUDA SDK is installed @<:@default=auto@:>@])],
[CUDA_SDK_INSTALL_PATH=$withval],[with_cuda_sdk=auto])
dnl setup nvcc flags and use them in later tests
user_sets_gpu_compute_capability="no"
AC_MSG_CHECKING(whether a GPU compute capability is specified)
AC_ARG_WITH([GPU-compute-capability],
[AS_HELP_STRING([--with-GPU-compute-capability=value],
[use compute capability "value" for GPU version (default sm_35)])],
[AS_HELP_STRING([--with-GPU-compute-capability=VALUE],
[use compute capability VALUE for GPU version, default: "sm_35"])],
[user_sets_gpu_compute_capability="yes"],[cuda_compute_capability="sm_35"])
AC_MSG_RESULT([${user_sets_gpu_compute_capability}])
dnl sanity check whether compute capability setting by user is reasonable
if test x"${user_sets_gpu_compute_capability}" = x"yes" ; then
dnl the user must set a value which starts with "sm_"
......@@ -815,92 +393,12 @@ if test x"${user_sets_gpu_compute_capability}" = x"yes" ; then
fi
fi
dnl GPU toolchain setup, executed only when want_gpu is "yes".
dnl Builds the nvcc flag variables, then aborts configure unless nvcc is found
dnl and both cublas and cudart can be linked.  On success install_gpu and
dnl can_compile_gpu are set to "yes".
if test x"${want_gpu}" = x"yes" ; then
dnl all checks in this block run with C as the current language
AC_LANG_PUSH([C])
dnl compile flags: target architecture from cuda_compute_capability plus the
dnl include directory below CUDA_INSTALL_PATH
CUDA_CFLAGS="$CUDA_CFLAGS -arch $cuda_compute_capability -O2 -I$CUDA_INSTALL_PATH/include"
dnl the library search path is added to the global LDFLAGS, so it also applies
dnl to the AC_SEARCH_LIBS link tests further down
LDFLAGS="$LDFLAGS -L$CUDA_INSTALL_PATH/lib64"
dnl NOTE(review): CUDA_LDFLAGS is not assigned anywhere in the visible part of
dnl this file; presumably it is meant to come from the environment - confirm
NVCCFLAGS="$NVCCFLAGS $CUDA_CFLAGS $CUDA_LDFLAGS"
NVCC="nvcc"
dnl export NVCC and NVCCFLAGS as substitution variables
AC_SUBST(NVCC)
AC_SUBST(NVCCFLAGS)
dnl check whether nvcc compiler is found
AC_CHECK_PROG(nvcc_found,nvcc,yes,no)
if test x"${nvcc_found}" = x"no" ; then
AC_MSG_ERROR([nvcc not found; try to set the cuda-path or disable GPU support])
fi
dnl check whether we find cublas
AC_SEARCH_LIBS([cublasDgemm],[cublas],[have_cublas=yes],[have_cublas=no])
if test x"${have_cublas}" = x"no"; then
AC_MSG_ERROR([Could not link cublas; try to set the cuda-path or disable GPU support])
fi
dnl check whether we find the CUDA runtime library cudart
AC_SEARCH_LIBS([cudaMemcpy],[cudart],[have_cudart=yes],[have_cudart=no])
if test x"${have_cudart}" = x"no"; then
AC_MSG_ERROR([Could not link cudart; try to set the cuda-path or disable GPU support])
fi
AC_LANG_POP([C])
dnl all checks passed: mark the GPU code as installable and compilable
install_gpu=yes
can_compile_gpu=yes
fi
dnl now check which kernels can be compiled
dnl the checks for SSE were already done before
dnl the checks for AVX were already done before
dnl check BGP kernel
AC_MSG_CHECKING([whether we can compile with BGP intrinsics])
AC_LINK_IFELSE([AC_LANG_SOURCE([
program test_bgp