Remove C++ dependency

The C++ kernels can be written as C kernels, which simplifies the build
procedure.
parent 12b958fd
......@@ -106,19 +106,19 @@ if WITH_REAL_AVX_BLOCK6_KERNEL
endif
if WITH_COMPLEX_SSE_BLOCK1_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse_1hv.cpp
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse_1hv.c
endif
if WITH_COMPLEX_AVX_BLOCK1_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.cpp
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.c
endif
if WITH_COMPLEX_SSE_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse_2hv.cpp
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse_2hv.c
endif
if WITH_COMPLEX_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv.cpp
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv.c
endif
# install any .mod files in the include/ dir
......
......@@ -43,10 +43,10 @@ if test x$_cv_gnu_make_command = x ; then
AC_MSG_ERROR([Need GNU Make])
fi
AC_CHECK_PROG(CPP_FOUND,cpp,yes,no)
if test x"${CPP_FOUND}" = xno; then
AC_MSG_ERROR([no cpp found])
fi
#AC_CHECK_PROG(CPP_FOUND,cpp,yes,no)
#if test x"${CPP_FOUND}" = xno; then
# AC_MSG_ERROR([no cpp found])
#fi
# gnu-make fortran module dependencies
m4_include([fdep/fortran_dependencies.m4])
......@@ -112,17 +112,17 @@ if test x"${enable_openmp}" = x"yes"; then
FCFLAGS="$OPENMP_FCFLAGS $FCFLAGS"
fi
# C++
AC_LANG([C++])
AC_PROG_CXX
if test x"${enable_openmp}" = x"yes"; then
AX_ELPA_OPENMP
if test "$ac_cv_prog_cxx_openmp" = unsupported; then
AC_MSG_ERROR([Could not compile a C++ program with OpenMP, adjust CXXFLAGS])
fi
CXXFLAGS="$OPENMP_CXXFLAGS $CXXFLAGS"
fi
## C++
#AC_LANG([C++])
#AC_PROG_CXX
#
#if test x"${enable_openmp}" = x"yes"; then
# AX_ELPA_OPENMP
# if test "$ac_cv_prog_cxx_openmp" = unsupported; then
# AC_MSG_ERROR([Could not compile a C++ program with OpenMP, adjust CXXFLAGS])
# fi
# CXXFLAGS="$OPENMP_CXXFLAGS $CXXFLAGS"
#fi
......@@ -240,26 +240,26 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
)
AC_MSG_RESULT([${can_compile_avx}])
if test "${can_compile_avx}" = "yes" ; then
AC_MSG_CHECKING([whether we can compile AVX intrinsics in C++])
AC_LANG_PUSH([C++])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m256d a1_1 = _mm256_load_pd(q);
return 0;
}
])],
[can_compile_avx=yes],
[can_compile_avx=no]
)
AC_LANG_POP([C++])
AC_MSG_RESULT([${can_compile_avx}])
if test "${can_compile_avx}" = "no" ; then
AC_MSG_WARN([Cannot compile C++ with AVX: disabling AVX alltogether])
fi
fi
#if test "${can_compile_avx}" = "yes" ; then
# AC_MSG_CHECKING([whether we can compile AVX intrinsics in C++])
# AC_LANG_PUSH([C++])
# AC_COMPILE_IFELSE([AC_LANG_SOURCE([
# #include <x86intrin.h>
# int main(int argc, char **argv){
# double* q;
# __m256d a1_1 = _mm256_load_pd(q);
# return 0;
# }
# ])],
# [can_compile_avx=yes],
# [can_compile_avx=no]
# )
# AC_LANG_POP([C++])
# AC_MSG_RESULT([${can_compile_avx}])
# if test "${can_compile_avx}" = "no" ; then
# AC_MSG_WARN([Cannot compile C++ with AVX: disabling AVX alltogether])
# fi
#fi
AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
......@@ -275,27 +275,27 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
[can_compile_avx2=no]
)
AC_MSG_RESULT([${can_compile_avx2}])
if test "${can_compile_avx2}" = "yes" ; then
AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C++])
AC_LANG_PUSH([C++])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m256d q1 = _mm256_load_pd(q);
__m256d y1 = _mm256_fmadd_pd(q1, q1, q1);
return 0;
}
])],
[can_compile_avx2=yes],
[can_compile_avx2=no]
)
AC_LANG_POP([C++])
AC_MSG_RESULT([${can_compile_avx2}])
if test "${can_compile_avx2}" = "no" ; then
AC_MSG_WARN([Cannot compile C++ with AVX2!])
fi
fi
#if test "${can_compile_avx2}" = "yes" ; then
# AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C++])
# AC_LANG_PUSH([C++])
# AC_COMPILE_IFELSE([AC_LANG_SOURCE([
# #include <x86intrin.h>
# int main(int argc, char **argv){
# double* q;
# __m256d q1 = _mm256_load_pd(q);
# __m256d y1 = _mm256_fmadd_pd(q1, q1, q1);
# return 0;
# }
# ])],
# [can_compile_avx2=yes],
# [can_compile_avx2=no]
# )
# AC_LANG_POP([C++])
# AC_MSG_RESULT([${can_compile_avx2}])
# if test "${can_compile_avx2}" = "no" ; then
# AC_MSG_WARN([Cannot compile C++ with AVX2!])
# fi
#fi
if test "${can_compile_avx}" = "yes" ; then
install_real_avx_block2=yes
......@@ -854,7 +854,7 @@ grep -h "^ *!c>" $srcdir/src/elpa_c_interface.F90 | sed 's/^ *!c>//;' > elpa/elp
echo "Generating Fortran interfaces for C kernels"
grep -h "^ *!f>" $srcdir/src/elpa2_kernels/*.c | sed 's/^ *!f>//;' > elpa/elpa_generated_fortran_interfaces.h || exit 1
grep -h "^ *!f>" $srcdir/src/elpa2_kernels/*.cpp | sed 's/^ *!f>//;' >> elpa/elpa_generated_fortran_interfaces.h || exit 1
#grep -h "^ *!f>" $srcdir/src/elpa2_kernels/*.cpp | sed 's/^ *!f>//;' >> elpa/elpa_generated_fortran_interfaces.h || exit 1
echo "Generating test/shared_sources/generated.h..."
mkdir -p test/shared_sources
......
......@@ -61,7 +61,7 @@
// --------------------------------------------------------------------------------------------------
#include "config-f90.h"
#include <complex>
#include <complex.h>
#include <x86intrin.h>
#define __forceinline __attribute__((always_inline))
......@@ -82,12 +82,10 @@
#endif
extern "C" {
//Forward declaration
static __forceinline void hh_trafo_complex_kernel_12_AVX_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_12_AVX_1hv(double complex* q, double complex* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv(double complex* q, double complex* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv(double complex* q, double complex* hh, int nb, int ldq);
/*
!f>#ifdef HAVE_AVX
......@@ -102,7 +100,7 @@ static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv(std::complex<double
!f>#endif
*/
void single_hh_trafo_complex_avx_avx2_1hv(std::complex<double>* q, std::complex<double>* hh, int* pnb, int* pnq, int* pldq)
void single_hh_trafo_complex_avx_avx2_1hv(double complex* q, double complex* hh, int* pnb, int* pnq, int* pldq)
{
int i;
int nb = *pnb;
......@@ -124,7 +122,7 @@ void single_hh_trafo_complex_avx_avx2_1hv(std::complex<double>* q, std::complex<
}
}
static __forceinline void hh_trafo_complex_kernel_12_AVX_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq)
static __forceinline void hh_trafo_complex_kernel_12_AVX_1hv(double complex* q, double complex* hh, int nb, int ldq)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -319,7 +317,7 @@ void single_hh_trafo_complex_avx_avx2_1hv(std::complex<double>* q, std::complex<
}
}
static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq)
static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv(double complex* q, double complex* hh, int nb, int ldq)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -464,7 +462,7 @@ static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv(std::complex<double>
}
}
static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq)
static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv(double complex* q, double complex* hh, int nb, int ldq)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -558,4 +556,3 @@ static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv(std::complex<double>
_mm256_store_pd(&q_dbl[(2*i*ldq)+4], q2);
}
}
} // extern C
......@@ -61,7 +61,7 @@
// --------------------------------------------------------------------------------------------------
#include "config-f90.h"
#include <complex>
#include <complex.h>
#include <x86intrin.h>
#define __forceinline __attribute__((always_inline))
......@@ -82,13 +82,11 @@
#endif
extern "C" {
//Forward declaration
static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s);
static __forceinline void hh_trafo_complex_kernel_6_AVX_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s);
static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s);
static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s);
static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
static __forceinline void hh_trafo_complex_kernel_6_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
/*
!f>#ifdef HAVE_AVX
......@@ -103,7 +101,7 @@ static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv(std::complex<double>
!f>#endif
*/
void double_hh_trafo_complex_avx_avx2_2hv(std::complex<double>* q, std::complex<double>* hh, int* pnb, int* pnq, int* pldq, int* pldh)
void double_hh_trafo_complex_avx_avx2_2hv(double complex* q, double complex* hh, int* pnb, int* pnq, int* pldq, int* pldh)
{
int i;
int nb = *pnb;
......@@ -111,7 +109,7 @@ void double_hh_trafo_complex_avx_avx2_2hv(std::complex<double>* q, std::complex<
int ldq = *pldq;
int ldh = *pldh;
std::complex<double> s = conj(hh[(ldh)+1])*1.0;
double complex s = conj(hh[(ldh)+1])*1.0;
for (i = 2; i < nb; i++)
{
s += hh[i-1] * conj(hh[(i+ldh)]);
......@@ -142,7 +140,7 @@ void double_hh_trafo_complex_avx_avx2_2hv(std::complex<double>* q, std::complex<
#endif
}
static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s)
static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -575,7 +573,7 @@ static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv(std::complex<double>
_mm256_store_pd(&q_dbl[(2*nb*ldq)+12], q4);
}
static __forceinline void hh_trafo_complex_kernel_6_AVX_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s)
static __forceinline void hh_trafo_complex_kernel_6_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -928,7 +926,7 @@ static __forceinline void hh_trafo_complex_kernel_6_AVX_2hv(std::complex<double>
_mm256_store_pd(&q_dbl[(2*nb*ldq)+8], q3);
}
static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s)
static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -1201,7 +1199,7 @@ static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv(std::complex<double>
_mm256_store_pd(&q_dbl[(2*nb*ldq)+4], q2);
}
static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s)
static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -1393,4 +1391,3 @@ static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv(std::complex<double>
_mm256_store_pd(&q_dbl[(2*nb*ldq)+0], q1);
}
} // extern C
......@@ -62,7 +62,7 @@
#include "config-f90.h"
#include <complex>
#include <complex.h>
#include <x86intrin.h>
#define __forceinline __attribute__((always_inline))
......@@ -72,12 +72,10 @@
#endif
extern "C" {
//Forward declaration
static __forceinline void hh_trafo_complex_kernel_6_SSE_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_2_SSE_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_6_SSE_1hv(double complex* q, double complex* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv(double complex* q, double complex* hh, int nb, int ldq);
static __forceinline void hh_trafo_complex_kernel_2_SSE_1hv(double complex* q, double complex* hh, int nb, int ldq);
/*
!f>#ifdef HAVE_SSE
......@@ -92,7 +90,7 @@ static __forceinline void hh_trafo_complex_kernel_2_SSE_1hv(std::complex<double>
!f>#endif
*/
void single_hh_trafo_complex_sse_1hv(std::complex<double>* q, std::complex<double>* hh, int* pnb, int* pnq, int* pldq)
void single_hh_trafo_complex_sse_1hv(double complex* q, double complex* hh, int* pnb, int* pnq, int* pldq)
{
int i;
int nb = *pnb;
......@@ -114,7 +112,7 @@ void single_hh_trafo_complex_sse_1hv(std::complex<double>* q, std::complex<doubl
}
}
static __forceinline void hh_trafo_complex_kernel_6_SSE_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq)
static __forceinline void hh_trafo_complex_kernel_6_SSE_1hv(double complex* q, double complex* hh, int nb, int ldq)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -309,7 +307,7 @@ static __forceinline void hh_trafo_complex_kernel_6_SSE_1hv(std::complex<double>
}
}
static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq)
static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv(double complex* q, double complex* hh, int nb, int ldq)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -454,7 +452,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv(std::complex<double>
}
}
static __forceinline void hh_trafo_complex_kernel_2_SSE_1hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq)
static __forceinline void hh_trafo_complex_kernel_2_SSE_1hv(double complex* q, double complex* hh, int nb, int ldq)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -548,4 +546,3 @@ static __forceinline void hh_trafo_complex_kernel_2_SSE_1hv(std::complex<double>
_mm_store_pd(&q_dbl[(2*i*ldq)+2], q2);
}
}
} // extern C
......@@ -61,7 +61,7 @@
// --------------------------------------------------------------------------------------------------
#include "config-f90.h"
#include <complex>
#include <complex.h>
#include <x86intrin.h>
#define __forceinline __attribute__((always_inline))
......@@ -70,13 +70,12 @@
#undef __AVX__
#endif
extern "C" {
//Forward declaration
static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s);
static __forceinline void hh_trafo_complex_kernel_3_SSE_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s);
static __forceinline void hh_trafo_complex_kernel_2_SSE_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s);
static __forceinline void hh_trafo_complex_kernel_1_SSE_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s);
static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
static __forceinline void hh_trafo_complex_kernel_3_SSE_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
static __forceinline void hh_trafo_complex_kernel_2_SSE_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
static __forceinline void hh_trafo_complex_kernel_1_SSE_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
/*
!f>#ifdef HAVE_SSE
......@@ -91,7 +90,7 @@ static __forceinline void hh_trafo_complex_kernel_1_SSE_2hv(std::complex<double>
!f>#endif
*/
void double_hh_trafo_complex_sse_2hv(std::complex<double>* q, std::complex<double>* hh, int* pnb, int* pnq, int* pldq, int* pldh)
void double_hh_trafo_complex_sse_2hv(double complex* q, double complex* hh, int* pnb, int* pnq, int* pldq, int* pldh)
{
int i;
int nb = *pnb;
......@@ -99,7 +98,7 @@ void double_hh_trafo_complex_sse_2hv(std::complex<double>* q, std::complex<doubl
int ldq = *pldq;
int ldh = *pldh;
std::complex<double> s = conj(hh[(ldh)+1])*1.0;
double complex s = conj(hh[(ldh)+1])*1.0;
for (i = 2; i < nb; i++)
{
s += hh[i-1] * conj(hh[(i+ldh)]);
......@@ -126,7 +125,7 @@ void double_hh_trafo_complex_sse_2hv(std::complex<double>* q, std::complex<doubl
#endif
}
static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s)
static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -559,7 +558,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv(std::complex<double>
_mm_store_pd(&q_dbl[(2*nb*ldq)+6], q4);
}
static __forceinline void hh_trafo_complex_kernel_3_SSE_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s)
static __forceinline void hh_trafo_complex_kernel_3_SSE_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -912,7 +911,7 @@ static __forceinline void hh_trafo_complex_kernel_3_SSE_2hv(std::complex<double>
_mm_store_pd(&q_dbl[(2*nb*ldq)+4], q3);
}
static __forceinline void hh_trafo_complex_kernel_2_SSE_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s)
static __forceinline void hh_trafo_complex_kernel_2_SSE_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -1185,7 +1184,7 @@ static __forceinline void hh_trafo_complex_kernel_2_SSE_2hv(std::complex<double>
_mm_store_pd(&q_dbl[(2*nb*ldq)+2], q2);
}
static __forceinline void hh_trafo_complex_kernel_1_SSE_2hv(std::complex<double>* q, std::complex<double>* hh, int nb, int ldq, int ldh, std::complex<double> s)
static __forceinline void hh_trafo_complex_kernel_1_SSE_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
{
double* q_dbl = (double*)q;
double* hh_dbl = (double*)hh;
......@@ -1377,4 +1376,3 @@ static __forceinline void hh_trafo_complex_kernel_1_SSE_2hv(std::complex<double>
_mm_store_pd(&q_dbl[(2*nb*ldq)+0], q1);
}
} // extern C
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment