Commit 3bb78876 authored by Andreas Marek
Browse files

Fix error in compute_hh_trafo

parent a4fc25e4
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -373,7 +373,7 @@ instruction_set = { ...@@ -373,7 +373,7 @@ instruction_set = {
"sse" : " --enable-sse --enable-sse-assembly", "sse" : " --enable-sse --enable-sse-assembly",
"avx" : " --enable-avx", "avx" : " --enable-avx",
"avx2" : " --enable-avx2", "avx2" : " --enable-avx2",
"power8" : " --disable-sse --disable-sse-assembly --disable-avx --disable-avx2 --disable-mpi-module --with-GPU-compute-capability=sm_60 ", "power8" : " --enable-vsx --disable-sse --disable-sse-assembly --disable-avx --disable-avx2 --disable-mpi-module --with-GPU-compute-capability=sm_60 ",
} }
......
...@@ -544,7 +544,7 @@ ...@@ -544,7 +544,7 @@
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off), & & (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off), &
nbw, nl, stripe_width) nbw, nl, stripe_width)
#endif #endif
#endif /* WITH_OPENMP */ #endif /* WITH_OPENMP */
...@@ -877,6 +877,43 @@ ...@@ -877,6 +877,43 @@
#endif /* REALCASE == 1 */ #endif /* REALCASE == 1 */
#if REALCASE == 1
! SSE block-2 kernel, real case.
! This region is compiled only for the real case and only when the
! SSE block-2 real kernel has been enabled at configure time.
#if defined(WITH_REAL_SSE_BLOCK2_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
! No fixed kernel was chosen at build time, so the kernel is selected at
! run time through the value of the variable kernel.
if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK2) then
#endif /* not WITH_FIXED_REAL_KERNEL */
! With a fixed kernel, the loop below is compiled only if neither the SSE
! block-6 nor the SSE block-4 real kernel is enabled.
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL) && !defined(WITH_REAL_SSE_BLOCK4_KERNEL))
! Walk the columns from ncols down to 2 in steps of two; every pass handles
! the column pair (j, j-1).
! NOTE(review): for odd ncols the index j = 1 is never visited by this loop -
! confirm that ncols is always even here or that the leftover column is
! handled elsewhere.
do j = ncols, 2, -2
! Copy the two bcast_buffer columns belonging to this pair into w.
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
! OpenMP build: the array a carries an additional trailing my_thread index.
! The routine name is assembled by the preprocessor from the data type and
! precision macros around the _sse_2hv_ infix. The kernel receives the
! address (via c_loc) of element (1, j+off+a_off-1) of the current stripe.
call double_hh_trafo_&
&MATH_DATATYPE&
&_sse_2hv_&
&PRECISION &
& (c_loc(a(1,j+off+a_off-1,istripe,my_thread)), w, nbw, nl, stripe_width, nbw)
#else
! Non-OpenMP build: same call, but a has no thread index.
call double_hh_trafo_&
&MATH_DATATYPE&
&_sse_2hv_&
&PRECISION &
& (c_loc(a(1,j+off+a_off-1,istripe)), w, nbw, nl, stripe_width, nbw)
#endif
enddo
#endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL) && !defined(WITH_REAL_SSE_BLOCK4_KERNEL)) */
#ifndef WITH_FIXED_REAL_KERNEL
! Closes the run-time kernel selection opened above.
endif
#endif /* not WITH_FIXED_REAL_KERNEL */
#endif /* WITH_REAL_SSE_BLOCK2_KERNEL */
#endif /* REALCASE == 1 */
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! implementation of sparc64 block 2 complex case ! implementation of sparc64 block 2 complex case
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment