Commit d03a603a authored by Andreas Marek's avatar Andreas Marek

Move reset_dotp_buffers from the complex-case into the real-case CUDA file

parent 0a869719
......@@ -387,35 +387,6 @@ __device__ __forceinline__ void sync_threads()
#define ALREADY_DEFINED_SYNC 1
#endif
// Reset the entire contents (128 entries) of a reduction buffer to zero.
//
// Parameters:
//   s_block - buffer holding at least 128 elements; entries 0..127 are cleared.
//
// The threads of the block clear the buffer cooperatively: the thread with
// index t (threadIdx.x) zeroes entries t, t + blockDim.x, t + 2 * blockDim.x, ...
// This covers all 128 entries for ANY blockDim.x >= 1. An earlier formulation
// split the buffer into 128 / blockDim.x sized chunks, which silently left the
// tail of the buffer untouched whenever blockDim.x did not divide 128.
//
// Only threadIdx.x / blockDim.x are consulted, i.e. a one-dimensional thread
// block layout is assumed.
//
// NOTE: no barrier is issued here; a caller that reads entries written by
// other threads has to synchronize the block first.
#ifdef DOUBLE_PRECISION_COMPLEX
__device__ __forceinline__ void reset_dotp_buffers_double(double * const __restrict__ s_block)
#else
__device__ __forceinline__ void reset_dotp_buffers_single(float * const __restrict__ s_block)
#endif
{
    // Number of entries in the reduction buffer (the original code cleared
    // indices 0..63 and 64..127).
    const int n_entries = 128;

    // Consecutive threads touch consecutive entries on every pass.
    for (int idx = threadIdx.x; idx < n_entries; idx += blockDim.x)
    {
        // Integer zero converts exactly to float and double alike, so no
        // double-precision literal sneaks into the single-precision build.
        s_block[idx] = 0;
    }
}
#ifdef DOUBLE_PRECISION_COMPLEX
__device__ void reset_dotp_buffers_complex_double( cuDoubleComplex * const __restrict__ s_block)
#else
......
......@@ -89,6 +89,35 @@ static __device__ __forceinline__ cuDoubleComplex shfl_xor_complex(cuDoubleComp
//#define ALREADY_DEFINED_SYNC 1
//#endif
// Reset the entire contents (128 entries) of a reduction buffer to zero.
//
// Parameters:
//   s_block - buffer holding at least 128 elements; entries 0..127 are cleared.
//
// The threads of the block clear the buffer cooperatively: the thread with
// index t (threadIdx.x) zeroes entries t, t + blockDim.x, t + 2 * blockDim.x, ...
// This covers all 128 entries for ANY blockDim.x >= 1. An earlier formulation
// split the buffer into 128 / blockDim.x sized chunks, which silently left the
// tail of the buffer untouched whenever blockDim.x did not divide 128.
//
// Only threadIdx.x / blockDim.x are consulted, i.e. a one-dimensional thread
// block layout is assumed.
//
// NOTE: no barrier is issued here; a caller that reads entries written by
// other threads has to synchronize the block first.
#ifdef DOUBLE_PRECISION_REAL
__device__ __forceinline__ void reset_dotp_buffers_double(double * const __restrict__ s_block)
#else
__device__ __forceinline__ void reset_dotp_buffers_single(float * const __restrict__ s_block)
#endif
{
    // Number of entries in the reduction buffer (the original code cleared
    // indices 0..63 and 64..127).
    const int n_entries = 128;

    // Consecutive threads touch consecutive entries on every pass.
    for (int idx = threadIdx.x; idx < n_entries; idx += blockDim.x)
    {
        // Integer zero converts exactly to float and double alike, so no
        // double-precision literal sneaks into the single-precision build.
        s_block[idx] = 0;
    }
}
// =========================
// Backtransformation kernel
// =========================
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment