From d304be57b04cbb38dfd628c8b57b2d40506f9335 Mon Sep 17 00:00:00 2001 From: Tobias Winchen <tobias.winchen@rwth-aachen.de> Date: Fri, 13 Mar 2020 14:54:55 +0100 Subject: [PATCH] Use function for reduction in gating kernel --- .../edd/detail/GatedSpectrometer.cu | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu b/psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu index bcc0f88a..853a9af7 100644 --- a/psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu +++ b/psrdada_cpp/effelsberg/edd/detail/GatedSpectrometer.cu @@ -17,13 +17,29 @@ namespace psrdada_cpp { namespace effelsberg { namespace edd { +template<typename T> +__device__ void reduce(T *x, const T &v) +{ + x[threadIdx.x] = v; + __syncthreads(); + for(int s = blockDim.x / 2; s > 0; s = s / 2) + { + if (threadIdx.x < s) + x[threadIdx.x] += x[threadIdx.x + s]; + __syncthreads(); + } + + +} + + __global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const uint64_t* __restrict__ sideChannelData, size_t N, size_t heapSize, size_t bitpos, size_t noOfSideChannels, size_t selectedSideChannel, const float* __restrict__ _baseLineN, uint64_cu* stats_G0, uint64_cu* stats_G1) { float baseLine = (*_baseLineN) / N; - // statistics values for samopels to G0, G1 + // statistics values for samopels to G0, G1 uint32_t _G0stats = 0; uint32_t _G1stats = 0; @@ -47,32 +63,13 @@ __global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const uin __shared__ uint32_t x[1024]; // Reduce G0, G1 - x[threadIdx.x] = _G0stats; - __syncthreads(); - for(int s = blockDim.x / 2; s > 0; s = s / 2) - { - if (threadIdx.x < s) - x[threadIdx.x] += x[threadIdx.x + s]; - __syncthreads(); - } - + reduce<uint32_t>(x, _G0stats); if(threadIdx.x == 0) atomicAdd(stats_G0, (uint64_cu) x[threadIdx.x]); - x[threadIdx.x] = _G1stats; - __syncthreads(); - for(int s = blockDim.x / 2; s > 0; s = s / 2) - { - if (threadIdx.x < s) - x[threadIdx.x] += x[threadIdx.x + s]; - __syncthreads(); - } - + reduce<uint32_t>(x, _G1stats); if(threadIdx.x == 0) - { - uint64_cu y = x[threadIdx.x]; - atomicAdd(stats_G1, y) ; - } + atomicAdd(stats_G1, (uint64_cu) x[threadIdx.x]); } @@ -420,7 +417,7 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b size_t samples_lost = _nsamps_per_output_spectra - (*on_values) - (*off_values); total_samples_lost += samples_lost; - BOOST_LOG_TRIVIAL(info) << " Heap " << i << ":\n" + BOOST_LOG_TRIVIAL(info) << " Heap " << i << ":\n" <<" Samples with bit set : " << *on_values << std::endl <<" Samples without bit set: " << *off_values << std::endl <<" Samples lost : " << samples_lost << " out of " << _nsamps_per_output_spectra << std::endl; -- GitLab