Commit e4c4919e authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Added counting of flags in sidechannels

parent e71e9823
......@@ -78,7 +78,8 @@ private:
void process(thrust::device_vector<RawVoltageType> const &digitiser_raw,
thrust::device_vector<RawVoltageType> const &sideChannelData,
thrust::device_vector<IntegratedPowerType> &detected_G0,
thrust::device_vector<IntegratedPowerType> &detected_G1);
thrust::device_vector<IntegratedPowerType> &detected_G1,
thrust::device_vector<int> &noOfBitSet);
private:
std::size_t _buffer_bytes;
......@@ -106,6 +107,7 @@ private:
DoubleDeviceBuffer<IntegratedPowerType> _power_db_G0;
DoubleDeviceBuffer<IntegratedPowerType> _power_db_G1;
DoubleDeviceBuffer<RawVoltageType> _sideChannelData_db;
DoubleDeviceBuffer<int> _noOfBitSetsInSideChannel;
thrust::device_vector<UnpackedVoltageType> _unpacked_voltage_G0;
thrust::device_vector<UnpackedVoltageType> _unpacked_voltage_G1;
......
......@@ -3,6 +3,7 @@
#include "psrdada_cpp/cuda_utils.hpp"
#include "psrdada_cpp/raw_bytes.hpp"
#include <cuda.h>
#include <cuda_profiler_api.h>
#include <iostream>
......@@ -30,6 +31,32 @@ __global__ void gating(float *G0, float *G1, const int64_t *sideChannelData,
}
}
__global__ void countBitSet(const int64_t *sideChannelData, size_t N, int64_t bitpos, int64_t noOfSideChannels, int64_t selectedSideChannel, int *nBitsSet)
{
// really not optimized reduction, but here only trivial array sizes.
int i = blockIdx.x*blockDim.x + threadIdx.x;
__shared__ int x[256];
if (i == 0)
nBitsSet[0] = 0;
if (i < N / noOfSideChannels)
x[threadIdx.x] = TEST_BIT(sideChannelData[i * noOfSideChannels + selectedSideChannel], bitpos);
else
x[threadIdx.x] = 0;
__syncthreads();
for(int s = blockDim.x / 2; s > 0; s = s / 2)
{
if (threadIdx.x < s)
x[threadIdx.x] += x[threadIdx.x + s];
__syncthreads();
}
if(threadIdx.x == 0)
atomicAdd(nBitsSet, x[threadIdx.x]);
}
template <class HandlerType>
GatedSpectrometer<HandlerType>::GatedSpectrometer(
......@@ -109,6 +136,7 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
<< _power_db_G1.size();
// on the host both power are stored in the same data buffer
_host_power_db.resize( _power_db_G0.size() + _power_db_G1 .size());
_noOfBitSetsInSideChannel.resize(1);
CUDA_ERROR_CHECK(cudaStreamCreate(&_h2d_stream));
CUDA_ERROR_CHECK(cudaStreamCreate(&_proc_stream));
......@@ -152,7 +180,7 @@ void GatedSpectrometer<HandlerType>::process(
thrust::device_vector<RawVoltageType> const &digitiser_raw,
thrust::device_vector<RawVoltageType> const &sideChannelData,
thrust::device_vector<IntegratedPowerType> &detected_G0,
thrust::device_vector<IntegratedPowerType> &detected_G1) {
thrust::device_vector<IntegratedPowerType> &detected_G1, thrust::device_vector<int> &noOfBitSet) {
BOOST_LOG_TRIVIAL(debug) << "Unpacking raw voltages";
switch (_nbits) {
case 8:
......@@ -167,7 +195,7 @@ void GatedSpectrometer<HandlerType>::process(
// raw voltage buffer is free again
CUDA_ERROR_CHECK(cudaEventRecord(_procB, _proc_stream));
BOOST_LOG_TRIVIAL(debug) << "Perfore gating";
BOOST_LOG_TRIVIAL(debug) << "Perform gating";
const int64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(sideChannelData.data()));
gating<<<1024, 1024, 0, _proc_stream>>>(
......@@ -175,7 +203,8 @@ void GatedSpectrometer<HandlerType>::process(
thrust::raw_pointer_cast(_unpacked_voltage_G1.data()), sideCD,
_unpacked_voltage_G0.size(), _speadHeapSize, _selectedBit, _nSideChannels,
_selectedSideChannel);
// CUDA_ERROR_CHECK(cudaStreamSynchronize(_proc_stream));
countBitSet<<<(sideChannelData.size()+255)/256, 256, 0, _proc_stream>>>(sideCD, sideChannelData.size(), _selectedBit, _nSideChannels, _selectedBit, thrust::raw_pointer_cast(noOfBitSet.data()));
BOOST_LOG_TRIVIAL(debug) << "Performing FFT 1";
UnpackedVoltageType *_unpacked_voltage_ptr =
......@@ -207,7 +236,8 @@ bool GatedSpectrometer<HandlerType>::operator()(RawBytes &block) {
<< block.used_bytes() << " byte, expected "
<< _buffer_bytes << " byte)";
cudaDeviceSynchronize();
return true;
cudaProfilerStop();
return true;
}
// CUDA_ERROR_CHECK(cudaStreamSynchronize(_h2d_stream));
......@@ -237,9 +267,10 @@ bool GatedSpectrometer<HandlerType>::operator()(RawBytes &block) {
// Synchronize all streams
_power_db_G0.swap();
_power_db_G1.swap();
_noOfBitSetsInSideChannel.swap();
process(_raw_voltage_db.b(), _sideChannelData_db.b(), _power_db_G0.a(),
_power_db_G1.a());
_power_db_G1.a(), _noOfBitSetsInSideChannel.a());
// signal that data block has been processed
//CUDA_ERROR_CHECK(cudaStreamSynchronize(_proc_stream));
......@@ -262,6 +293,13 @@ bool GatedSpectrometer<HandlerType>::operator()(RawBytes &block) {
_power_db_G1.size() * sizeof(IntegratedPowerType), cudaMemcpyDeviceToHost,
_d2h_stream));
int R[1];
CUDA_ERROR_CHECK(cudaMemcpyAsync(static_cast<void *>(R),
static_cast<void *>(_noOfBitSetsInSideChannel.b_ptr()),
1 * sizeof(int),cudaMemcpyDeviceToHost, _d2h_stream));
BOOST_LOG_TRIVIAL(info) << "NOOF BIT SET IN SIDE CHANNEL: " << R[0] << std::endl;
if (_call_count == 3) {
return false;
}
......
......@@ -108,6 +108,42 @@ TEST(GatedSpectrometer, GatingKernel)
}
TEST(GatedSpectrometer, countBitSet)
{
size_t nBlocks = 16;
thrust::device_vector<int64_t> _sideChannelData(nBlocks);
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0);
const int64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
thrust::device_vector<int> res(1);
// test 1 side channel
res[0] = 0;
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size()+255)/256, 256>>>(sideCD, nBlocks, 0, 1, 0, thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], 0);
res[0] = 0;
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L);
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size()+255)/256, 256>>>(sideCD, nBlocks, 0, 1, 0, thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], nBlocks);
// test mult side channels w stride.
res[0] = 0;
int nSideChannels = 4;
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0);
for (size_t i = 2; i < _sideChannelData.size(); i+=nSideChannels)
_sideChannelData[i] = 1L;
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size()+255)/256, 256>>>(sideCD, nBlocks, 0, nSideChannels, 3, thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], 0);
res[0] = 0;
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size()+255)/256, 256>>>(sideCD, nBlocks, 0, nSideChannels, 2, thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], nBlocks / nSideChannels);
}
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment