Commit d0ef28a2 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Count number of samples per spectra instead og ehaps

parent 794b9f08
...@@ -89,7 +89,7 @@ public: ...@@ -89,7 +89,7 @@ public:
private: private:
void process(thrust::device_vector<RawVoltageType> const &digitiser_raw, void process(thrust::device_vector<RawVoltageType> const &digitiser_raw,
thrust::device_vector<int64_t> const &sideChannelData, thrust::device_vector<uint64_t> const &sideChannelData,
thrust::device_vector<IntegratedPowerType> &detected, thrust::device_vector<IntegratedPowerType> &detected,
thrust::device_vector<size_t> &noOfBitSet); thrust::device_vector<size_t> &noOfBitSet);
...@@ -103,6 +103,7 @@ private: ...@@ -103,6 +103,7 @@ private:
std::size_t _batch; std::size_t _batch;
std::size_t _nsamps_per_output_spectra; std::size_t _nsamps_per_output_spectra;
std::size_t _nsamps_per_buffer; std::size_t _nsamps_per_buffer;
std::size_t _nsamps_per_heap;
HandlerType &_handler; HandlerType &_handler;
cufftHandle _fft_plan; cufftHandle _fft_plan;
...@@ -113,7 +114,7 @@ private: ...@@ -113,7 +114,7 @@ private:
// Input data // Input data
DoubleDeviceBuffer<RawVoltageType> _raw_voltage_db; DoubleDeviceBuffer<RawVoltageType> _raw_voltage_db;
DoubleDeviceBuffer<int64_t> _sideChannelData_db; DoubleDeviceBuffer<uint64_t> _sideChannelData_db;
// Output data // Output data
DoubleDeviceBuffer<IntegratedPowerType> _power_db; DoubleDeviceBuffer<IntegratedPowerType> _power_db;
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <thrust/system/cuda/execution_policy.h> #include <thrust/system/cuda/execution_policy.h>
#include <iostream> #include <iostream>
#include <iomanip>
#include <cstring> #include <cstring>
#include <sstream> #include <sstream>
...@@ -15,14 +16,14 @@ namespace psrdada_cpp { ...@@ -15,14 +16,14 @@ namespace psrdada_cpp {
namespace effelsberg { namespace effelsberg {
namespace edd { namespace edd {
__global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const int64_t* __restrict__ sideChannelData, __global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const uint64_t* __restrict__ sideChannelData,
size_t N, size_t heapSize, size_t bitpos, size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel, const float* __restrict__ _baseLineN) { size_t noOfSideChannels, size_t selectedSideChannel, const float* __restrict__ _baseLineN) {
float baseLine = (*_baseLineN) / N; float baseLine = (*_baseLineN) / N;
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < N); for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < N);
i += blockDim.x * gridDim.x) { i += blockDim.x * gridDim.x) {
const float w = G0[i] - baseLine; const float w = G0[i] - baseLine;
const int64_t sideChannelItem = const uint64_t sideChannelItem =
sideChannelData[((i / heapSize) * (noOfSideChannels)) + sideChannelData[((i / heapSize) * (noOfSideChannels)) +
selectedSideChannel]; // Probably not optimal access as selectedSideChannel]; // Probably not optimal access as
// same data is copied for several // same data is copied for several
...@@ -36,23 +37,22 @@ __global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const int ...@@ -36,23 +37,22 @@ __global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const int
} }
__global__ void countBitSet(const int64_t *sideChannelData, size_t N, size_t __global__ void countBitSet(const uint64_t *sideChannelData, size_t N, size_t
bitpos, size_t noOfSideChannels, size_t selectedSideChannel, size_t bitpos, size_t noOfSideChannels, size_t selectedSideChannel, size_t
*nBitsSet) *nBitsSet)
{ {
// really not optimized reduction, but here only trivial array sizes. // really not optimized reduction, but here only trivial array sizes.
int i = blockIdx.x * blockDim.x + threadIdx.x; // run only in one block!
__shared__ unsigned int x[256]; __shared__ size_t x[1024];
size_t ls = 0;
if (i == 0) for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < N);
nBitsSet[0] = 0; i += blockDim.x * gridDim.x) {
ls += TEST_BIT(sideChannelData[i * noOfSideChannels + selectedSideChannel], bitpos);
}
x[threadIdx.x] = ls;
if (i * noOfSideChannels + selectedSideChannel < N)
x[threadIdx.x] = TEST_BIT(sideChannelData[i * noOfSideChannels + selectedSideChannel], bitpos);
else
x[threadIdx.x] = 0;
__syncthreads(); __syncthreads();
for(int s = blockDim.x / 2; s > 0; s = s / 2) for(int s = blockDim.x / 2; s > 0; s = s / 2)
{ {
if (threadIdx.x < s) if (threadIdx.x < s)
...@@ -111,7 +111,7 @@ GatedSpectrometer<HandlerType, IntegratedPowerType>::GatedSpectrometer( ...@@ -111,7 +111,7 @@ GatedSpectrometer<HandlerType, IntegratedPowerType>::GatedSpectrometer(
_selectedSideChannel(selectedSideChannel), _selectedBit(selectedBit), _selectedSideChannel(selectedSideChannel), _selectedBit(selectedBit),
_fft_length(fft_length), _fft_length(fft_length),
_naccumulate(naccumulate), _nbits(nbits), _handler(handler), _fft_plan(0), _naccumulate(naccumulate), _nbits(nbits), _handler(handler), _fft_plan(0),
_call_count(0) { _call_count(0), _nsamps_per_heap(4096) {
// Sanity checks // Sanity checks
assert(((_nbits == 12) || (_nbits == 8))); assert(((_nbits == 12) || (_nbits == 8)));
...@@ -191,8 +191,9 @@ GatedSpectrometer<HandlerType, IntegratedPowerType>::GatedSpectrometer( ...@@ -191,8 +191,9 @@ GatedSpectrometer<HandlerType, IntegratedPowerType>::GatedSpectrometer(
BOOST_LOG_TRIVIAL(debug) << " Powers size: " << _power_db.size() / 2; BOOST_LOG_TRIVIAL(debug) << " Powers size: " << _power_db.size() / 2;
_noOfBitSetsInSideChannel.resize( batch / (_naccumulate / nBlocks)); _noOfBitSetsInSideChannel.resize( batch / (_naccumulate / nBlocks));
thrust::fill(_noOfBitSetsInSideChannel.a().begin(), _noOfBitSetsInSideChannel.a().end(), 0.); thrust::fill(_noOfBitSetsInSideChannel.a().begin(), _noOfBitSetsInSideChannel.a().end(), 0L);
thrust::fill(_noOfBitSetsInSideChannel.b().begin(), _noOfBitSetsInSideChannel.b().end(), 0.); thrust::fill(_noOfBitSetsInSideChannel.b().begin(), _noOfBitSetsInSideChannel.b().end(), 0L);
BOOST_LOG_TRIVIAL(debug) << " Bit set counrer size: " << _noOfBitSetsInSideChannel.size();
// on the host both power are stored in the same data buffer together with // on the host both power are stored in the same data buffer together with
// the number of bit sets // the number of bit sets
...@@ -253,7 +254,7 @@ void GatedSpectrometer<HandlerType, IntegratedPowerType>::init(RawBytes &block) ...@@ -253,7 +254,7 @@ void GatedSpectrometer<HandlerType, IntegratedPowerType>::init(RawBytes &block)
template <class HandlerType, typename IntegratedPowerType> template <class HandlerType, typename IntegratedPowerType>
void GatedSpectrometer<HandlerType, IntegratedPowerType>::process( void GatedSpectrometer<HandlerType, IntegratedPowerType>::process(
thrust::device_vector<RawVoltageType> const &digitiser_raw, thrust::device_vector<RawVoltageType> const &digitiser_raw,
thrust::device_vector<int64_t> const &sideChannelData, thrust::device_vector<uint64_t> const &sideChannelData,
thrust::device_vector<IntegratedPowerType> &detected, thrust::device_vector<size_t> &noOfBitSet) { thrust::device_vector<IntegratedPowerType> &detected, thrust::device_vector<size_t> &noOfBitSet) {
BOOST_LOG_TRIVIAL(debug) << "Unpacking raw voltages"; BOOST_LOG_TRIVIAL(debug) << "Unpacking raw voltages";
switch (_nbits) { switch (_nbits) {
...@@ -280,11 +281,12 @@ void GatedSpectrometer<HandlerType, IntegratedPowerType>::process( ...@@ -280,11 +281,12 @@ void GatedSpectrometer<HandlerType, IntegratedPowerType>::process(
for (size_t i = 0; i < _noOfBitSetsInSideChannel.size(); i++) for (size_t i = 0; i < _noOfBitSetsInSideChannel.size(); i++)
{ // ToDo: Should be in one kernel call { // ToDo: Should be in one kernel call
countBitSet<<<(sideChannelData.size()+255)/256, 256, 0, countBitSet<<<1, 1024, 0, _proc_stream>>>(thrust::raw_pointer_cast(sideChannelData.data() + i * sideChannelData.size() / _noOfBitSetsInSideChannel.size() ),
_proc_stream>>>(thrust::raw_pointer_cast(sideChannelData.data() + i * sideChannelData.size() / _noOfBitSetsInSideChannel.size() ),
sideChannelData.size() / _noOfBitSetsInSideChannel.size(), _selectedBit, sideChannelData.size() / _noOfBitSetsInSideChannel.size(), _selectedBit,
_dadaBufferLayout.getNSideChannels(), _selectedBit, _dadaBufferLayout.getNSideChannels(), _selectedBit,
thrust::raw_pointer_cast(noOfBitSet.data() + i)); thrust::raw_pointer_cast(noOfBitSet.data() + i));
CUDA_ERROR_CHECK(cudaStreamSynchronize(_proc_stream));
} }
BOOST_LOG_TRIVIAL(debug) << "Performing FFT 1"; BOOST_LOG_TRIVIAL(debug) << "Performing FFT 1";
...@@ -337,6 +339,8 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b ...@@ -337,6 +339,8 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
static_cast<void *>(_sideChannelData_db.a_ptr()), static_cast<void *>(_sideChannelData_db.a_ptr()),
static_cast<void *>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap()), static_cast<void *>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap()),
_dadaBufferLayout.sizeOfSideChannelData(), cudaMemcpyHostToDevice, _h2d_stream)); _dadaBufferLayout.sizeOfSideChannelData(), cudaMemcpyHostToDevice, _h2d_stream));
BOOST_LOG_TRIVIAL(debug) << "First side channel item: 0x" << std::setw(12) << std::setfill('0') << std::hex << (reinterpret_cast<uint64_t*>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap()))[0] << std::dec;
if (_call_count == 1) { if (_call_count == 1) {
return false; return false;
...@@ -354,7 +358,7 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b ...@@ -354,7 +358,7 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
_noOfBitSetsInSideChannel.swap(); _noOfBitSetsInSideChannel.swap();
// move to specific stream! // move to specific stream!
thrust::fill(thrust::cuda::par.on(_proc_stream),_power_db.a().begin(), _power_db.a().end(), 0.); thrust::fill(thrust::cuda::par.on(_proc_stream),_power_db.a().begin(), _power_db.a().end(), 0.);
thrust::fill(thrust::cuda::par.on(_proc_stream), _noOfBitSetsInSideChannel.a().begin(), _noOfBitSetsInSideChannel.a().end(), 0.); thrust::fill(thrust::cuda::par.on(_proc_stream), _noOfBitSetsInSideChannel.a().begin(), _noOfBitSetsInSideChannel.a().end(), 0L);
} }
process(_raw_voltage_db.b(), _sideChannelData_db.b(), _power_db.a(), _noOfBitSetsInSideChannel.a()); process(_raw_voltage_db.b(), _sideChannelData_db.b(), _power_db.a(), _noOfBitSetsInSideChannel.a());
...@@ -379,10 +383,11 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b ...@@ -379,10 +383,11 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
cudaMemcpyDeviceToHost, _d2h_stream)); cudaMemcpyDeviceToHost, _d2h_stream));
// copy noOf bit set data // copy noOf bit set data
CUDA_ERROR_CHECK( CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 2 * _nchans ), cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType)),
static_cast<void *>(_noOfBitSetsInSideChannel.b_ptr() + i ), static_cast<void *>(_noOfBitSetsInSideChannel.b_ptr() + i ),
1 * sizeof(size_t), 1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream)); cudaMemcpyDeviceToHost, _d2h_stream));
BOOST_LOG_TRIVIAL(info) << " TOBR NOF BITS SET: " << _noOfBitSetsInSideChannel.b()[i];
} }
BOOST_LOG_TRIVIAL(debug) << "Copy Data back to host"; BOOST_LOG_TRIVIAL(debug) << "Copy Data back to host";
...@@ -398,10 +403,11 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b ...@@ -398,10 +403,11 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
size_t memOffset = 2 * i * (_nchans * sizeof(IntegratedPowerType) + sizeof(size_t)); size_t memOffset = 2 * i * (_nchans * sizeof(IntegratedPowerType) + sizeof(size_t));
size_t* on_values = reinterpret_cast<size_t*> (_host_power_db.b_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType)); size_t* on_values = reinterpret_cast<size_t*> (_host_power_db.b_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType));
*on_values *= _nsamps_per_heap;
size_t* off_values = reinterpret_cast<size_t*> (_host_power_db.b_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType) + sizeof(size_t)); size_t* off_values = reinterpret_cast<size_t*> (_host_power_db.b_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType) + sizeof(size_t));
*off_values = _dadaBufferLayout.getNHeaps() - (*on_values); *off_values = _nsamps_per_output_spectra - (*on_values);
BOOST_LOG_TRIVIAL(info) << " " << i << ": No of bit set in side channel: " << *on_values << " / " << *off_values << std::endl; BOOST_LOG_TRIVIAL(info) << " " << i << ": No of samples wo/w. bit set in side channel: " << *on_values << " / " << *off_values << std::endl;
} }
// Wrap in a RawBytes object here; // Wrap in a RawBytes object here;
......
...@@ -11,7 +11,7 @@ namespace { ...@@ -11,7 +11,7 @@ namespace {
TEST(GatedSpectrometer, BitManipulationMacros) { TEST(GatedSpectrometer, BitManipulationMacros) {
for (int i = 0; i < 64; i++) { for (int i = 0; i < 64; i++) {
int64_t v = 0; uint64_t v = 0;
SET_BIT(v, i); SET_BIT(v, i);
for (int j = 0; j < 64; j++) { for (int j = 0; j < 64; j++) {
...@@ -57,7 +57,7 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -57,7 +57,7 @@ TEST(GatedSpectrometer, GatingKernel)
thrust::device_vector<float> G0(blockSize * nBlocks); thrust::device_vector<float> G0(blockSize * nBlocks);
thrust::device_vector<float> G1(blockSize * nBlocks); thrust::device_vector<float> G1(blockSize * nBlocks);
thrust::device_vector<int64_t> _sideChannelData(nBlocks); thrust::device_vector<uint64_t> _sideChannelData(nBlocks);
thrust::device_vector<float> baseLine(1); thrust::device_vector<float> baseLine(1);
thrust::fill(G0.begin(), G0.end(), 42); thrust::fill(G0.begin(), G0.end(), 42);
...@@ -67,8 +67,8 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -67,8 +67,8 @@ TEST(GatedSpectrometer, GatingKernel)
// everything to G0 // everything to G0
{ {
baseLine[0] = 0.; baseLine[0] = 0.;
const int64_t *sideCD = const uint64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data())); (uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>( psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>(
thrust::raw_pointer_cast(G0.data()), thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD, thrust::raw_pointer_cast(G1.data()), sideCD,
...@@ -88,8 +88,8 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -88,8 +88,8 @@ TEST(GatedSpectrometer, GatingKernel)
{ {
baseLine[0] = -5. * G0.size(); baseLine[0] = -5. * G0.size();
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L); thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L);
const int64_t *sideCD = const uint64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data())); (uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>( psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>(
thrust::raw_pointer_cast(G0.data()), thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD, thrust::raw_pointer_cast(G1.data()), sideCD,
...@@ -108,24 +108,24 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -108,24 +108,24 @@ TEST(GatedSpectrometer, GatingKernel)
TEST(GatedSpectrometer, countBitSet) { TEST(GatedSpectrometer, countBitSet) {
size_t nBlocks = 16; size_t nBlocks = 100000;
thrust::device_vector<int64_t> _sideChannelData(nBlocks); thrust::device_vector<uint64_t> _sideChannelData(nBlocks);
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0); thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0);
const int64_t *sideCD = const uint64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data())); (uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
thrust::device_vector<size_t> res(1); thrust::device_vector<size_t> res(1);
// test 1 side channel // test 1 side channel
res[0] = 0; res[0] = 0;
psrdada_cpp::effelsberg::edd:: psrdada_cpp::effelsberg::edd::
countBitSet<<<(_sideChannelData.size() + 255) / 256, 256>>>( countBitSet<<<1, 1024>>>(
sideCD, nBlocks, 0, 1, 0, thrust::raw_pointer_cast(res.data())); sideCD, nBlocks, 0, 1, 0, thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], 0u); EXPECT_EQ(res[0], 0u);
res[0] = 0; res[0] = 0;
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L); thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L);
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size() + 255) / 256, 256>>>( psrdada_cpp::effelsberg::edd::countBitSet<<<1, 1024>>>(
sideCD, nBlocks, 0, 1, 0, thrust::raw_pointer_cast(res.data())); sideCD, nBlocks, 0, 1, 0, thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], nBlocks); EXPECT_EQ(res[0], nBlocks);
...@@ -135,13 +135,13 @@ TEST(GatedSpectrometer, countBitSet) { ...@@ -135,13 +135,13 @@ TEST(GatedSpectrometer, countBitSet) {
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0); thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0);
for (size_t i = 2; i < _sideChannelData.size(); i += nSideChannels) for (size_t i = 2; i < _sideChannelData.size(); i += nSideChannels)
_sideChannelData[i] = 1L; _sideChannelData[i] = 1L;
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size() + 255) / 256, 256>>>( psrdada_cpp::effelsberg::edd::countBitSet<<<1, 1024>>>(
sideCD, nBlocks, 0, nSideChannels, 3, sideCD, nBlocks, 0, nSideChannels, 3,
thrust::raw_pointer_cast(res.data())); thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], 0u); EXPECT_EQ(res[0], 0u);
res[0] = 0; res[0] = 0;
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size() + 255) / 256, 256>>>( psrdada_cpp::effelsberg::edd::countBitSet<<<1, 1024>>>(
sideCD, nBlocks, 0, nSideChannels, 2, sideCD, nBlocks, 0, nSideChannels, 2,
thrust::raw_pointer_cast(res.data())); thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], nBlocks / nSideChannels); EXPECT_EQ(res[0], nBlocks / nSideChannels);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment