Commit d0ef28a2 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Count number of samples per spectra instead og ehaps

parent 794b9f08
......@@ -89,7 +89,7 @@ public:
private:
void process(thrust::device_vector<RawVoltageType> const &digitiser_raw,
thrust::device_vector<int64_t> const &sideChannelData,
thrust::device_vector<uint64_t> const &sideChannelData,
thrust::device_vector<IntegratedPowerType> &detected,
thrust::device_vector<size_t> &noOfBitSet);
......@@ -103,6 +103,7 @@ private:
std::size_t _batch;
std::size_t _nsamps_per_output_spectra;
std::size_t _nsamps_per_buffer;
std::size_t _nsamps_per_heap;
HandlerType &_handler;
cufftHandle _fft_plan;
......@@ -113,7 +114,7 @@ private:
// Input data
DoubleDeviceBuffer<RawVoltageType> _raw_voltage_db;
DoubleDeviceBuffer<int64_t> _sideChannelData_db;
DoubleDeviceBuffer<uint64_t> _sideChannelData_db;
// Output data
DoubleDeviceBuffer<IntegratedPowerType> _power_db;
......
......@@ -8,6 +8,7 @@
#include <thrust/system/cuda/execution_policy.h>
#include <iostream>
#include <iomanip>
#include <cstring>
#include <sstream>
......@@ -15,14 +16,14 @@ namespace psrdada_cpp {
namespace effelsberg {
namespace edd {
__global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const int64_t* __restrict__ sideChannelData,
__global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const uint64_t* __restrict__ sideChannelData,
size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel, const float* __restrict__ _baseLineN) {
float baseLine = (*_baseLineN) / N;
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < N);
i += blockDim.x * gridDim.x) {
const float w = G0[i] - baseLine;
const int64_t sideChannelItem =
const uint64_t sideChannelItem =
sideChannelData[((i / heapSize) * (noOfSideChannels)) +
selectedSideChannel]; // Probably not optimal access as
// same data is copied for several
......@@ -36,23 +37,22 @@ __global__ void gating(float* __restrict__ G0, float* __restrict__ G1, const int
}
__global__ void countBitSet(const int64_t *sideChannelData, size_t N, size_t
__global__ void countBitSet(const uint64_t *sideChannelData, size_t N, size_t
bitpos, size_t noOfSideChannels, size_t selectedSideChannel, size_t
*nBitsSet)
{
// really not optimized reduction, but here only trivial array sizes.
int i = blockIdx.x * blockDim.x + threadIdx.x;
__shared__ unsigned int x[256];
// run only in one block!
__shared__ size_t x[1024];
size_t ls = 0;
if (i == 0)
nBitsSet[0] = 0;
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < N);
i += blockDim.x * gridDim.x) {
ls += TEST_BIT(sideChannelData[i * noOfSideChannels + selectedSideChannel], bitpos);
}
x[threadIdx.x] = ls;
if (i * noOfSideChannels + selectedSideChannel < N)
x[threadIdx.x] = TEST_BIT(sideChannelData[i * noOfSideChannels + selectedSideChannel], bitpos);
else
x[threadIdx.x] = 0;
__syncthreads();
for(int s = blockDim.x / 2; s > 0; s = s / 2)
{
if (threadIdx.x < s)
......@@ -111,7 +111,7 @@ GatedSpectrometer<HandlerType, IntegratedPowerType>::GatedSpectrometer(
_selectedSideChannel(selectedSideChannel), _selectedBit(selectedBit),
_fft_length(fft_length),
_naccumulate(naccumulate), _nbits(nbits), _handler(handler), _fft_plan(0),
_call_count(0) {
_call_count(0), _nsamps_per_heap(4096) {
// Sanity checks
assert(((_nbits == 12) || (_nbits == 8)));
......@@ -191,8 +191,9 @@ GatedSpectrometer<HandlerType, IntegratedPowerType>::GatedSpectrometer(
BOOST_LOG_TRIVIAL(debug) << " Powers size: " << _power_db.size() / 2;
_noOfBitSetsInSideChannel.resize( batch / (_naccumulate / nBlocks));
thrust::fill(_noOfBitSetsInSideChannel.a().begin(), _noOfBitSetsInSideChannel.a().end(), 0.);
thrust::fill(_noOfBitSetsInSideChannel.b().begin(), _noOfBitSetsInSideChannel.b().end(), 0.);
thrust::fill(_noOfBitSetsInSideChannel.a().begin(), _noOfBitSetsInSideChannel.a().end(), 0L);
thrust::fill(_noOfBitSetsInSideChannel.b().begin(), _noOfBitSetsInSideChannel.b().end(), 0L);
BOOST_LOG_TRIVIAL(debug) << " Bit set counrer size: " << _noOfBitSetsInSideChannel.size();
// on the host both power are stored in the same data buffer together with
// the number of bit sets
......@@ -253,7 +254,7 @@ void GatedSpectrometer<HandlerType, IntegratedPowerType>::init(RawBytes &block)
template <class HandlerType, typename IntegratedPowerType>
void GatedSpectrometer<HandlerType, IntegratedPowerType>::process(
thrust::device_vector<RawVoltageType> const &digitiser_raw,
thrust::device_vector<int64_t> const &sideChannelData,
thrust::device_vector<uint64_t> const &sideChannelData,
thrust::device_vector<IntegratedPowerType> &detected, thrust::device_vector<size_t> &noOfBitSet) {
BOOST_LOG_TRIVIAL(debug) << "Unpacking raw voltages";
switch (_nbits) {
......@@ -280,11 +281,12 @@ void GatedSpectrometer<HandlerType, IntegratedPowerType>::process(
for (size_t i = 0; i < _noOfBitSetsInSideChannel.size(); i++)
{ // ToDo: Should be in one kernel call
countBitSet<<<(sideChannelData.size()+255)/256, 256, 0,
_proc_stream>>>(thrust::raw_pointer_cast(sideChannelData.data() + i * sideChannelData.size() / _noOfBitSetsInSideChannel.size() ),
countBitSet<<<1, 1024, 0, _proc_stream>>>(thrust::raw_pointer_cast(sideChannelData.data() + i * sideChannelData.size() / _noOfBitSetsInSideChannel.size() ),
sideChannelData.size() / _noOfBitSetsInSideChannel.size(), _selectedBit,
_dadaBufferLayout.getNSideChannels(), _selectedBit,
thrust::raw_pointer_cast(noOfBitSet.data() + i));
CUDA_ERROR_CHECK(cudaStreamSynchronize(_proc_stream));
}
BOOST_LOG_TRIVIAL(debug) << "Performing FFT 1";
......@@ -337,6 +339,8 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
static_cast<void *>(_sideChannelData_db.a_ptr()),
static_cast<void *>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap()),
_dadaBufferLayout.sizeOfSideChannelData(), cudaMemcpyHostToDevice, _h2d_stream));
BOOST_LOG_TRIVIAL(debug) << "First side channel item: 0x" << std::setw(12) << std::setfill('0') << std::hex << (reinterpret_cast<uint64_t*>(block.ptr() + _dadaBufferLayout.sizeOfData() + _dadaBufferLayout.sizeOfGap()))[0] << std::dec;
if (_call_count == 1) {
return false;
......@@ -354,7 +358,7 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
_noOfBitSetsInSideChannel.swap();
// move to specific stream!
thrust::fill(thrust::cuda::par.on(_proc_stream),_power_db.a().begin(), _power_db.a().end(), 0.);
thrust::fill(thrust::cuda::par.on(_proc_stream), _noOfBitSetsInSideChannel.a().begin(), _noOfBitSetsInSideChannel.a().end(), 0.);
thrust::fill(thrust::cuda::par.on(_proc_stream), _noOfBitSetsInSideChannel.a().begin(), _noOfBitSetsInSideChannel.a().end(), 0L);
}
process(_raw_voltage_db.b(), _sideChannelData_db.b(), _power_db.a(), _noOfBitSetsInSideChannel.a());
......@@ -379,10 +383,11 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
cudaMemcpyDeviceToHost, _d2h_stream));
// copy noOf bit set data
CUDA_ERROR_CHECK(
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 2 * _nchans ),
cudaMemcpyAsync( static_cast<void *>(_host_power_db.a_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType)),
static_cast<void *>(_noOfBitSetsInSideChannel.b_ptr() + i ),
1 * sizeof(size_t),
cudaMemcpyDeviceToHost, _d2h_stream));
BOOST_LOG_TRIVIAL(info) << " TOBR NOF BITS SET: " << _noOfBitSetsInSideChannel.b()[i];
}
BOOST_LOG_TRIVIAL(debug) << "Copy Data back to host";
......@@ -398,10 +403,11 @@ bool GatedSpectrometer<HandlerType, IntegratedPowerType>::operator()(RawBytes &b
size_t memOffset = 2 * i * (_nchans * sizeof(IntegratedPowerType) + sizeof(size_t));
size_t* on_values = reinterpret_cast<size_t*> (_host_power_db.b_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType));
*on_values *= _nsamps_per_heap;
size_t* off_values = reinterpret_cast<size_t*> (_host_power_db.b_ptr() + memOffset + 2 * _nchans * sizeof(IntegratedPowerType) + sizeof(size_t));
*off_values = _dadaBufferLayout.getNHeaps() - (*on_values);
*off_values = _nsamps_per_output_spectra - (*on_values);
BOOST_LOG_TRIVIAL(info) << " " << i << ": No of bit set in side channel: " << *on_values << " / " << *off_values << std::endl;
BOOST_LOG_TRIVIAL(info) << " " << i << ": No of samples wo/w. bit set in side channel: " << *on_values << " / " << *off_values << std::endl;
}
// Wrap in a RawBytes object here;
......
......@@ -11,7 +11,7 @@ namespace {
TEST(GatedSpectrometer, BitManipulationMacros) {
for (int i = 0; i < 64; i++) {
int64_t v = 0;
uint64_t v = 0;
SET_BIT(v, i);
for (int j = 0; j < 64; j++) {
......@@ -57,7 +57,7 @@ TEST(GatedSpectrometer, GatingKernel)
thrust::device_vector<float> G0(blockSize * nBlocks);
thrust::device_vector<float> G1(blockSize * nBlocks);
thrust::device_vector<int64_t> _sideChannelData(nBlocks);
thrust::device_vector<uint64_t> _sideChannelData(nBlocks);
thrust::device_vector<float> baseLine(1);
thrust::fill(G0.begin(), G0.end(), 42);
......@@ -67,8 +67,8 @@ TEST(GatedSpectrometer, GatingKernel)
// everything to G0
{
baseLine[0] = 0.;
const int64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
const uint64_t *sideCD =
(uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>(
thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD,
......@@ -88,8 +88,8 @@ TEST(GatedSpectrometer, GatingKernel)
{
baseLine[0] = -5. * G0.size();
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L);
const int64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
const uint64_t *sideCD =
(uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>(
thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD,
......@@ -108,24 +108,24 @@ TEST(GatedSpectrometer, GatingKernel)
TEST(GatedSpectrometer, countBitSet) {
size_t nBlocks = 16;
thrust::device_vector<int64_t> _sideChannelData(nBlocks);
size_t nBlocks = 100000;
thrust::device_vector<uint64_t> _sideChannelData(nBlocks);
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0);
const int64_t *sideCD =
(int64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
const uint64_t *sideCD =
(uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
thrust::device_vector<size_t> res(1);
// test 1 side channel
res[0] = 0;
psrdada_cpp::effelsberg::edd::
countBitSet<<<(_sideChannelData.size() + 255) / 256, 256>>>(
countBitSet<<<1, 1024>>>(
sideCD, nBlocks, 0, 1, 0, thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], 0u);
res[0] = 0;
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L);
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size() + 255) / 256, 256>>>(
psrdada_cpp::effelsberg::edd::countBitSet<<<1, 1024>>>(
sideCD, nBlocks, 0, 1, 0, thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], nBlocks);
......@@ -135,13 +135,13 @@ TEST(GatedSpectrometer, countBitSet) {
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0);
for (size_t i = 2; i < _sideChannelData.size(); i += nSideChannels)
_sideChannelData[i] = 1L;
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size() + 255) / 256, 256>>>(
psrdada_cpp::effelsberg::edd::countBitSet<<<1, 1024>>>(
sideCD, nBlocks, 0, nSideChannels, 3,
thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], 0u);
res[0] = 0;
psrdada_cpp::effelsberg::edd::countBitSet<<<(_sideChannelData.size() + 255) / 256, 256>>>(
psrdada_cpp::effelsberg::edd::countBitSet<<<1, 1024>>>(
sideCD, nBlocks, 0, nSideChannels, 2,
thrust::raw_pointer_cast(res.data()));
EXPECT_EQ(res[0], nBlocks / nSideChannels);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment