Commit d22f63f5 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Fixed mem transfer compute overlap

parent 5ce8991d
......@@ -46,6 +46,12 @@ struct PolarizationData
thrust::device_vector<UnpackedVoltageType> _baseLineG0;
/// Baseline in gate 1 state
thrust::device_vector<UnpackedVoltageType> _baseLineG1;
/// Baseline in gate 0 state after update
thrust::device_vector<UnpackedVoltageType> _baseLineG0_update;
/// Baseline in gate 1 state after update
thrust::device_vector<UnpackedVoltageType> _baseLineG1_update;
/// Channelized voltage in gate 0 state
thrust::device_vector<ChannelisedVoltageType> _channelised_voltage_G0;
/// Channelized voltage in gate 1 state
......@@ -241,8 +247,8 @@ private:
__global__ void gating(float *G0, float *G1, const int64_t *sideChannelData,
size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel,
const float baseLineG0,
const float baseLineG1,
const float* __restrict__ _baseLineG0,
const float* __restrict__ _baseLineG1,
float* __restrict__ baseLineNG0,
float* __restrict__ baseLineNG1,
uint64_cu* stats_G0,
......
......@@ -51,8 +51,8 @@ __global__ void gating(float* __restrict__ G0,
const uint64_t* __restrict__ sideChannelData,
size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel,
const float baseLineG0,
const float baseLineG1,
const float* __restrict__ _baseLineG0,
const float* __restrict__ _baseLineG1,
float* __restrict__ baseLineNG0,
float* __restrict__ baseLineNG1,
uint64_cu* stats_G0, uint64_cu* stats_G1) {
......@@ -60,6 +60,9 @@ __global__ void gating(float* __restrict__ G0,
uint32_t _G0stats = 0;
uint32_t _G1stats = 0;
const float baseLineG0 = _baseLineG0[0];
const float baseLineG1 = _baseLineG1[0];
float baselineUpdateG0 = 0;
float baselineUpdateG1 = 0;
......@@ -115,6 +118,36 @@ __global__ void gating(float* __restrict__ G0,
// Updates the baselines of the gates for the polarization set for the next
// block
// only few output blocks per input block thus execution on only one thread.
// Important is that the execution is async on the GPU.
__global__ void update_baselines(float* __restrict__ baseLineG0,
float* __restrict__ baseLineG1,
float* __restrict__ baseLineNG0,
float* __restrict__ baseLineNG1,
uint64_cu* stats_G0, uint64_cu* stats_G1,
size_t N)
{
size_t NG0 = 0;
size_t NG1 = 0;
for (size_t i =0; i < N; i++)
{
NG0 += stats_G0[i];
NG1 += stats_G1[i];
}
baseLineG0[0] = baseLineNG0[0] / NG0;
baseLineG1[0] = baseLineNG1[0] / NG1;
baseLineNG0[0] = 0;
baseLineNG1[0] = 0;
}
template <class HandlerType>
GatedSpectrometer<HandlerType>::GatedSpectrometer(
const DadaBufferLayout &dadaBufferLayout,
......@@ -194,10 +227,14 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
_unpacked_voltage_G0.resize(_nsamps_per_buffer);
_unpacked_voltage_G1.resize(_nsamps_per_buffer);
polarization0._baseLineG0.resize(1);
polarization0._baseLineG1.resize(1);
polarization1._baseLineG0.resize(1);
polarization1._baseLineG1.resize(1);
polarization0._baseLineG0.resize(1);
polarization0._baseLineG0_update.resize(1);
polarization0._baseLineG1.resize(1);
polarization0._baseLineG1_update.resize(1);
polarization1._baseLineG0.resize(1);
polarization1._baseLineG0_update.resize(1);
polarization1._baseLineG1.resize(1);
polarization1._baseLineG1_update.resize(1);
BOOST_LOG_TRIVIAL(debug) << " Unpacked voltages size (in samples): "
<< _unpacked_voltage_G0.size();
......@@ -286,14 +323,7 @@ void GatedSpectrometer<HandlerType>::gated_fft(
throw std::runtime_error("Unsupported number of bits");
}
// Get baseline from previous block
float previous_baseLineG0 = data._baseLineG0[0];
float previous_baseLineG1 = data._baseLineG1[0];
uint64_t NG0 = 0;
uint64_t NG1 = 0;
// Loop over outputblocks, for case of multiple output blocks per input block
// Loop over outputblocks, for case of multiple output blocks per input block
int step = data._sideChannelData.b().size() / _noOfBitSetsIn_G0.size();
for (size_t i = 0; i < _noOfBitSetsIn_G0.size(); i++)
......@@ -307,19 +337,26 @@ void GatedSpectrometer<HandlerType>::gated_fft(
_selectedBit,
_dadaBufferLayout.getNSideChannels(),
_selectedSideChannel,
previous_baseLineG0, previous_baseLineG1,
thrust::raw_pointer_cast(data._baseLineG0.data()),
thrust::raw_pointer_cast(data._baseLineG1.data()),
thrust::raw_pointer_cast(data._baseLineG0_update.data()),
thrust::raw_pointer_cast(data._baseLineG1_update.data()),
thrust::raw_pointer_cast(_noOfBitSetsIn_G0.data() + i),
thrust::raw_pointer_cast(_noOfBitSetsIn_G1.data() + i)
);
NG0 += _noOfBitSetsIn_G0[i];
NG1 += _noOfBitSetsIn_G1[i];
}
data._baseLineG0[0] /= NG0;
data._baseLineG1[0] /= NG1;
BOOST_LOG_TRIVIAL(debug) << "Updating Baselines\n G0: " << previous_baseLineG0 << " -> " << data._baseLineG0[0] << ", " << previous_baseLineG1 << " -> " << data._baseLineG1[0] ;
// only few output blocks per input block thus execution on only one thread.
// Important is that the execution is async on the GPU.
update_baselines<<<1,1,0, _proc_stream>>>(
thrust::raw_pointer_cast(data._baseLineG0.data()),
thrust::raw_pointer_cast(data._baseLineG1.data()),
thrust::raw_pointer_cast(data._baseLineG0_update.data()),
thrust::raw_pointer_cast(data._baseLineG1_update.data()),
thrust::raw_pointer_cast(_noOfBitSetsIn_G0.data()),
thrust::raw_pointer_cast(_noOfBitSetsIn_G1.data()),
_noOfBitSetsIn_G0.size()
);
BOOST_LOG_TRIVIAL(debug) << "Performing FFT 1";
UnpackedVoltageType *_unpacked_voltage_ptr =
......
......@@ -152,6 +152,8 @@ TEST(GatedSpectrometer, GatingKernel)
thrust::device_vector<float> baseLineG0(1);
thrust::device_vector<float> baseLineG1(1);
thrust::device_vector<float> baseLineG0_update(1);
thrust::device_vector<float> baseLineG1_update(1);
thrust::fill(G0.begin(), G0.end(), 42);
thrust::fill(G1.begin(), G1.end(), 23);
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0);
......@@ -160,18 +162,22 @@ TEST(GatedSpectrometer, GatingKernel)
{
thrust::fill(_nG0.begin(), _nG0.end(), 0);
thrust::fill(_nG1.begin(), _nG1.end(), 0);
baseLineG0[0] = 0.;
baseLineG1[0] = 0.;
baseLineG0[0] = -3;
baseLineG1[0] = -4;
baseLineG0_update[0] = 0;
baseLineG1_update[0] = 0;
const uint64_t *sideCD =
(uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024 , 1024>>>(
thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD,
G0.size(), G0.size(), 0, 1,
G0.size(), blockSize, 0, 1,
0,
-3., -4,
thrust::raw_pointer_cast(baseLineG0.data()),
thrust::raw_pointer_cast(baseLineG1.data()),
thrust::raw_pointer_cast(baseLineG0_update.data()),
thrust::raw_pointer_cast(baseLineG1_update.data()),
thrust::raw_pointer_cast(_nG0.data()),
thrust::raw_pointer_cast(_nG1.data())
);
......@@ -188,27 +194,31 @@ TEST(GatedSpectrometer, GatingKernel)
EXPECT_EQ(_nG0[0], G0.size());
EXPECT_EQ(_nG1[0], 0u);
EXPECT_FLOAT_EQ(baseLineG0[0] / (_nG0[0] + 1E-127), 42.f);
EXPECT_FLOAT_EQ(baseLineG1[0] / (_nG1[0] + 1E-127), 0.f);
EXPECT_FLOAT_EQ(42.f, baseLineG0_update[0] / (_nG0[0] + 1E-121));
EXPECT_FLOAT_EQ(0.f, baseLineG1_update[0] / (_nG1[0] + 1E-121));
}
// everything to G1 // with baseline -5
{
thrust::fill(_nG0.begin(), _nG0.end(), 0);
thrust::fill(_nG1.begin(), _nG1.end(), 0);
baseLineG0[0] = 0.;
baseLineG1[0] = 0.;
baseLineG0[0] = 5.;
baseLineG1[0] = -2;
baseLineG0_update[0] = 0;
baseLineG1_update[0] = 0;
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L);
const uint64_t *sideCD =
(uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>(
thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD,
G0.size(), G0.size(), 0, 1,
G0.size(), blockSize, 0, 1,
0,
5., -2.,
thrust::raw_pointer_cast(baseLineG0.data()),
thrust::raw_pointer_cast(baseLineG1.data()),
thrust::raw_pointer_cast(baseLineG0_update.data()),
thrust::raw_pointer_cast(baseLineG1_update.data()),
thrust::raw_pointer_cast(_nG0.data()),
thrust::raw_pointer_cast(_nG1.data())
);
......@@ -223,8 +233,9 @@ TEST(GatedSpectrometer, GatingKernel)
EXPECT_EQ(_nG0[0], 0u);
EXPECT_EQ(_nG1[0], G1.size());
EXPECT_FLOAT_EQ(baseLineG0[0] / (_nG0[0] + 1E-127), 0.);
EXPECT_FLOAT_EQ(baseLineG1[0] / (_nG1[0] + 1E-127), 42.);
EXPECT_FLOAT_EQ(0.f, baseLineG0_update[0] / (_nG0[0] + 1E-121));
EXPECT_FLOAT_EQ(42.f, baseLineG1_update[0] / (_nG1[0] + 1E-121));
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment