Skip to content
Snippets Groups Projects
Commit d22f63f5 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Fixed mem transfer compute overlap

parent 5ce8991d
No related branches found
No related tags found
No related merge requests found
...@@ -46,6 +46,12 @@ struct PolarizationData ...@@ -46,6 +46,12 @@ struct PolarizationData
thrust::device_vector<UnpackedVoltageType> _baseLineG0; thrust::device_vector<UnpackedVoltageType> _baseLineG0;
/// Baseline in gate 1 state /// Baseline in gate 1 state
thrust::device_vector<UnpackedVoltageType> _baseLineG1; thrust::device_vector<UnpackedVoltageType> _baseLineG1;
/// Baseline in gate 0 state after update
thrust::device_vector<UnpackedVoltageType> _baseLineG0_update;
/// Baseline in gate 1 state after update
thrust::device_vector<UnpackedVoltageType> _baseLineG1_update;
/// Channelized voltage in gate 0 state /// Channelized voltage in gate 0 state
thrust::device_vector<ChannelisedVoltageType> _channelised_voltage_G0; thrust::device_vector<ChannelisedVoltageType> _channelised_voltage_G0;
/// Channelized voltage in gate 1 state /// Channelized voltage in gate 1 state
...@@ -241,8 +247,8 @@ private: ...@@ -241,8 +247,8 @@ private:
__global__ void gating(float *G0, float *G1, const int64_t *sideChannelData, __global__ void gating(float *G0, float *G1, const int64_t *sideChannelData,
size_t N, size_t heapSize, size_t bitpos, size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel, size_t noOfSideChannels, size_t selectedSideChannel,
const float baseLineG0, const float* __restrict__ _baseLineG0,
const float baseLineG1, const float* __restrict__ _baseLineG1,
float* __restrict__ baseLineNG0, float* __restrict__ baseLineNG0,
float* __restrict__ baseLineNG1, float* __restrict__ baseLineNG1,
uint64_cu* stats_G0, uint64_cu* stats_G0,
......
...@@ -51,8 +51,8 @@ __global__ void gating(float* __restrict__ G0, ...@@ -51,8 +51,8 @@ __global__ void gating(float* __restrict__ G0,
const uint64_t* __restrict__ sideChannelData, const uint64_t* __restrict__ sideChannelData,
size_t N, size_t heapSize, size_t bitpos, size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel, size_t noOfSideChannels, size_t selectedSideChannel,
const float baseLineG0, const float* __restrict__ _baseLineG0,
const float baseLineG1, const float* __restrict__ _baseLineG1,
float* __restrict__ baseLineNG0, float* __restrict__ baseLineNG0,
float* __restrict__ baseLineNG1, float* __restrict__ baseLineNG1,
uint64_cu* stats_G0, uint64_cu* stats_G1) { uint64_cu* stats_G0, uint64_cu* stats_G1) {
...@@ -60,6 +60,9 @@ __global__ void gating(float* __restrict__ G0, ...@@ -60,6 +60,9 @@ __global__ void gating(float* __restrict__ G0,
uint32_t _G0stats = 0; uint32_t _G0stats = 0;
uint32_t _G1stats = 0; uint32_t _G1stats = 0;
const float baseLineG0 = _baseLineG0[0];
const float baseLineG1 = _baseLineG1[0];
float baselineUpdateG0 = 0; float baselineUpdateG0 = 0;
float baselineUpdateG1 = 0; float baselineUpdateG1 = 0;
...@@ -115,6 +118,36 @@ __global__ void gating(float* __restrict__ G0, ...@@ -115,6 +118,36 @@ __global__ void gating(float* __restrict__ G0,
// Updates the baselines of the gates for the polarization set for the next
// block
// only few output blocks per input block thus execution on only one thread.
// Important is that the execution is async on the GPU.
__global__ void update_baselines(float* __restrict__ baseLineG0,
float* __restrict__ baseLineG1,
float* __restrict__ baseLineNG0,
float* __restrict__ baseLineNG1,
uint64_cu* stats_G0, uint64_cu* stats_G1,
size_t N)
{
size_t NG0 = 0;
size_t NG1 = 0;
for (size_t i =0; i < N; i++)
{
NG0 += stats_G0[i];
NG1 += stats_G1[i];
}
baseLineG0[0] = baseLineNG0[0] / NG0;
baseLineG1[0] = baseLineNG1[0] / NG1;
baseLineNG0[0] = 0;
baseLineNG1[0] = 0;
}
template <class HandlerType> template <class HandlerType>
GatedSpectrometer<HandlerType>::GatedSpectrometer( GatedSpectrometer<HandlerType>::GatedSpectrometer(
const DadaBufferLayout &dadaBufferLayout, const DadaBufferLayout &dadaBufferLayout,
...@@ -194,10 +227,14 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer( ...@@ -194,10 +227,14 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
_unpacked_voltage_G0.resize(_nsamps_per_buffer); _unpacked_voltage_G0.resize(_nsamps_per_buffer);
_unpacked_voltage_G1.resize(_nsamps_per_buffer); _unpacked_voltage_G1.resize(_nsamps_per_buffer);
polarization0._baseLineG0.resize(1); polarization0._baseLineG0.resize(1);
polarization0._baseLineG1.resize(1); polarization0._baseLineG0_update.resize(1);
polarization1._baseLineG0.resize(1); polarization0._baseLineG1.resize(1);
polarization1._baseLineG1.resize(1); polarization0._baseLineG1_update.resize(1);
polarization1._baseLineG0.resize(1);
polarization1._baseLineG0_update.resize(1);
polarization1._baseLineG1.resize(1);
polarization1._baseLineG1_update.resize(1);
BOOST_LOG_TRIVIAL(debug) << " Unpacked voltages size (in samples): " BOOST_LOG_TRIVIAL(debug) << " Unpacked voltages size (in samples): "
<< _unpacked_voltage_G0.size(); << _unpacked_voltage_G0.size();
...@@ -286,14 +323,7 @@ void GatedSpectrometer<HandlerType>::gated_fft( ...@@ -286,14 +323,7 @@ void GatedSpectrometer<HandlerType>::gated_fft(
throw std::runtime_error("Unsupported number of bits"); throw std::runtime_error("Unsupported number of bits");
} }
// Get baseline from previous block // Loop over outputblocks, for case of multiple output blocks per input block
float previous_baseLineG0 = data._baseLineG0[0];
float previous_baseLineG1 = data._baseLineG1[0];
uint64_t NG0 = 0;
uint64_t NG1 = 0;
// Loop over outputblocks, for case of multiple output blocks per input block
int step = data._sideChannelData.b().size() / _noOfBitSetsIn_G0.size(); int step = data._sideChannelData.b().size() / _noOfBitSetsIn_G0.size();
for (size_t i = 0; i < _noOfBitSetsIn_G0.size(); i++) for (size_t i = 0; i < _noOfBitSetsIn_G0.size(); i++)
...@@ -307,19 +337,26 @@ void GatedSpectrometer<HandlerType>::gated_fft( ...@@ -307,19 +337,26 @@ void GatedSpectrometer<HandlerType>::gated_fft(
_selectedBit, _selectedBit,
_dadaBufferLayout.getNSideChannels(), _dadaBufferLayout.getNSideChannels(),
_selectedSideChannel, _selectedSideChannel,
previous_baseLineG0, previous_baseLineG1,
thrust::raw_pointer_cast(data._baseLineG0.data()), thrust::raw_pointer_cast(data._baseLineG0.data()),
thrust::raw_pointer_cast(data._baseLineG1.data()), thrust::raw_pointer_cast(data._baseLineG1.data()),
thrust::raw_pointer_cast(data._baseLineG0_update.data()),
thrust::raw_pointer_cast(data._baseLineG1_update.data()),
thrust::raw_pointer_cast(_noOfBitSetsIn_G0.data() + i), thrust::raw_pointer_cast(_noOfBitSetsIn_G0.data() + i),
thrust::raw_pointer_cast(_noOfBitSetsIn_G1.data() + i) thrust::raw_pointer_cast(_noOfBitSetsIn_G1.data() + i)
); );
NG0 += _noOfBitSetsIn_G0[i];
NG1 += _noOfBitSetsIn_G1[i];
} }
data._baseLineG0[0] /= NG0;
data._baseLineG1[0] /= NG1;
BOOST_LOG_TRIVIAL(debug) << "Updating Baselines\n G0: " << previous_baseLineG0 << " -> " << data._baseLineG0[0] << ", " << previous_baseLineG1 << " -> " << data._baseLineG1[0] ;
// only few output blocks per input block thus execution on only one thread.
// Important is that the execution is async on the GPU.
update_baselines<<<1,1,0, _proc_stream>>>(
thrust::raw_pointer_cast(data._baseLineG0.data()),
thrust::raw_pointer_cast(data._baseLineG1.data()),
thrust::raw_pointer_cast(data._baseLineG0_update.data()),
thrust::raw_pointer_cast(data._baseLineG1_update.data()),
thrust::raw_pointer_cast(_noOfBitSetsIn_G0.data()),
thrust::raw_pointer_cast(_noOfBitSetsIn_G1.data()),
_noOfBitSetsIn_G0.size()
);
BOOST_LOG_TRIVIAL(debug) << "Performing FFT 1"; BOOST_LOG_TRIVIAL(debug) << "Performing FFT 1";
UnpackedVoltageType *_unpacked_voltage_ptr = UnpackedVoltageType *_unpacked_voltage_ptr =
......
...@@ -152,6 +152,8 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -152,6 +152,8 @@ TEST(GatedSpectrometer, GatingKernel)
thrust::device_vector<float> baseLineG0(1); thrust::device_vector<float> baseLineG0(1);
thrust::device_vector<float> baseLineG1(1); thrust::device_vector<float> baseLineG1(1);
thrust::device_vector<float> baseLineG0_update(1);
thrust::device_vector<float> baseLineG1_update(1);
thrust::fill(G0.begin(), G0.end(), 42); thrust::fill(G0.begin(), G0.end(), 42);
thrust::fill(G1.begin(), G1.end(), 23); thrust::fill(G1.begin(), G1.end(), 23);
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0); thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0);
...@@ -160,18 +162,22 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -160,18 +162,22 @@ TEST(GatedSpectrometer, GatingKernel)
{ {
thrust::fill(_nG0.begin(), _nG0.end(), 0); thrust::fill(_nG0.begin(), _nG0.end(), 0);
thrust::fill(_nG1.begin(), _nG1.end(), 0); thrust::fill(_nG1.begin(), _nG1.end(), 0);
baseLineG0[0] = 0.; baseLineG0[0] = -3;
baseLineG1[0] = 0.; baseLineG1[0] = -4;
baseLineG0_update[0] = 0;
baseLineG1_update[0] = 0;
const uint64_t *sideCD = const uint64_t *sideCD =
(uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data())); (uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024 , 1024>>>( psrdada_cpp::effelsberg::edd::gating<<<1024 , 1024>>>(
thrust::raw_pointer_cast(G0.data()), thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD, thrust::raw_pointer_cast(G1.data()), sideCD,
G0.size(), G0.size(), 0, 1, G0.size(), blockSize, 0, 1,
0, 0,
-3., -4,
thrust::raw_pointer_cast(baseLineG0.data()), thrust::raw_pointer_cast(baseLineG0.data()),
thrust::raw_pointer_cast(baseLineG1.data()), thrust::raw_pointer_cast(baseLineG1.data()),
thrust::raw_pointer_cast(baseLineG0_update.data()),
thrust::raw_pointer_cast(baseLineG1_update.data()),
thrust::raw_pointer_cast(_nG0.data()), thrust::raw_pointer_cast(_nG0.data()),
thrust::raw_pointer_cast(_nG1.data()) thrust::raw_pointer_cast(_nG1.data())
); );
...@@ -188,27 +194,31 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -188,27 +194,31 @@ TEST(GatedSpectrometer, GatingKernel)
EXPECT_EQ(_nG0[0], G0.size()); EXPECT_EQ(_nG0[0], G0.size());
EXPECT_EQ(_nG1[0], 0u); EXPECT_EQ(_nG1[0], 0u);
EXPECT_FLOAT_EQ(baseLineG0[0] / (_nG0[0] + 1E-127), 42.f); EXPECT_FLOAT_EQ(42.f, baseLineG0_update[0] / (_nG0[0] + 1E-121));
EXPECT_FLOAT_EQ(baseLineG1[0] / (_nG1[0] + 1E-127), 0.f); EXPECT_FLOAT_EQ(0.f, baseLineG1_update[0] / (_nG1[0] + 1E-121));
} }
// everything to G1 // with baseline -5 // everything to G1 // with baseline -5
{ {
thrust::fill(_nG0.begin(), _nG0.end(), 0); thrust::fill(_nG0.begin(), _nG0.end(), 0);
thrust::fill(_nG1.begin(), _nG1.end(), 0); thrust::fill(_nG1.begin(), _nG1.end(), 0);
baseLineG0[0] = 0.; baseLineG0[0] = 5.;
baseLineG1[0] = 0.; baseLineG1[0] = -2;
baseLineG0_update[0] = 0;
baseLineG1_update[0] = 0;
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L); thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L);
const uint64_t *sideCD = const uint64_t *sideCD =
(uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data())); (uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>( psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>(
thrust::raw_pointer_cast(G0.data()), thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD, thrust::raw_pointer_cast(G1.data()), sideCD,
G0.size(), G0.size(), 0, 1, G0.size(), blockSize, 0, 1,
0, 0,
5., -2.,
thrust::raw_pointer_cast(baseLineG0.data()), thrust::raw_pointer_cast(baseLineG0.data()),
thrust::raw_pointer_cast(baseLineG1.data()), thrust::raw_pointer_cast(baseLineG1.data()),
thrust::raw_pointer_cast(baseLineG0_update.data()),
thrust::raw_pointer_cast(baseLineG1_update.data()),
thrust::raw_pointer_cast(_nG0.data()), thrust::raw_pointer_cast(_nG0.data()),
thrust::raw_pointer_cast(_nG1.data()) thrust::raw_pointer_cast(_nG1.data())
); );
...@@ -223,8 +233,9 @@ TEST(GatedSpectrometer, GatingKernel) ...@@ -223,8 +233,9 @@ TEST(GatedSpectrometer, GatingKernel)
EXPECT_EQ(_nG0[0], 0u); EXPECT_EQ(_nG0[0], 0u);
EXPECT_EQ(_nG1[0], G1.size()); EXPECT_EQ(_nG1[0], G1.size());
EXPECT_FLOAT_EQ(baseLineG0[0] / (_nG0[0] + 1E-127), 0.);
EXPECT_FLOAT_EQ(baseLineG1[0] / (_nG1[0] + 1E-127), 42.); EXPECT_FLOAT_EQ(0.f, baseLineG0_update[0] / (_nG0[0] + 1E-121));
EXPECT_FLOAT_EQ(42.f, baseLineG1_update[0] / (_nG1[0] + 1E-121));
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment