Skip to content
Snippets Groups Projects
Commit d22f63f5 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Fixed mem transfer compute overlap

parent 5ce8991d
Branches
No related tags found
No related merge requests found
......@@ -46,6 +46,12 @@ struct PolarizationData
thrust::device_vector<UnpackedVoltageType> _baseLineG0;
/// Baseline in gate 1 state
thrust::device_vector<UnpackedVoltageType> _baseLineG1;
/// Baseline in gate 0 state after update
thrust::device_vector<UnpackedVoltageType> _baseLineG0_update;
/// Baseline in gate 1 state after update
thrust::device_vector<UnpackedVoltageType> _baseLineG1_update;
/// Channelized voltage in gate 0 state
thrust::device_vector<ChannelisedVoltageType> _channelised_voltage_G0;
/// Channelized voltage in gate 1 state
......@@ -241,8 +247,8 @@ private:
__global__ void gating(float *G0, float *G1, const int64_t *sideChannelData,
size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel,
const float baseLineG0,
const float baseLineG1,
const float* __restrict__ _baseLineG0,
const float* __restrict__ _baseLineG1,
float* __restrict__ baseLineNG0,
float* __restrict__ baseLineNG1,
uint64_cu* stats_G0,
......
......@@ -51,8 +51,8 @@ __global__ void gating(float* __restrict__ G0,
const uint64_t* __restrict__ sideChannelData,
size_t N, size_t heapSize, size_t bitpos,
size_t noOfSideChannels, size_t selectedSideChannel,
const float baseLineG0,
const float baseLineG1,
const float* __restrict__ _baseLineG0,
const float* __restrict__ _baseLineG1,
float* __restrict__ baseLineNG0,
float* __restrict__ baseLineNG1,
uint64_cu* stats_G0, uint64_cu* stats_G1) {
......@@ -60,6 +60,9 @@ __global__ void gating(float* __restrict__ G0,
uint32_t _G0stats = 0;
uint32_t _G1stats = 0;
const float baseLineG0 = _baseLineG0[0];
const float baseLineG1 = _baseLineG1[0];
float baselineUpdateG0 = 0;
float baselineUpdateG1 = 0;
......@@ -115,6 +118,36 @@ __global__ void gating(float* __restrict__ G0,
// Updates the baselines of the gates for the polarization set for the next
// block
// only few output blocks per input block thus execution on only one thread.
// Important is that the execution is async on the GPU.
__global__ void update_baselines(float* __restrict__ baseLineG0,
float* __restrict__ baseLineG1,
float* __restrict__ baseLineNG0,
float* __restrict__ baseLineNG1,
uint64_cu* stats_G0, uint64_cu* stats_G1,
size_t N)
{
size_t NG0 = 0;
size_t NG1 = 0;
for (size_t i =0; i < N; i++)
{
NG0 += stats_G0[i];
NG1 += stats_G1[i];
}
baseLineG0[0] = baseLineNG0[0] / NG0;
baseLineG1[0] = baseLineNG1[0] / NG1;
baseLineNG0[0] = 0;
baseLineNG1[0] = 0;
}
template <class HandlerType>
GatedSpectrometer<HandlerType>::GatedSpectrometer(
const DadaBufferLayout &dadaBufferLayout,
......@@ -194,10 +227,14 @@ GatedSpectrometer<HandlerType>::GatedSpectrometer(
_unpacked_voltage_G0.resize(_nsamps_per_buffer);
_unpacked_voltage_G1.resize(_nsamps_per_buffer);
polarization0._baseLineG0.resize(1);
polarization0._baseLineG1.resize(1);
polarization1._baseLineG0.resize(1);
polarization1._baseLineG1.resize(1);
polarization0._baseLineG0.resize(1);
polarization0._baseLineG0_update.resize(1);
polarization0._baseLineG1.resize(1);
polarization0._baseLineG1_update.resize(1);
polarization1._baseLineG0.resize(1);
polarization1._baseLineG0_update.resize(1);
polarization1._baseLineG1.resize(1);
polarization1._baseLineG1_update.resize(1);
BOOST_LOG_TRIVIAL(debug) << " Unpacked voltages size (in samples): "
<< _unpacked_voltage_G0.size();
......@@ -286,14 +323,7 @@ void GatedSpectrometer<HandlerType>::gated_fft(
throw std::runtime_error("Unsupported number of bits");
}
// Get baseline from previous block
float previous_baseLineG0 = data._baseLineG0[0];
float previous_baseLineG1 = data._baseLineG1[0];
uint64_t NG0 = 0;
uint64_t NG1 = 0;
// Loop over outputblocks, for case of multiple output blocks per input block
// Loop over outputblocks, for case of multiple output blocks per input block
int step = data._sideChannelData.b().size() / _noOfBitSetsIn_G0.size();
for (size_t i = 0; i < _noOfBitSetsIn_G0.size(); i++)
......@@ -307,19 +337,26 @@ void GatedSpectrometer<HandlerType>::gated_fft(
_selectedBit,
_dadaBufferLayout.getNSideChannels(),
_selectedSideChannel,
previous_baseLineG0, previous_baseLineG1,
thrust::raw_pointer_cast(data._baseLineG0.data()),
thrust::raw_pointer_cast(data._baseLineG1.data()),
thrust::raw_pointer_cast(data._baseLineG0_update.data()),
thrust::raw_pointer_cast(data._baseLineG1_update.data()),
thrust::raw_pointer_cast(_noOfBitSetsIn_G0.data() + i),
thrust::raw_pointer_cast(_noOfBitSetsIn_G1.data() + i)
);
NG0 += _noOfBitSetsIn_G0[i];
NG1 += _noOfBitSetsIn_G1[i];
}
data._baseLineG0[0] /= NG0;
data._baseLineG1[0] /= NG1;
BOOST_LOG_TRIVIAL(debug) << "Updating Baselines\n G0: " << previous_baseLineG0 << " -> " << data._baseLineG0[0] << ", " << previous_baseLineG1 << " -> " << data._baseLineG1[0] ;
// only few output blocks per input block thus execution on only one thread.
// Important is that the execution is async on the GPU.
update_baselines<<<1,1,0, _proc_stream>>>(
thrust::raw_pointer_cast(data._baseLineG0.data()),
thrust::raw_pointer_cast(data._baseLineG1.data()),
thrust::raw_pointer_cast(data._baseLineG0_update.data()),
thrust::raw_pointer_cast(data._baseLineG1_update.data()),
thrust::raw_pointer_cast(_noOfBitSetsIn_G0.data()),
thrust::raw_pointer_cast(_noOfBitSetsIn_G1.data()),
_noOfBitSetsIn_G0.size()
);
BOOST_LOG_TRIVIAL(debug) << "Performing FFT 1";
UnpackedVoltageType *_unpacked_voltage_ptr =
......
......@@ -152,6 +152,8 @@ TEST(GatedSpectrometer, GatingKernel)
thrust::device_vector<float> baseLineG0(1);
thrust::device_vector<float> baseLineG1(1);
thrust::device_vector<float> baseLineG0_update(1);
thrust::device_vector<float> baseLineG1_update(1);
thrust::fill(G0.begin(), G0.end(), 42);
thrust::fill(G1.begin(), G1.end(), 23);
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 0);
......@@ -160,18 +162,22 @@ TEST(GatedSpectrometer, GatingKernel)
{
thrust::fill(_nG0.begin(), _nG0.end(), 0);
thrust::fill(_nG1.begin(), _nG1.end(), 0);
baseLineG0[0] = 0.;
baseLineG1[0] = 0.;
baseLineG0[0] = -3;
baseLineG1[0] = -4;
baseLineG0_update[0] = 0;
baseLineG1_update[0] = 0;
const uint64_t *sideCD =
(uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024 , 1024>>>(
thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD,
G0.size(), G0.size(), 0, 1,
G0.size(), blockSize, 0, 1,
0,
-3., -4,
thrust::raw_pointer_cast(baseLineG0.data()),
thrust::raw_pointer_cast(baseLineG1.data()),
thrust::raw_pointer_cast(baseLineG0_update.data()),
thrust::raw_pointer_cast(baseLineG1_update.data()),
thrust::raw_pointer_cast(_nG0.data()),
thrust::raw_pointer_cast(_nG1.data())
);
......@@ -188,27 +194,31 @@ TEST(GatedSpectrometer, GatingKernel)
EXPECT_EQ(_nG0[0], G0.size());
EXPECT_EQ(_nG1[0], 0u);
EXPECT_FLOAT_EQ(baseLineG0[0] / (_nG0[0] + 1E-127), 42.f);
EXPECT_FLOAT_EQ(baseLineG1[0] / (_nG1[0] + 1E-127), 0.f);
EXPECT_FLOAT_EQ(42.f, baseLineG0_update[0] / (_nG0[0] + 1E-121));
EXPECT_FLOAT_EQ(0.f, baseLineG1_update[0] / (_nG1[0] + 1E-121));
}
// everything to G1 // with baseline -5
{
thrust::fill(_nG0.begin(), _nG0.end(), 0);
thrust::fill(_nG1.begin(), _nG1.end(), 0);
baseLineG0[0] = 0.;
baseLineG1[0] = 0.;
baseLineG0[0] = 5.;
baseLineG1[0] = -2;
baseLineG0_update[0] = 0;
baseLineG1_update[0] = 0;
thrust::fill(_sideChannelData.begin(), _sideChannelData.end(), 1L);
const uint64_t *sideCD =
(uint64_t *)(thrust::raw_pointer_cast(_sideChannelData.data()));
psrdada_cpp::effelsberg::edd::gating<<<1024, 1024>>>(
thrust::raw_pointer_cast(G0.data()),
thrust::raw_pointer_cast(G1.data()), sideCD,
G0.size(), G0.size(), 0, 1,
G0.size(), blockSize, 0, 1,
0,
5., -2.,
thrust::raw_pointer_cast(baseLineG0.data()),
thrust::raw_pointer_cast(baseLineG1.data()),
thrust::raw_pointer_cast(baseLineG0_update.data()),
thrust::raw_pointer_cast(baseLineG1_update.data()),
thrust::raw_pointer_cast(_nG0.data()),
thrust::raw_pointer_cast(_nG1.data())
);
......@@ -223,8 +233,9 @@ TEST(GatedSpectrometer, GatingKernel)
EXPECT_EQ(_nG0[0], 0u);
EXPECT_EQ(_nG1[0], G1.size());
EXPECT_FLOAT_EQ(baseLineG0[0] / (_nG0[0] + 1E-127), 0.);
EXPECT_FLOAT_EQ(baseLineG1[0] / (_nG1[0] + 1E-127), 42.);
EXPECT_FLOAT_EQ(0.f, baseLineG0_update[0] / (_nG0[0] + 1E-121));
EXPECT_FLOAT_EQ(42.f, baseLineG1_update[0] / (_nG1[0] + 1E-121));
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment