Commit 6ab413cc authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Fix stride

parent e283234b
......@@ -45,10 +45,11 @@ void detect_and_accumulate(float2 const* __restrict__ in, int8_t* __restrict__ o
* @param in Pointer to input data of size nsamps.
* @param out Pointer to output data of size nsamps / (naccumulate * nchans) * nchans
* @param nchans Number of channels
* @param naccumulate Number of input spectra to integrate into one ouput spectrum
* @param naccumulate Number of input spectra to integrate into one output spectrum
* @param scale Normalization constant for the output spectrum
* @param offset Output is shifted by this value
* @param stride
* @param stride Stride of the output spectra. For 2, leave a gap of
* nchans between every output spectrum, for 1 no gap.
* @param out_offset
*
*
......@@ -84,7 +85,7 @@ void detect_and_accumulate(float2 const* __restrict__ in, float* __restrict__ ou
double y = tmp.y * tmp.y;
sum += x + y;
}
size_t toff = out_offset * nchans + current_spectrum * nchans;
size_t toff = out_offset * nchans + current_spectrum * nchans * stride;
atomicAdd(&out[toff + channel_number], ((sum - offset)/scale));
}
......
......@@ -373,7 +373,7 @@ void GatedSpectrometer<HandlerType, InputType, OutputType>::process(SinglePolari
_nchans,
inputDataStream->_channelised_voltage_G0.size() / _nchans,
_naccumulate / _nBlocks,
1, 0., 0, 0);
1, 0., 1, 0);
kernels::detect_and_accumulate<IntegratedPowerType> <<<1024, 1024, 0, _proc_stream>>>(
thrust::raw_pointer_cast(inputDataStream->_channelised_voltage_G1.data()),
......@@ -381,7 +381,7 @@ void GatedSpectrometer<HandlerType, InputType, OutputType>::process(SinglePolari
_nchans,
inputDataStream->_channelised_voltage_G1.size() / _nchans,
_naccumulate / _nBlocks,
1, 0., 0, 0);
1, 0., 1, 0);
// count saturated samples
for(size_t output_block_number = 0; output_block_number < outputDataStream->G0._noOfOverflowed.size(); output_block_number++)
......
......@@ -149,6 +149,45 @@ TEST_P(detect_and_accumulate32bit, no_stride)
}
}
/**
 * Runs detect_and_accumulate<float> with an output stride of 2 on a constant
 * input of (1, 1) and verifies the output buffer.
 *
 * Two properties are checked for every (spectrum, channel) pair:
 *  - the strided slot holds (x*x + y*y) * naccumulate, and
 *  - the gap slots between consecutive output spectra still hold the 0 they
 *    were initialised with, i.e. the kernel did not write into the gap.
 *
 * The kernel launch and the subsequent synchronisation are checked for CUDA
 * errors so that a failed launch is reported as such instead of showing up as
 * a value mismatch.
 */
TEST_P(detect_and_accumulate32bit, w_stride)
{
    // Output stride under test: one written spectrum followed by (stride - 1)
    // untouched gaps of nchan values each.
    const int stride = 2;

    detect_accumulate_params params = GetParam();

    thrust::device_vector<float2> input;
    thrust::device_vector<float> output;
    input.resize(params.nspectra * params.nchan * params.naccumulate);
    output.resize(params.nspectra * params.nchan * stride);

    float2 v;
    v.x = 1.0f;
    v.y = 1.0f;
    thrust::fill(input.begin(), input.end(), v);
    // Zero-initialise so that untouched gap slots can be recognised afterwards.
    thrust::fill(output.begin(), output.end(), 0.0f);

    kernels::detect_and_accumulate<float> <<<1024, 1024>>>(
        thrust::raw_pointer_cast(input.data()),
        thrust::raw_pointer_cast(output.data()),
        params.nchan,
        input.size(),
        params.naccumulate,
        1, 0., stride, 0);
    // Launch-configuration errors are only visible via cudaGetLastError();
    // faults during execution surface at the synchronising call.
    ASSERT_EQ(cudaSuccess, cudaGetLastError());
    ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize());

    thrust::host_vector<float> output_host = output;
    const float expected = (v.x * v.x + v.y * v.y) * params.naccumulate;

    for (size_t i = 0; i < params.nchan * params.nspectra; i++)
    {
        size_t current_spectrum = i / params.nchan;
        size_t channel = i % params.nchan;
        // First element index of this channel inside its strided output spectrum.
        size_t base = current_spectrum * params.nchan * stride + channel;

        ASSERT_FLOAT_EQ(output_host[base], expected) << "i = " << i << " for nchan = " << params.nchan << ", nspectra = " << params.nspectra << ", naccumulate = " << params.naccumulate;

        // The gap(s) following each written spectrum must not be modified.
        for (int gap = 1; gap < stride; gap++)
        {
            ASSERT_FLOAT_EQ(output_host[base + gap * params.nchan], 0.0f) << "gap " << gap << " modified at i = " << i << " for nchan = " << params.nchan << ", nspectra = " << params.nspectra << ", naccumulate = " << params.naccumulate;
        }
    }
}
INSTANTIATE_TEST_CASE_P (DetectorAccumulatorTester,
detect_and_accumulate32bit,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment