Commit e283234b authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Fix multiple spectra

parent 7945bea9
......@@ -38,6 +38,21 @@ void detect_and_accumulate(float2 const* __restrict__ in, int8_t* __restrict__ o
}
/**
* @brief Integrates output power from complex voltage input.
*
* @param in Pointer to input data of size nsamps.
* @param out Pointer to output data of size nsamps / (naccumualte * nchans) * nchans
* @param nchans Number of channels
* @param naccumulate Number of input spectra to integrate into one ouput spectrum
* @param scale Normalization constant for the utput spectrum
* @param offset Output is shifted by this value
* @param stride
* @param out_offset
*
*
*/
template <typename T>
__global__
void detect_and_accumulate(float2 const* __restrict__ in, float* __restrict__ out,
......@@ -46,31 +61,35 @@ void detect_and_accumulate(float2 const* __restrict__ in, float* __restrict__ ou
const int nb = naccumulate / blockDim.x + 1;
const int bs = blockDim.x;
//const int number_of_spectra = nsamps / (nchans * naccumulate);
const int number_of_spectra = nsamps / (nchans * naccumulate);
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < nchans * nb); i += blockDim.x * gridDim.x)
{
const size_t channel_number = i % nchans;
const size_t bn = i / nchans;
for (size_t current_spectrum =0; current_spectrum < number_of_spectra; current_spectrum++)
{ // I could just launch more blocks in a second dimension
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < nchans * nb); i += blockDim.x * gridDim.x)
{
const size_t channel_number = i % nchans;
const size_t bn = i / nchans;
double sum = 0;
for (size_t k = 0; k < bs; k++)
{
int cidx = k + bn * bs;
double sum = 0;
for (size_t k = 0; k < bs; k++)
{
int cidx = k + bn * bs;
if (cidx >= naccumulate)
break;
if (cidx >= naccumulate)
break;
const float2 tmp = in[channel_number + cidx * nchans];
const float2 tmp = in[channel_number + cidx * nchans + current_spectrum * naccumulate * nchans];
double x = tmp.x * tmp.x;
double y = tmp.y * tmp.y;
sum += x + y;
}
size_t toff = out_offset * nchans;
double x = tmp.x * tmp.x;
double y = tmp.y * tmp.y;
sum += x + y;
}
size_t toff = out_offset * nchans + current_spectrum * nchans;
atomicAdd(&out[toff + channel_number], ((sum - offset)/scale));
atomicAdd(&out[toff + channel_number], ((sum - offset)/scale));
}
}
}
......
......@@ -373,7 +373,7 @@ void GatedSpectrometer<HandlerType, InputType, OutputType>::process(SinglePolari
_nchans,
inputDataStream->_channelised_voltage_G0.size() / _nchans,
_naccumulate / _nBlocks,
1, 0., 1, 0);
1, 0., 0, 0);
kernels::detect_and_accumulate<IntegratedPowerType> <<<1024, 1024, 0, _proc_stream>>>(
thrust::raw_pointer_cast(inputDataStream->_channelised_voltage_G1.data()),
......@@ -381,7 +381,7 @@ void GatedSpectrometer<HandlerType, InputType, OutputType>::process(SinglePolari
_nchans,
inputDataStream->_channelised_voltage_G1.size() / _nchans,
_naccumulate / _nBlocks,
1, 0., 1, 0);
1, 0., 0, 0);
// count saturated samples
for(size_t output_block_number = 0; output_block_number < outputDataStream->G0._noOfOverflowed.size(); output_block_number++)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment