Commit 7945bea9 authored by Tobias Winchen
Browse files

Fix for one spectra, no stride

parent 5263ab59
...@@ -41,34 +41,35 @@ void detect_and_accumulate(float2 const* __restrict__ in, int8_t* __restrict__ o ...@@ -41,34 +41,35 @@ void detect_and_accumulate(float2 const* __restrict__ in, int8_t* __restrict__ o
template <typename T> template <typename T>
__global__ __global__
void detect_and_accumulate(float2 const* __restrict__ in, float* __restrict__ out, void detect_and_accumulate(float2 const* __restrict__ in, float* __restrict__ out,
int nchans, int nsamps, int naccumulate, float scale, float offset, int stride, int out_offset) size_t nchans, size_t nsamps, size_t naccumulate, float scale, float offset, int stride, int out_offset)
{ {
const int nb = naccumulate / blockDim.x + 1; const int nb = naccumulate / blockDim.x + 1;
const int bs = blockDim.x; const int bs = blockDim.x;
const int number_of_spectra = nsamps /( nchans * naccumulate); //const int number_of_spectra = nsamps / (nchans * naccumulate);
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < nsamps * nchans / naccumulate * nb); i += blockDim.x * gridDim.x) for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < nchans * nb); i += blockDim.x * gridDim.x)
{ {
const size_t bn = i / nchans / number_of_spectra; const size_t channel_number = i % nchans;
const size_t currentOutputSpectra = i / nchans; const size_t bn = i / nchans;
const size_t currentChannel = i % nchans;
double sum = 0; double sum = 0;
for (size_t k = 0; k < bs; k++) for (size_t k = 0; k < bs; k++)
{ {
size_t j = k + bn * bs; int cidx = k + bn * bs;
if (j >= naccumulate)
if (cidx >= naccumulate)
break; break;
float2 tmp = in[ j * nchans + currentOutputSpectra * nchans * naccumulate + currentChannel]; const float2 tmp = in[channel_number + cidx * nchans];
double x = tmp.x * tmp.x; double x = tmp.x * tmp.x;
double y = tmp.y * tmp.y; double y = tmp.y * tmp.y;
sum += x + y; sum += x + y;
} }
size_t toff = out_offset * nchans + currentOutputSpectra * nchans * stride; size_t toff = out_offset * nchans;
atomicAdd(&out[toff + currentChannel], ((sum - offset)/scale)); atomicAdd(&out[toff + channel_number], ((sum - offset)/scale));
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment