Skip to content
Snippets Groups Projects
Commit c683d17f authored by Tobias Winchen
Browse files

Improved speed in stokes accumulate for low channel numbers

parent 258f6127
No related branches found
No related tags found
1 merge request: !9 "Gated spectrometer low channel numbers and saturated samples"
...@@ -137,35 +137,42 @@ __host__ __device__ void stokes_IQUV(const float2 &p1, const float2 &p2, float & ...@@ -137,35 +137,42 @@ __host__ __device__ void stokes_IQUV(const float2 &p1, const float2 &p2, float &
* @brief calculate stokes IQUV spectra pol1, pol2 are arrays of naccumulate * @brief calculate stokes IQUV spectra pol1, pol2 are arrays of naccumulate
* complex spectra for individual polarizations * complex spectra for individual polarizations
*/ */
__global__ void stokes_accumulate(float2 const __restrict__ *pol1, __global__ void stokes_accumulate(float2 const *__restrict__ pol1,
float2 const __restrict__ *pol2, float *I, float* Q, float *U, float*V, float2 const *__restrict__ pol2, float *I,
int nchans, int naccumulate) float *Q, float *U, float *V, int nchans,
{ int naccumulate) {
const int nb = naccumulate / blockDim.x + 1;
const int bs = blockDim.x;
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < nchans * nb);
i += blockDim.x * gridDim.x) {
const size_t channel_number = i % nchans;
const size_t bn = i / nchans;
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; (i < nchans);
i += blockDim.x * gridDim.x)
{
float rI = 0; float rI = 0;
float rQ = 0; float rQ = 0;
float rU = 0; float rU = 0;
float rV = 0; float rV = 0;
for (int k=0; k < naccumulate; k++) for (int k = 0; k < bs; k++) {
{ int cidx = k + bn * bs;
const float2 p1 = pol1[i + k * nchans]; if (cidx >= naccumulate)
const float2 p2 = pol2[i + k * nchans]; break;
const float2 p1 = pol1[channel_number + cidx * nchans];
const float2 p2 = pol2[channel_number + cidx * nchans];
rI += fabs(p1.x * p1.x + p1.y * p1.y) + fabs(p2.x * p2.x + p2.y * p2.y); rI += fabs(p1.x * p1.x + p1.y * p1.y) + fabs(p2.x * p2.x + p2.y * p2.y);
rQ += fabs(p1.x * p1.x + p1.y * p1.y) - fabs(p2.x * p2.x + p2.y * p2.y); rQ += fabs(p1.x * p1.x + p1.y * p1.y) - fabs(p2.x * p2.x + p2.y * p2.y);
rU += 2.f * (p1.x * p2.x + p1.y * p2.y); rU += 2.f * (p1.x * p2.x + p1.y * p2.y);
rV += -2.f * (p1.y * p2.x - p1.x * p2.y); rV += -2.f * (p1.y * p2.x - p1.x * p2.y);
} }
I[i] += rI;
Q[i] += rQ;
U[i] += rU;
V[i] += rV;
}
atomicAdd(&I[channel_number], rI);
atomicAdd(&Q[channel_number], rQ);
atomicAdd(&U[channel_number], rU);
atomicAdd(&V[channel_number], rV);
}
} }
... ...
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment