Commit 050253b8 authored by David Rohr

Improve FFT map multiplication kernel

parent a65e74a6
......@@ -115,12 +115,18 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon
// Multiplies, pixel by pixel, the convolution map with the complex conjugate
// of one reference map per block:
//
//     out[map][i] = convmap[i] * conj(refmap[map][i])
//
// Parameters:
//   convmap           - complex input shared by all maps, read at indices [0, NumberPixelsTotal).
//   refmap            - complex reference maps; map number b starts at element b * MapSize + Offset.
//   out               - complex results; map number b starts at element b * MapSize.
//   NumberPixelsTotal - number of complex elements processed for each map.
//   MapSize           - element stride between consecutive maps in refmap and out.
//   NumberMaps        - number of maps; blocks with myBlockIdxX >= NumberMaps exit immediately.
//   Offset            - extra element offset applied to refmap only.
//
// Work distribution: myBlockIdxX selects the map, and the threads of that block
// step through the pixels starting at myThreadIdxX with stride myBlockDimX.
//
// NOTE(review): the pointer casts below assume that mycomplex_t and
// mycuComplex_t have identical size and layout (real part first, imaginary
// part second) - confirm against their definitions.
// NOTE(review): myBlockIdxX / myThreadIdxX / myBlockDimX are project macros,
// presumably wrapping blockIdx.x / threadIdx.x / blockDim.x - confirm.
__global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* refmap, mycuComplex_t* out, const int NumberPixelsTotal, const int MapSize, const int NumberMaps, const int Offset)
{
    if (myBlockIdxX >= NumberMaps) return;

    // Reinterpret both inputs as the struct type so that each complex value is
    // fetched as one object instead of two separately indexed components.
    const mycuComplex_t* myin = (const mycuComplex_t*) &refmap[myBlockIdxX * MapSize + Offset];
    const mycuComplex_t* myconv = (const mycuComplex_t*) convmap;
    mycuComplex_t* myout = &out[myBlockIdxX * MapSize];

    for (int i = myThreadIdxX; i < NumberPixelsTotal; i += myBlockDimX)
    {
        const mycuComplex_t conv = myconv[i];
        const mycuComplex_t in = myin[i];

        // (a + ib) * (c - id) = (ac + bd) + i(bc - ad)
        mycuComplex_t val;
        val.x = conv.x * in.x + conv.y * in.y;
        val.y = conv.y * in.x - conv.x * in.y;

        // Assemble the result in a local and store it with a single assignment.
        myout[i] = val;
    }
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment