Commit 050253b8 authored by David Rohr
Browse files

Improve FFT map multiplication kernel

parent a65e74a6
...@@ -115,12 +115,18 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon ...@@ -115,12 +115,18 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon
// NOTE: these lines are a two-column diff rendering: on most lines the previous
// version of the statement comes first and the revised version follows it.
// Lines containing a single statement exist only in the revised version.
//
// multComplexMap: for every map index below NumberMaps, combines convmap with
// one slice of refmap element by element and stores the result in out:
//   out.x = conv.x * in.x + conv.y * in.y
//   out.y = conv.y * in.x - conv.x * in.y
// (this is conv * conj(in) if component 0/.x is the real part and component
// 1/.y the imaginary part -- TODO confirm against the type definitions).
//
// Parameters:
//   convmap           - first operand, indexed 0 .. NumberPixelsTotal-1; the same
//                       elements are read for every map.
//   refmap            - second operand; the slice for a map starts at element
//                       myBlockIdxX * MapSize + Offset.
//   out               - result buffer; the slice for a map starts at element
//                       myBlockIdxX * MapSize.
//   NumberPixelsTotal - number of elements processed per map.
//   MapSize           - element stride between consecutive maps in refmap and out.
//   NumberMaps        - number of maps; larger map indices exit immediately.
//   Offset            - extra element offset applied to refmap only.
//
// myBlockIdxX, myThreadIdxX and myBlockDimX are defined outside this excerpt;
// presumably they wrap blockIdx.x, threadIdx.x and blockDim.x -- verify.
__global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* refmap, mycuComplex_t* out, const int NumberPixelsTotal, const int MapSize, const int NumberMaps, const int Offset) __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* refmap, mycuComplex_t* out, const int NumberPixelsTotal, const int MapSize, const int NumberMaps, const int Offset)
{ {
// Map indices at or beyond NumberMaps have no work to do.
if (myBlockIdxX >= NumberMaps) return; if (myBlockIdxX >= NumberMaps) return;
// Revised version views the refmap slice as mycuComplex_t (.x/.y) instead of
// indexing mycomplex_t with [0]/[1].
// NOTE(review): the cast assumes mycomplex_t and mycuComplex_t share the same
// size and layout, and it drops const inside the cast expression -- confirm.
const mycomplex_t* myin = &refmap[myBlockIdxX * MapSize + Offset]; const mycuComplex_t* myin = (mycuComplex_t*) &refmap[myBlockIdxX * MapSize + Offset];
// Same reinterpretation for convmap (revised version only).
const mycuComplex_t* myconv = (mycuComplex_t*) convmap;
mycuComplex_t* myout = &out[myBlockIdxX * MapSize]; mycuComplex_t* myout = &out[myBlockIdxX * MapSize];
// Each thread starts at element myThreadIdxX and advances by myBlockDimX.
for(int i = myThreadIdxX; i < NumberPixelsTotal; i += myBlockDimX) for(int i = myThreadIdxX; i < NumberPixelsTotal; i += myBlockDimX)
{ {
// Previous version: wrote myout[i].x and myout[i].y separately, indexing both
// inputs per component. Revised version: copies each operand into a local
// struct once, builds the result in val, and stores it with one assignment.
myout[i].x = convmap[i][0] * myin[i][0] + convmap[i][1] * myin[i][1]; mycuComplex_t val;
myout[i].y = convmap[i][1] * myin[i][0] - convmap[i][0] * myin[i][1]; const mycuComplex_t conv = myconv[i];
const mycuComplex_t in = myin[i];
val.x = conv.x * in.x + conv.y * in.y;
val.y = conv.y * in.x - conv.x * in.y;
myout[i] = val;
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment