From 050253b87b163160dafa2f59a3ffeb420af1ace2 Mon Sep 17 00:00:00 2001
From: David Rohr <drohr@jwdt.org>
Date: Sun, 20 Apr 2014 11:04:53 +0200
Subject: [PATCH] Improve FFT map multiplication kernel

---
 bioem_cuda.cu | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/bioem_cuda.cu b/bioem_cuda.cu
index 724d325..a4bdac3 100644
--- a/bioem_cuda.cu
+++ b/bioem_cuda.cu
@@ -115,12 +115,18 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon
 __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* refmap, mycuComplex_t* out, const int NumberPixelsTotal, const int MapSize, const int NumberMaps, const int Offset)
 {
 	if (myBlockIdxX >= NumberMaps) return;
-	const mycomplex_t* myin = &refmap[myBlockIdxX * MapSize + Offset];
+	const mycuComplex_t* myin = (mycuComplex_t*) &refmap[myBlockIdxX * MapSize + Offset];
+	const mycuComplex_t* myconv = (mycuComplex_t*) convmap;
 	mycuComplex_t* myout = &out[myBlockIdxX * MapSize];
 	for(int i = myThreadIdxX; i < NumberPixelsTotal; i += myBlockDimX)
 	{
-		myout[i].x = convmap[i][0] * myin[i][0] + convmap[i][1] * myin[i][1];
-		myout[i].y = convmap[i][1] * myin[i][0] - convmap[i][0] * myin[i][1];
+		mycuComplex_t val;
+		const mycuComplex_t conv = myconv[i];
+		const mycuComplex_t in = myin[i];
+
+		val.x = conv.x * in.x + conv.y * in.y;
+		val.y = conv.y * in.x - conv.x * in.y;
+		myout[i] = val;
 	}
 }
 
-- 
GitLab