From 050253b87b163160dafa2f59a3ffeb420af1ace2 Mon Sep 17 00:00:00 2001
From: David Rohr <drohr@jwdt.org>
Date: Sun, 20 Apr 2014 11:04:53 +0200
Subject: [PATCH] Improve FFT map multiplication kernel

---
 bioem_cuda.cu | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/bioem_cuda.cu b/bioem_cuda.cu
index 724d325..a4bdac3 100644
--- a/bioem_cuda.cu
+++ b/bioem_cuda.cu
@@ -115,12 +115,18 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon
 __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* refmap, mycuComplex_t* out, const int NumberPixelsTotal, const int MapSize, const int NumberMaps, const int Offset)
 {
 	if (myBlockIdxX >= NumberMaps) return;
-	const mycomplex_t* myin = &refmap[myBlockIdxX * MapSize + Offset];
+	const mycuComplex_t* myin = (mycuComplex_t*) &refmap[myBlockIdxX * MapSize + Offset]; // NOTE(review): cast assumes mycomplex_t ([0]/[1]) and mycuComplex_t (.x/.y) share the same two-component layout - confirm
+	const mycuComplex_t* myconv = (mycuComplex_t*) convmap; // same reinterpretation for convmap; it is indexed by i only, with no per-map offset
 	mycuComplex_t* myout = &out[myBlockIdxX * MapSize];
 	for(int i = myThreadIdxX; i < NumberPixelsTotal; i += myBlockDimX)
 	{
-		myout[i].x = convmap[i][0] * myin[i][0] + convmap[i][1] * myin[i][1];
-		myout[i].y = convmap[i][1] * myin[i][0] - convmap[i][0] * myin[i][1];
+		mycuComplex_t val; // local result, filled component-wise below and stored once
+		const mycuComplex_t conv = myconv[i]; // copy the convmap element into a local before using its components
+		const mycuComplex_t in = myin[i]; // copy the refmap element (already offset for this map) into a local
+
+		val.x = conv.x * in.x + conv.y * in.y; // presumably .x/.y are real/imaginary parts; then this is Re(conv * conj(in))
+		val.y = conv.y * in.x - conv.x * in.y; // under the same assumption: Im(conv * conj(in))
+		myout[i] = val; // single struct assignment replaces the two per-component writes of the removed lines
 	}
 }
 
-- 
GitLab