From 461cf12aa3b29be7fe4de3d196223bffb683303c Mon Sep 17 00:00:00 2001
From: David Rohr <drohr@jwdt.org>
Date: Sat, 19 Apr 2014 18:50:05 +0200
Subject: [PATCH] fix double precision mode

---
 bioem_algorithm.h |  4 +---
 bioem_cuda.cu     | 44 +++++++++++++++++++++++++++++++++++++++++---
 include/defs.h    |  4 +++-
 include/param.h   |  2 +-
 param.cpp         |  8 +++++---
 5 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/bioem_algorithm.h b/bioem_algorithm.h
index 87dbed6..2bd499f 100644
--- a/bioem_algorithm.h
+++ b/bioem_algorithm.h
@@ -70,7 +70,7 @@ __device__ static inline myfloat_t calc_logpro(const bioem_param_device& param,
 	return(logpro);
 }
 
-__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, float value, int disx, int disy, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
+__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
 {
 	// ********************************************************
 	// *********** Calculates the BioEM probability ***********
@@ -379,8 +379,6 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
 	}
 	else
 #endif
-
-		// Summing & Storing total/Orientation Probabilites for each map
 	{
 		update_prob < -1 > (logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb);
 	}
diff --git a/bioem_cuda.cu b/bioem_cuda.cu
index 44cc666..da2345c 100644
--- a/bioem_cuda.cu
+++ b/bioem_cuda.cu
@@ -19,6 +19,43 @@ using namespace std;
 	} \
 }
 
+static const char *cufftGetErrorStrung(cufftResult error)
+{
+    switch (error)
+    {
+        case CUFFT_SUCCESS:
+            return "CUFFT_SUCCESS";
+
+        case CUFFT_INVALID_PLAN:
+            return "CUFFT_INVALID_PLAN";
+
+        case CUFFT_ALLOC_FAILED:
+            return "CUFFT_ALLOC_FAILED";
+
+        case CUFFT_INVALID_TYPE:
+            return "CUFFT_INVALID_TYPE";
+
+        case CUFFT_INVALID_VALUE:
+            return "CUFFT_INVALID_VALUE";
+
+        case CUFFT_INTERNAL_ERROR:
+            return "CUFFT_INTERNAL_ERROR";
+
+        case CUFFT_EXEC_FAILED:
+            return "CUFFT_EXEC_FAILED";
+
+        case CUFFT_SETUP_FAILED:
+            return "CUFFT_SETUP_FAILED";
+
+        case CUFFT_INVALID_SIZE:
+            return "CUFFT_INVALID_SIZE";
+
+        case CUFFT_UNALIGNED_DATA:
+            return "CUFFT_UNALIGNED_DATA";
+    }
+    return "UNKNOWN";
+}
+
 bioem_cuda::bioem_cuda()
 {
 	deviceInitialized = 0;
@@ -127,9 +164,10 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c
 		{
 			const int num = min(CUDA_FFTS_AT_ONCE, maxRef - i);
 			multComplexMap<<<num, CUDA_THREAD_COUNT, 0, cudaStream>>>(&pConvMapFFT[(iConv & 1) * param.FFTMapSize], pRefMapsFFT, pFFTtmp2, param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D, param.FFTMapSize, num, i);
-			if (mycufftExecC2R(i + CUDA_FFTS_AT_ONCE > maxRef ? plan[1] : plan[0], pFFTtmp2, pFFTtmp) != CUFFT_SUCCESS)
+			cufftResult err = mycufftExecC2R(i + CUDA_FFTS_AT_ONCE > maxRef ? plan[1] : plan[0], pFFTtmp2, pFFTtmp);
+			if (err != CUFFT_SUCCESS)
 			{
-				cout << "Error running CUFFT\n";
+				cout << "Error running CUFFT " << cufftGetErrorStrung(err) << "\n";
 				exit(1);
 			}
 			cuDoRefMapsFFT<<<divup(num, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>>(iProjectionOut, iConv, pFFTtmp, sumC, sumsquareC, pProb_device, param.param_device, *gpumap, num, i);
@@ -301,7 +339,7 @@ int bioem_cuda::deviceStartRun()
 		for (int i = 0; i < 2; i++)
 		{
 			int n[2] = {param.param_device.NumberPixels, param.param_device.NumberPixels};
-			if (cufftPlanMany(&plan[i], 2, n, NULL, 1, 0, NULL, 1, 0, CUFFT_C2R, i ? (maxRef % CUDA_FFTS_AT_ONCE) : CUDA_FFTS_AT_ONCE) != CUFFT_SUCCESS)
+			if (cufftPlanMany(&plan[i], 2, n, NULL, 1, 0, NULL, 1, 0, MY_CUFFT_C2R, i ? (maxRef % CUDA_FFTS_AT_ONCE) : CUDA_FFTS_AT_ONCE) != CUFFT_SUCCESS)
 			{
 				cout << "Error planning CUFFT\n";
 				exit(1);
diff --git a/include/defs.h b/include/defs.h
index 9be83f6..2c65bff 100644
--- a/include/defs.h
+++ b/include/defs.h
@@ -1,7 +1,7 @@
 #ifndef BIOEM_DEFS_H
 #define BIOEM_DEFS_H
 
-//#define BIOEM_USE_DOUBLE
+#define BIOEM_USE_DOUBLE
 
 #ifndef BIOEM_USE_DOUBLE
 typedef float myfloat_t;
@@ -16,6 +16,7 @@ typedef float myfloat_t;
 #define myfftw_plan_dft_r2c_2d fftwf_plan_dft_r2c_2d
 #define myfftw_plan_dft_c2r_2d fftwf_plan_dft_c2r_2d
 #define myfftw_plan fftwf_plan
+#define MY_CUFFT_C2R CUFFT_C2R
 #define mycufftExecC2R cufftExecC2R
 #define mycuComplex_t cuComplex
 #else
@@ -33,6 +34,7 @@ typedef double myfloat_t;
 #define myfftw_plan fftw_plan
 #define mycufftExecC2R cufftExecZ2D
 #define mycuComplex_t cuDoubleComplex
+#define MY_CUFFT_C2R CUFFT_Z2D
 #endif
 typedef myfloat_t mycomplex_t[2];
 
diff --git a/include/param.h b/include/param.h
index 733b9d8..df8547a 100644
--- a/include/param.h
+++ b/include/param.h
@@ -64,7 +64,7 @@ public:
 	myfloat_t gridCTF_amp;
 	// Others
 	//myfloat_t volu;//in device class
-	myfloat3_t angles[MAX_ORIENT];
+	myfloat3_t* angles;
 	int nTotGridAngles;
 	int nTotCTFs;
 	//myfloat_t Ntotpi;//in device class
diff --git a/param.cpp b/param.cpp
index f5323fe..c92a5bd 100644
--- a/param.cpp
+++ b/param.cpp
@@ -33,6 +33,7 @@ bioem_param::bioem_param()
 
 	refCTF = NULL;
 	CtfParam = NULL;
+	angles = NULL;
 }
 
 int bioem_param::readParameters()
@@ -235,6 +236,7 @@ int bioem_param::CalculateGridsParam() //TO DO FOR QUATERNIONS
 	cos_grid_beta = 2.f / (myfloat_t) angleGridPointsBeta;
 
 	// Euler Angle Array
+	angles = new myfloat3_t[angleGridPointsAlpha * angleGridPointsBeta * angleGridPointsAlpha];
 	for (int ialpha = 0; ialpha < angleGridPointsAlpha; ialpha ++)
 	{
 		for (int ibeta = 0; ibeta < angleGridPointsBeta; ibeta ++)
@@ -344,7 +346,6 @@ int bioem_param::CalculateRefCTF()
 	return(0);
 }
 
-
 bioem_param::~bioem_param()
 {
 	releaseFFTPlans();
@@ -356,6 +357,7 @@ bioem_param::~bioem_param()
 	numberGridPointsCTF_phase = 0;
 	param_device.maxDisplaceCenter = 0;
 	numberGridPointsDisplaceCenter = 0;
-	delete[] refCTF;
-	delete[] CtfParam;
+	if (refCTF) delete[] refCTF;
+	if (CtfParam) delete[] CtfParam;
+	if (angles) delete[] angles;
 }
-- 
GitLab