bioem_algorithm.h 14.7 KB
Newer Older
Pilar Cossio's avatar
License  
Pilar Cossio committed
1 2 3 4 5 6 7 8 9
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        < BioEM software for Bayesian inference of Electron Microscopy images>
            Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
            Max Planck Institute of Biophysics, Frankfurt, Germany.
 
                See license statement for terms of distribution.

   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

qon's avatar
qon committed
10 11
#ifndef BIOEM_ALGORITHM_H
#define BIOEM_ALGORITHM_H
12 13 14
//#include <boost/iterator/iterator_concepts.hpp>

#ifndef BIOEM_GPUCODE
15
//#define SSECODE //Explicit SSE code, not correct yet since loop counter is assumed multiple of 4, anyway not faster than autovectorized code, only implemented for float, not for double.
16 17 18 19 20 21
#endif

#ifdef SSECODE
#include <emmintrin.h>
#include <smmintrin.h>
#endif
qon's avatar
qon committed
22

23
template <int GPUAlgo>
24
__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability& pProb, bool doAngle, myfloat_t* buf3 = NULL, int* bufint = NULL)
25
{
David Rohr's avatar
David Rohr committed
26 27
	// *******  Summing total Probabilities *************
	// ******* Need a constant because of numerical divergence*****
28 29
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
	if(pProbMap.Constoadd < logpro)
30
	{
31 32
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
33
	}
34
	if (GPUAlgo != 2) pProbMap.Total += exp(logpro - pProbMap.Constoadd);
35

36
	if (doAngle)
37
	{
38
		bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
39

40 41 42 43 44 45 46 47
		//Summing probabilities for each orientation
		if(pProbAngle.ConstAngle < logpro)
		{
			pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
			pProbAngle.ConstAngle = logpro;
		}

		if (GPUAlgo != 2) pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
48 49
	}

David Rohr's avatar
David Rohr committed
50
	// ********** Getting parameters that maximize the probability ***********
51
	if(pProbMap.max_prob < logpro)
52
	{
53
		pProbMap.max_prob = logpro;
54 55 56 57 58 59 60 61

		if (GPUAlgo == 2)
		{
			bufint[0] = 1;
			buf3[1] = logpro;
		}
		else
		{
62 63
			pProbMap.max_prob_cent_x = cent_x;
			pProbMap.max_prob_cent_y = cent_y;
64
		}
65 66
		pProbMap.max_prob_orient = iOrient;
		pProbMap.max_prob_conv = iConv;
67 68 69 70 71 72 73 74 75
	}
}

__device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, const myfloat_t sum, const myfloat_t sumsquare, const myfloat_t crossproMapConv, const myfloat_t sumref, const myfloat_t sumsquareref)
{
	// Related to Reference calculated Projection
	const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum);

	// Products of different cross-correlations (first element in formula)
76 77
	const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare - crossproMapConv * crossproMapConv) +
							   2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare;
78

David Rohr's avatar
David Rohr committed
79
	/// ******* Calculating log of Prob*********
80 81 82 83 84
	// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
	const myfloat_t logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb);
	return(logpro);
}

85
__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
86
{
David Rohr's avatar
David Rohr committed
87 88 89
	// ********************************************************
	// *********** Calculates the BioEM probability ***********
	// ********************************************************
90 91 92 93

	const myfloat_t logpro = calc_logpro(param, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);

	//GCC is too stupid to inline properly, so the code is copied here
94 95
	//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb, param.writeAngles);
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
96
	if(pProbMap.Constoadd < logpro)
97
	{
98 99
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
100
	}
101
	pProbMap.Total += exp(logpro - pProbMap.Constoadd);
102

103
	if (param.writeAngles)
104
	{
105
		bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
106

107 108 109 110 111 112 113 114
		if(pProbAngle.ConstAngle < logpro)
		{
			pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
			pProbAngle.ConstAngle = logpro;
		}
		pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
	}
	
115
	if(pProbMap.max_prob < logpro)
116
	{
117 118 119 120 121
		pProbMap.max_prob = logpro;
		pProbMap.max_prob_cent_x = disx;
		pProbMap.max_prob_cent_y = disy;
		pProbMap.max_prob_orient = iOrient;
		pProbMap.max_prob_conv = iConv;
122 123 124
	}
}

125
__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
126
{
127
	for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
128
	{
129
		for (int cent_y = 0; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
130
		{
131
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y, pProb, param, RefMap);
132
		}
133
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
134
		{
David Rohr's avatar
David Rohr committed
135
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y - param.NumberPixels, pProb, param, RefMap);
136 137
		}
	}
138
	for (int cent_x = param.NumberPixels - param.maxDisplaceCenter; cent_x < param.NumberPixels; cent_x = cent_x + param.GridSpaceCenter)
139
	{
140
		for (int cent_y = 0; cent_y < param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
141
		{
David Rohr's avatar
David Rohr committed
142
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y, pProb, param, RefMap);
143
		}
144
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y <= param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
145
		{
David Rohr's avatar
David Rohr committed
146
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y - param.NumberPixels, pProb, param, RefMap);
147 148 149 150
		}
	}
}

qon's avatar
qon committed
151
template <int GPUAlgo, class RefT>
152
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap,
153
		const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true)
qon's avatar
qon committed
154
{
155 156 157

	// **********************  Calculating BioEM Probability ********************************
	// ************************* Loop of center displacement here ***************************
qon's avatar
qon committed
158

159
	// Taking into account the center displacement
qon's avatar
qon committed
160

161
	// Inizialzing crosscorrelations of calculated projected convolutions
162 163 164 165
#ifdef SSECODE
	myfloat_t sum, sumsquare, crossproMapConv;
	__m128 sum_v = _mm_setzero_ps(), sumsquare_v = _mm_setzero_ps(), cross_v = _mm_setzero_ps(), d1, d2;
#else
166 167 168
	myfloat_t sum = 0.0;
	myfloat_t sumsquare = 0.0;
	myfloat_t crossproMapConv = 0.0;
169
#endif
170
	// Loop over Pixels to calculate dot product and cross-correlations of displaced Ref Conv. Map
171
	myfloat_t logpro;
David Rohr's avatar
David Rohr committed
172
	if (GPUAlgo != 2 || threadActive)
qon's avatar
qon committed
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
	{
		int iStart, jStart, iEnd, jEnd;
		if (cent_x < 0)
		{
			iStart = -cent_x;
			iEnd = param.NumberPixels;
		}
		else
		{
			iStart = 0;
			iEnd = param.NumberPixels - cent_x;
		}
		if (cent_y < 0)
		{
			jStart = -cent_y;
			jEnd = param.NumberPixels;
		}
		else
		{
			jStart = 0;
			jEnd = param.NumberPixels - cent_y;
		}

		for (int i = iStart; i < iEnd; i += 1)
		{
198 199 200 201 202
#ifdef SSECODE
			const float* ptr1 = &Mapconv.points[i + cent_x][jStart + cent_y];
			const float* ptr2 = RefMap.getp(iRefMap, i, jStart);
			int j;
			const int count = jEnd - jStart;
203
			for (j = 0; j <= count - 4; j += 4)
204 205 206 207 208 209 210 211 212 213
			{
				d1 = _mm_loadu_ps(ptr1);
				d2 = _mm_loadu_ps(ptr2);
				sum_v = _mm_add_ps(sum_v, d1);
				sumsquare_v = _mm_add_ps(sumsquare_v, _mm_mul_ps(d1, d1));
				cross_v = _mm_add_ps(cross_v, _mm_mul_ps(d1, d2));
				ptr1 += 4;
				ptr2 += 4;
			}
#else
qon's avatar
qon committed
214 215
			for (int j = jStart; j < jEnd; j += 1)
			{
216
				const myfloat_t pointMap = Mapconv[(i + cent_x) * param.NumberPixels + j + cent_y];
qon's avatar
qon committed
217 218 219 220 221
				const myfloat_t pointRefMap = RefMap.get(iRefMap, i, j);
				crossproMapConv += pointMap * pointRefMap;
				// Crosscorrelation of calculated displaced map
				sum += pointMap;
				// Calculate Sum of pixels squared
222
				sumsquare += pointMap * pointMap;
qon's avatar
qon committed
223
			}
224
#endif
qon's avatar
qon committed
225
		}
226 227 228 229 230 231 232 233 234 235 236
#ifdef SSECODE
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum = _mm_cvtss_f32(sum_v);
		sumsquare = _mm_cvtss_f32(sumsquare_v);
		crossproMapConv = _mm_cvtss_f32(cross_v);
#endif
David Rohr's avatar
David Rohr committed
237

238
		// Calculating elements in BioEM Probability formula
239
		logpro = calc_logpro(param, sum, sumsquare, crossproMapConv, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);
240 241 242 243 244
	}
	else
	{
		logpro = 0;
	}
qon's avatar
qon committed
245 246 247 248 249 250 251 252

#ifdef BIOEM_GPUCODE
	if (GPUAlgo == 2)
	{
		extern __shared__ myfloat_t buf[];
		myfloat_t* buf2 = &buf[myBlockDimX];
		myfloat_t* buf3 = &buf2[myBlockDimX + 4 * myRef];
		int* bufint = (int*) buf3;
David Rohr's avatar
David Rohr committed
253

qon's avatar
qon committed
254 255 256 257 258 259
		buf[myThreadIdxX] = logpro;
		if (myShift == 0)
		{
			bufint[0] = 0;
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
260

qon's avatar
qon committed
261 262 263 264 265
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512) if (buf[myThreadIdxX + 512] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 512];
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
266

qon's avatar
qon committed
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
		if (nShifts2 >= 512)
		{
			if (myShift < 256) if (buf[myThreadIdxX + 256] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 256];
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128) if (buf[myThreadIdxX + 128] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 128];
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64) if (buf[myThreadIdxX + 64] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 64];
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
287
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
qon's avatar
qon committed
288 289 290 291 292 293 294 295 296
			if (nShifts2 >= 64 && vbuf[myThreadIdxX + 32] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 32];
			if (nShifts2 >= 32 && vbuf[myThreadIdxX + 16] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 16];
			if (nShifts2 >= 16 && vbuf[myThreadIdxX + 8] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 8];
			if (nShifts2 >= 8 && vbuf[myThreadIdxX + 4] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 4];
			if (nShifts2 >= 4 && vbuf[myThreadIdxX + 2] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 2];
			if (nShifts2 >= 2 && vbuf[myThreadIdxX + 1] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 1];
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
				const myfloat_t logpro_max = vbuf[myThreadIdxX];
297
				update_prob<GPUAlgo>(logpro_max, iRefMap, iOrient, iConv, -1, -1, pProb, param.writeAngles, buf3, bufint);
qon's avatar
qon committed
298 299
			}
		}
David Rohr's avatar
David Rohr committed
300

qon's avatar
qon committed
301
		__syncthreads();
302 303 304 305

		bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
		bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

qon's avatar
qon committed
306 307
		if (bufint[0] == 1 && buf3[1] == logpro && iRefMap < RefMap.ntotRefMap && atomicAdd(&bufint[0], 1) == 1)
		{
308 309
			pProbMap.max_prob_cent_x = cent_x;
			pProbMap.max_prob_cent_y = cent_y;
qon's avatar
qon committed
310
		}
David Rohr's avatar
David Rohr committed
311

qon's avatar
qon committed
312
		__syncthreads();
David Rohr's avatar
David Rohr committed
313

qon's avatar
qon committed
314 315
		if (iRefMap < RefMap.ntotRefMap)
		{
316 317
			buf[myThreadIdxX] = exp(logpro - pProbMap.Constoadd);
			buf2[myThreadIdxX] = exp(logpro - pProbAngle.ConstAngle);
qon's avatar
qon committed
318 319
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
320

qon's avatar
qon committed
321 322 323 324 325 326 327 328 329
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 512];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 512];
			}
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
330

qon's avatar
qon committed
331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362
		if (nShifts2 >= 512)
		{
			if (myShift < 256)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 256];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 256];
			}
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 128];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 128];
			}
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 64];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 64];
			}
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
363 364
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
			volatile myfloat_t* vbuf2 = buf2;
qon's avatar
qon committed
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
			if (nShifts2 >= 64)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 32];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 32];
			}
			if (nShifts2 >= 32)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 16];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 16];
			}
			if (nShifts2 >= 16)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 8];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 8];
			}
			if (nShifts2 >= 8)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 4];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 4];
			}
			if (nShifts2 >= 4)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 2];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 2];
			}
			if (nShifts2 >= 2)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 1];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 1];
			}
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
397 398
				pProbMap.Total += vbuf[myThreadIdxX];
				pProbAngle.forAngles += vbuf2[myThreadIdxX];
qon's avatar
qon committed
399 400 401 402 403
			}
		}
	}
	else
#endif
404
	{
405
		update_prob < -1 > (logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb, param.writeAngles);
406
	}
qon's avatar
qon committed
407 408 409
}

template <int GPUAlgo, class RefT>
410
__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap)
qon's avatar
qon committed
411
{
412
	for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
413
	{
414
		for (int cent_y = -param.maxDisplaceCenter; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
415
		{
qon's avatar
qon committed
416
			compareRefMap<GPUAlgo>(iRefMap, iOrient, iConv, Mapconv, pProb, param, RefMap, cent_x, cent_y);
417 418
		}
	}
qon's avatar
qon committed
419 420 421
}

#endif