bioem_algorithm.h 14.6 KB
Newer Older
Pilar Cossio's avatar
License  
Pilar Cossio committed
1 2 3 4 5 6 7 8 9
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        < BioEM software for Bayesian inference of Electron Microscopy images>
            Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
            Max Planck Institute of Biophysics, Frankfurt, Germany.
 
                See license statement for terms of distribution.

   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

qon's avatar
qon committed
10 11
#ifndef BIOEM_ALGORITHM_H
#define BIOEM_ALGORITHM_H
12 13 14
//#include <boost/iterator/iterator_concepts.hpp>

#ifndef BIOEM_GPUCODE
15
//#define SSECODE //Explicit SSE code, not correct yet since loop counter is assumed multiple of 4, anyway not faster than autovectorized code, only implemented for float, not for double.
16 17 18 19 20 21
#endif

#ifdef SSECODE
#include <emmintrin.h>
#include <smmintrin.h>
#endif
qon's avatar
qon committed
22

23
template <int GPUAlgo>
24
__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability& pProb, myfloat_t* buf3 = NULL, int* bufint = NULL)
25
{
David Rohr's avatar
David Rohr committed
26 27
	// *******  Summing total Probabilities *************
	// ******* Need a constant because of numerical divergence*****
28 29 30 31
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
	bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

	if(pProbMap.Constoadd < logpro)
32
	{
33 34
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
35 36 37
	}

	//Summing probabilities for each orientation
38
	if(pProbAngle.ConstAngle < logpro)
39
	{
40 41
		pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
		pProbAngle.ConstAngle = logpro;
42 43 44 45
	}

	if (GPUAlgo != 2)
	{
46 47
		pProbMap.Total += exp(logpro - pProbMap.Constoadd);
		pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
48 49
	}

David Rohr's avatar
David Rohr committed
50
	// ********** Getting parameters that maximize the probability ***********
51
	if(pProbMap.max_prob < logpro)
52
	{
53
		pProbMap.max_prob = logpro;
54 55 56 57 58 59 60 61

		if (GPUAlgo == 2)
		{
			bufint[0] = 1;
			buf3[1] = logpro;
		}
		else
		{
62 63
			pProbMap.max_prob_cent_x = cent_x;
			pProbMap.max_prob_cent_y = cent_y;
64
		}
65 66
		pProbMap.max_prob_orient = iOrient;
		pProbMap.max_prob_conv = iConv;
67 68 69 70 71 72 73 74 75
	}
}

__device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, const myfloat_t sum, const myfloat_t sumsquare, const myfloat_t crossproMapConv, const myfloat_t sumref, const myfloat_t sumsquareref)
{
	// Related to Reference calculated Projection
	const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum);

	// Products of different cross-correlations (first element in formula)
76 77
	const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare - crossproMapConv * crossproMapConv) +
							   2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare;
78

David Rohr's avatar
David Rohr committed
79
	/// ******* Calculating log of Prob*********
80 81 82 83 84
	// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
	const myfloat_t logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb);
	return(logpro);
}

85
__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
86
{
David Rohr's avatar
David Rohr committed
87 88 89
	// ********************************************************
	// *********** Calculates the BioEM probability ***********
	// ********************************************************
90 91 92

	const myfloat_t logpro = calc_logpro(param, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);

93 94 95
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
	bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

96
	//GCC is too stupid to inline properly, so the code is copied here
97 98
	//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb);
	if(pProbMap.Constoadd < logpro)
99
	{
100 101
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
102
	}
103
	pProbMap.Total += exp(logpro - pProbMap.Constoadd);
104

105
	if(pProbAngle.ConstAngle < logpro)
106
	{
107 108
		pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
		pProbAngle.ConstAngle = logpro;
109
	}
110
	pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
111

112
	if(pProbMap.max_prob < logpro)
113
	{
114 115 116 117 118
		pProbMap.max_prob = logpro;
		pProbMap.max_prob_cent_x = disx;
		pProbMap.max_prob_cent_y = disy;
		pProbMap.max_prob_orient = iOrient;
		pProbMap.max_prob_conv = iConv;
119 120 121
	}
}

122
__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
123
{
124
	for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
125
	{
126
		for (int cent_y = 0; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
127
		{
128
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y, pProb, param, RefMap);
129
		}
130
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
131
		{
David Rohr's avatar
David Rohr committed
132
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y - param.NumberPixels, pProb, param, RefMap);
133 134
		}
	}
135
	for (int cent_x = param.NumberPixels - param.maxDisplaceCenter; cent_x < param.NumberPixels; cent_x = cent_x + param.GridSpaceCenter)
136
	{
137
		for (int cent_y = 0; cent_y < param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
138
		{
David Rohr's avatar
David Rohr committed
139
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y, pProb, param, RefMap);
140
		}
141
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y <= param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
142
		{
David Rohr's avatar
David Rohr committed
143
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y - param.NumberPixels, pProb, param, RefMap);
144 145 146 147
		}
	}
}

qon's avatar
qon committed
148
template <int GPUAlgo, class RefT>
149
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap,
150
		const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true)
qon's avatar
qon committed
151
{
152 153 154

	// **********************  Calculating BioEM Probability ********************************
	// ************************* Loop of center displacement here ***************************
qon's avatar
qon committed
155

156
	// Taking into account the center displacement
qon's avatar
qon committed
157

158
	// Inizialzing crosscorrelations of calculated projected convolutions
159 160 161 162
#ifdef SSECODE
	myfloat_t sum, sumsquare, crossproMapConv;
	__m128 sum_v = _mm_setzero_ps(), sumsquare_v = _mm_setzero_ps(), cross_v = _mm_setzero_ps(), d1, d2;
#else
163 164 165
	myfloat_t sum = 0.0;
	myfloat_t sumsquare = 0.0;
	myfloat_t crossproMapConv = 0.0;
166
#endif
167
	// Loop over Pixels to calculate dot product and cross-correlations of displaced Ref Conv. Map
168
	myfloat_t logpro;
David Rohr's avatar
David Rohr committed
169
	if (GPUAlgo != 2 || threadActive)
qon's avatar
qon committed
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
	{
		int iStart, jStart, iEnd, jEnd;
		if (cent_x < 0)
		{
			iStart = -cent_x;
			iEnd = param.NumberPixels;
		}
		else
		{
			iStart = 0;
			iEnd = param.NumberPixels - cent_x;
		}
		if (cent_y < 0)
		{
			jStart = -cent_y;
			jEnd = param.NumberPixels;
		}
		else
		{
			jStart = 0;
			jEnd = param.NumberPixels - cent_y;
		}

		for (int i = iStart; i < iEnd; i += 1)
		{
195 196 197 198 199
#ifdef SSECODE
			const float* ptr1 = &Mapconv.points[i + cent_x][jStart + cent_y];
			const float* ptr2 = RefMap.getp(iRefMap, i, jStart);
			int j;
			const int count = jEnd - jStart;
200
			for (j = 0; j <= count - 4; j += 4)
201 202 203 204 205 206 207 208 209 210
			{
				d1 = _mm_loadu_ps(ptr1);
				d2 = _mm_loadu_ps(ptr2);
				sum_v = _mm_add_ps(sum_v, d1);
				sumsquare_v = _mm_add_ps(sumsquare_v, _mm_mul_ps(d1, d1));
				cross_v = _mm_add_ps(cross_v, _mm_mul_ps(d1, d2));
				ptr1 += 4;
				ptr2 += 4;
			}
#else
qon's avatar
qon committed
211 212
			for (int j = jStart; j < jEnd; j += 1)
			{
213
				const myfloat_t pointMap = Mapconv[(i + cent_x) * param.NumberPixels + j + cent_y];
qon's avatar
qon committed
214 215 216 217 218
				const myfloat_t pointRefMap = RefMap.get(iRefMap, i, j);
				crossproMapConv += pointMap * pointRefMap;
				// Crosscorrelation of calculated displaced map
				sum += pointMap;
				// Calculate Sum of pixels squared
219
				sumsquare += pointMap * pointMap;
qon's avatar
qon committed
220
			}
221
#endif
qon's avatar
qon committed
222
		}
223 224 225 226 227 228 229 230 231 232 233
#ifdef SSECODE
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum = _mm_cvtss_f32(sum_v);
		sumsquare = _mm_cvtss_f32(sumsquare_v);
		crossproMapConv = _mm_cvtss_f32(cross_v);
#endif
David Rohr's avatar
David Rohr committed
234

235
		// Calculating elements in BioEM Probability formula
236
		logpro = calc_logpro(param, sum, sumsquare, crossproMapConv, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);
237 238 239 240 241
	}
	else
	{
		logpro = 0;
	}
qon's avatar
qon committed
242 243 244 245 246 247 248 249

#ifdef BIOEM_GPUCODE
	if (GPUAlgo == 2)
	{
		extern __shared__ myfloat_t buf[];
		myfloat_t* buf2 = &buf[myBlockDimX];
		myfloat_t* buf3 = &buf2[myBlockDimX + 4 * myRef];
		int* bufint = (int*) buf3;
David Rohr's avatar
David Rohr committed
250

qon's avatar
qon committed
251 252 253 254 255 256
		buf[myThreadIdxX] = logpro;
		if (myShift == 0)
		{
			bufint[0] = 0;
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
257

qon's avatar
qon committed
258 259 260 261 262
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512) if (buf[myThreadIdxX + 512] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 512];
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
263

qon's avatar
qon committed
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
		if (nShifts2 >= 512)
		{
			if (myShift < 256) if (buf[myThreadIdxX + 256] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 256];
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128) if (buf[myThreadIdxX + 128] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 128];
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64) if (buf[myThreadIdxX + 64] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 64];
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
284
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
qon's avatar
qon committed
285 286 287 288 289 290 291 292 293
			if (nShifts2 >= 64 && vbuf[myThreadIdxX + 32] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 32];
			if (nShifts2 >= 32 && vbuf[myThreadIdxX + 16] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 16];
			if (nShifts2 >= 16 && vbuf[myThreadIdxX + 8] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 8];
			if (nShifts2 >= 8 && vbuf[myThreadIdxX + 4] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 4];
			if (nShifts2 >= 4 && vbuf[myThreadIdxX + 2] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 2];
			if (nShifts2 >= 2 && vbuf[myThreadIdxX + 1] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 1];
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
				const myfloat_t logpro_max = vbuf[myThreadIdxX];
294
				update_prob<GPUAlgo>(logpro_max, iRefMap, iOrient, iConv, -1, -1, pProb, buf3, bufint);
qon's avatar
qon committed
295 296
			}
		}
David Rohr's avatar
David Rohr committed
297

qon's avatar
qon committed
298
		__syncthreads();
299 300 301 302

		bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
		bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

qon's avatar
qon committed
303 304
		if (bufint[0] == 1 && buf3[1] == logpro && iRefMap < RefMap.ntotRefMap && atomicAdd(&bufint[0], 1) == 1)
		{
305 306
			pProbMap.max_prob_cent_x = cent_x;
			pProbMap.max_prob_cent_y = cent_y;
qon's avatar
qon committed
307
		}
David Rohr's avatar
David Rohr committed
308

qon's avatar
qon committed
309
		__syncthreads();
David Rohr's avatar
David Rohr committed
310

qon's avatar
qon committed
311 312
		if (iRefMap < RefMap.ntotRefMap)
		{
313 314
			buf[myThreadIdxX] = exp(logpro - pProbMap.Constoadd);
			buf2[myThreadIdxX] = exp(logpro - pProbAngle.ConstAngle);
qon's avatar
qon committed
315 316
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
317

qon's avatar
qon committed
318 319 320 321 322 323 324 325 326
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 512];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 512];
			}
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
327

qon's avatar
qon committed
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
		if (nShifts2 >= 512)
		{
			if (myShift < 256)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 256];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 256];
			}
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 128];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 128];
			}
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 64];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 64];
			}
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
360 361
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
			volatile myfloat_t* vbuf2 = buf2;
qon's avatar
qon committed
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
			if (nShifts2 >= 64)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 32];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 32];
			}
			if (nShifts2 >= 32)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 16];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 16];
			}
			if (nShifts2 >= 16)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 8];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 8];
			}
			if (nShifts2 >= 8)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 4];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 4];
			}
			if (nShifts2 >= 4)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 2];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 2];
			}
			if (nShifts2 >= 2)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 1];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 1];
			}
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
394 395
				pProbMap.Total += vbuf[myThreadIdxX];
				pProbAngle.forAngles += vbuf2[myThreadIdxX];
qon's avatar
qon committed
396 397 398 399 400
			}
		}
	}
	else
#endif
401
	{
402
		update_prob < -1 > (logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb);
403
	}
qon's avatar
qon committed
404 405 406
}

template <int GPUAlgo, class RefT>
407
__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap)
qon's avatar
qon committed
408
{
409
	for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
410
	{
411
		for (int cent_y = -param.maxDisplaceCenter; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
412
		{
qon's avatar
qon committed
413
			compareRefMap<GPUAlgo>(iRefMap, iOrient, iConv, Mapconv, pProb, param, RefMap, cent_x, cent_y);
414 415
		}
	}
qon's avatar
qon committed
416 417 418
}

#endif