bioem_algorithm.h 14.7 KB
Newer Older
Pilar Cossio's avatar
License  
Pilar Cossio committed
1
2
3
4
5
6
7
8
9
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        < BioEM software for Bayesian inference of Electron Microscopy images>
            Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
            Max Planck Institute of Biophysics, Frankfurt, Germany.
 
                See license statement for terms of distribution.

   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

qon's avatar
qon committed
10
11
#ifndef BIOEM_ALGORITHM_H
#define BIOEM_ALGORITHM_H
12
13
14
//#include <boost/iterator/iterator_concepts.hpp>

#ifndef BIOEM_GPUCODE
15
//#define SSECODE //Explicit SSE code, not correct yet since loop counter is assumed multiple of 4, anyway not faster than autovectorized code, only implemented for float, not for double.
16
17
18
19
20
21
#endif

#ifdef SSECODE
#include <emmintrin.h>
#include <smmintrin.h>
#endif
qon's avatar
qon committed
22

23
template <int GPUAlgo>
24
__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability& pProb, bool doAngle, myfloat_t* buf3 = NULL, int* bufint = NULL)
25
{
David Rohr's avatar
David Rohr committed
26
27
	// *******  Summing total Probabilities *************
	// ******* Need a constant because of numerical divergence*****
28
29
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
	if(pProbMap.Constoadd < logpro)
30
	{
31
32
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
David Rohr's avatar
David Rohr committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
		
		// ********** Getting parameters that maximize the probability ***********
		if (GPUAlgo == 2)
		{
			bufint[0] = 1;
			buf3[1] = logpro;
		}
		else
		{
			pProbMap.max.max_prob_cent_x = cent_x;
			pProbMap.max.max_prob_cent_y = cent_y;
		}
		pProbMap.max.max_prob_orient = iOrient;
		pProbMap.max.max_prob_conv = iConv;
47
	}
48
	if (GPUAlgo != 2) pProbMap.Total += exp(logpro - pProbMap.Constoadd);
49

50
	if (doAngle)
51
	{
52
		bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
53

54
55
56
57
58
59
60
61
		//Summing probabilities for each orientation
		if(pProbAngle.ConstAngle < logpro)
		{
			pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
			pProbAngle.ConstAngle = logpro;
		}

		if (GPUAlgo != 2) pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
62
63
64
65
66
67
68
69
70
	}
}

__device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, const myfloat_t sum, const myfloat_t sumsquare, const myfloat_t crossproMapConv, const myfloat_t sumref, const myfloat_t sumsquareref)
{
	// Related to Reference calculated Projection
	const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum);

	// Products of different cross-correlations (first element in formula)
71
72
	const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare - crossproMapConv * crossproMapConv) +
							   2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare;
73

David Rohr's avatar
David Rohr committed
74
	/// ******* Calculating log of Prob*********
75
76
77
78
79
	// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
	const myfloat_t logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb);
	return(logpro);
}

80
__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
81
{
David Rohr's avatar
David Rohr committed
82
83
84
	// ********************************************************
	// *********** Calculates the BioEM probability ***********
	// ********************************************************
85
86
87
88

	const myfloat_t logpro = calc_logpro(param, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);

	//GCC is too stupid to inline properly, so the code is copied here
89
90
	//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb, param.writeAngles);
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
91
	if(pProbMap.Constoadd < logpro)
92
	{
93
94
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
David Rohr's avatar
David Rohr committed
95
96
97
98
99
100

		// ********** Getting parameters that maximize the probability ***********
		pProbMap.max.max_prob_cent_x = disx;
		pProbMap.max.max_prob_cent_y = disy;
		pProbMap.max.max_prob_orient = iOrient;
		pProbMap.max.max_prob_conv = iConv;
101
	}
102
	pProbMap.Total += exp(logpro - pProbMap.Constoadd);
103

104
	if (param.writeAngles)
105
	{
106
		bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
107

108
109
110
111
112
113
114
		if(pProbAngle.ConstAngle < logpro)
		{
			pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
			pProbAngle.ConstAngle = logpro;
		}
		pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
	}
115
116
}

117
__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
118
{
119
	for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
120
	{
121
		for (int cent_y = 0; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
122
		{
123
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y, pProb, param, RefMap);
124
		}
125
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
126
		{
David Rohr's avatar
David Rohr committed
127
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y - param.NumberPixels, pProb, param, RefMap);
128
129
		}
	}
130
	for (int cent_x = param.NumberPixels - param.maxDisplaceCenter; cent_x < param.NumberPixels; cent_x = cent_x + param.GridSpaceCenter)
131
	{
132
		for (int cent_y = 0; cent_y < param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
133
		{
David Rohr's avatar
David Rohr committed
134
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y, pProb, param, RefMap);
135
		}
136
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y <= param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
137
		{
David Rohr's avatar
David Rohr committed
138
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y - param.NumberPixels, pProb, param, RefMap);
139
140
141
142
		}
	}
}

qon's avatar
qon committed
143
template <int GPUAlgo, class RefT>
144
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap,
145
		const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true)
qon's avatar
qon committed
146
{
147
148
149

	// **********************  Calculating BioEM Probability ********************************
	// ************************* Loop of center displacement here ***************************
qon's avatar
qon committed
150

151
	// Taking into account the center displacement
qon's avatar
qon committed
152

153
	// Inizialzing crosscorrelations of calculated projected convolutions
154
155
156
157
#ifdef SSECODE
	myfloat_t sum, sumsquare, crossproMapConv;
	__m128 sum_v = _mm_setzero_ps(), sumsquare_v = _mm_setzero_ps(), cross_v = _mm_setzero_ps(), d1, d2;
#else
158
159
160
	myfloat_t sum = 0.0;
	myfloat_t sumsquare = 0.0;
	myfloat_t crossproMapConv = 0.0;
161
#endif
162
	// Loop over Pixels to calculate dot product and cross-correlations of displaced Ref Conv. Map
163
	myfloat_t logpro;
David Rohr's avatar
David Rohr committed
164
	if (GPUAlgo != 2 || threadActive)
qon's avatar
qon committed
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
	{
		int iStart, jStart, iEnd, jEnd;
		if (cent_x < 0)
		{
			iStart = -cent_x;
			iEnd = param.NumberPixels;
		}
		else
		{
			iStart = 0;
			iEnd = param.NumberPixels - cent_x;
		}
		if (cent_y < 0)
		{
			jStart = -cent_y;
			jEnd = param.NumberPixels;
		}
		else
		{
			jStart = 0;
			jEnd = param.NumberPixels - cent_y;
		}

		for (int i = iStart; i < iEnd; i += 1)
		{
190
191
192
193
194
#ifdef SSECODE
			const float* ptr1 = &Mapconv.points[i + cent_x][jStart + cent_y];
			const float* ptr2 = RefMap.getp(iRefMap, i, jStart);
			int j;
			const int count = jEnd - jStart;
195
			for (j = 0; j <= count - 4; j += 4)
196
197
198
199
200
201
202
203
204
205
			{
				d1 = _mm_loadu_ps(ptr1);
				d2 = _mm_loadu_ps(ptr2);
				sum_v = _mm_add_ps(sum_v, d1);
				sumsquare_v = _mm_add_ps(sumsquare_v, _mm_mul_ps(d1, d1));
				cross_v = _mm_add_ps(cross_v, _mm_mul_ps(d1, d2));
				ptr1 += 4;
				ptr2 += 4;
			}
#else
qon's avatar
qon committed
206
207
			for (int j = jStart; j < jEnd; j += 1)
			{
208
				const myfloat_t pointMap = Mapconv[(i + cent_x) * param.NumberPixels + j + cent_y];
qon's avatar
qon committed
209
210
211
212
213
				const myfloat_t pointRefMap = RefMap.get(iRefMap, i, j);
				crossproMapConv += pointMap * pointRefMap;
				// Crosscorrelation of calculated displaced map
				sum += pointMap;
				// Calculate Sum of pixels squared
214
				sumsquare += pointMap * pointMap;
qon's avatar
qon committed
215
			}
216
#endif
qon's avatar
qon committed
217
		}
218
219
220
221
222
223
224
225
226
227
228
#ifdef SSECODE
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum = _mm_cvtss_f32(sum_v);
		sumsquare = _mm_cvtss_f32(sumsquare_v);
		crossproMapConv = _mm_cvtss_f32(cross_v);
#endif
David Rohr's avatar
David Rohr committed
229

230
		// Calculating elements in BioEM Probability formula
231
		logpro = calc_logpro(param, sum, sumsquare, crossproMapConv, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);
232
233
234
235
236
	}
	else
	{
		logpro = 0;
	}
qon's avatar
qon committed
237
238
239
240
241
242
243
244

#ifdef BIOEM_GPUCODE
	if (GPUAlgo == 2)
	{
		extern __shared__ myfloat_t buf[];
		myfloat_t* buf2 = &buf[myBlockDimX];
		myfloat_t* buf3 = &buf2[myBlockDimX + 4 * myRef];
		int* bufint = (int*) buf3;
David Rohr's avatar
David Rohr committed
245

qon's avatar
qon committed
246
247
248
249
250
251
		buf[myThreadIdxX] = logpro;
		if (myShift == 0)
		{
			bufint[0] = 0;
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
252

qon's avatar
qon committed
253
254
255
256
257
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512) if (buf[myThreadIdxX + 512] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 512];
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
258

qon's avatar
qon committed
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
		if (nShifts2 >= 512)
		{
			if (myShift < 256) if (buf[myThreadIdxX + 256] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 256];
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128) if (buf[myThreadIdxX + 128] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 128];
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64) if (buf[myThreadIdxX + 64] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 64];
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
279
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
qon's avatar
qon committed
280
281
282
283
284
285
286
287
288
			if (nShifts2 >= 64 && vbuf[myThreadIdxX + 32] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 32];
			if (nShifts2 >= 32 && vbuf[myThreadIdxX + 16] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 16];
			if (nShifts2 >= 16 && vbuf[myThreadIdxX + 8] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 8];
			if (nShifts2 >= 8 && vbuf[myThreadIdxX + 4] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 4];
			if (nShifts2 >= 4 && vbuf[myThreadIdxX + 2] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 2];
			if (nShifts2 >= 2 && vbuf[myThreadIdxX + 1] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 1];
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
				const myfloat_t logpro_max = vbuf[myThreadIdxX];
289
				update_prob<GPUAlgo>(logpro_max, iRefMap, iOrient, iConv, -1, -1, pProb, param.writeAngles, buf3, bufint);
qon's avatar
qon committed
290
291
			}
		}
David Rohr's avatar
David Rohr committed
292

qon's avatar
qon committed
293
		__syncthreads();
294
295
296
297

		bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
		bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

qon's avatar
qon committed
298
299
		if (bufint[0] == 1 && buf3[1] == logpro && iRefMap < RefMap.ntotRefMap && atomicAdd(&bufint[0], 1) == 1)
		{
David Rohr's avatar
David Rohr committed
300
301
			pProbMap.max.max_prob_cent_x = cent_x;
			pProbMap.max.max_prob_cent_y = cent_y;
qon's avatar
qon committed
302
		}
David Rohr's avatar
David Rohr committed
303

qon's avatar
qon committed
304
		__syncthreads();
David Rohr's avatar
David Rohr committed
305

qon's avatar
qon committed
306
307
		if (iRefMap < RefMap.ntotRefMap)
		{
308
309
			buf[myThreadIdxX] = exp(logpro - pProbMap.Constoadd);
			buf2[myThreadIdxX] = exp(logpro - pProbAngle.ConstAngle);
qon's avatar
qon committed
310
311
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
312

qon's avatar
qon committed
313
314
315
316
317
318
319
320
321
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 512];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 512];
			}
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
322

qon's avatar
qon committed
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
		if (nShifts2 >= 512)
		{
			if (myShift < 256)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 256];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 256];
			}
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 128];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 128];
			}
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 64];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 64];
			}
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
355
356
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
			volatile myfloat_t* vbuf2 = buf2;
qon's avatar
qon committed
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
			if (nShifts2 >= 64)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 32];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 32];
			}
			if (nShifts2 >= 32)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 16];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 16];
			}
			if (nShifts2 >= 16)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 8];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 8];
			}
			if (nShifts2 >= 8)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 4];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 4];
			}
			if (nShifts2 >= 4)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 2];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 2];
			}
			if (nShifts2 >= 2)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 1];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 1];
			}
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
389
390
				pProbMap.Total += vbuf[myThreadIdxX];
				pProbAngle.forAngles += vbuf2[myThreadIdxX];
qon's avatar
qon committed
391
392
393
394
395
			}
		}
	}
	else
#endif
396
	{
397
		update_prob < -1 > (logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb, param.writeAngles);
398
	}
qon's avatar
qon committed
399
400
401
}

template <int GPUAlgo, class RefT>
402
__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap)
qon's avatar
qon committed
403
{
404
	for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
405
	{
406
		for (int cent_y = -param.maxDisplaceCenter; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
407
		{
qon's avatar
qon committed
408
			compareRefMap<GPUAlgo>(iRefMap, iOrient, iConv, Mapconv, pProb, param, RefMap, cent_x, cent_y);
409
410
		}
	}
qon's avatar
qon committed
411
412
413
}

#endif