bioem_algorithm.h 14.6 KB
Newer Older
Pilar Cossio's avatar
License    
Pilar Cossio committed
1
2
3
4
5
6
7
8
9
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        < BioEM software for Bayesian inference of Electron Microscopy images>
            Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
            Max Planck Institute of Biophysics, Frankfurt, Germany.
 
                See license statement for terms of distribution.

   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

qon's avatar
qon committed
10
11
#ifndef BIOEM_ALGORITHM_H
#define BIOEM_ALGORITHM_H
12
13
14
//#include <boost/iterator/iterator_concepts.hpp>

#ifndef BIOEM_GPUCODE
15
//#define SSECODE //Explicit SSE code, not correct yet since loop counter is assumed multiple of 4, anyway not faster than autovectorized code, only implemented for float, not for double.
16
17
18
19
20
21
#endif

#ifdef SSECODE
#include <emmintrin.h>
#include <smmintrin.h>
#endif
qon's avatar
qon committed
22

23
template <int GPUAlgo>
24
__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability& pProb, myfloat_t* buf3 = NULL, int* bufint = NULL)
25
{
David Rohr's avatar
David Rohr committed
26
27
	// *******  Summing total Probabilities *************
	// ******* Need a constant because of numerical divergence*****
28
29
30
31
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
	bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

	if(pProbMap.Constoadd < logpro)
32
	{
33
34
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
35
36
37
	}

	//Summing probabilities for each orientation
38
	if(pProbAngle.ConstAngle < logpro)
39
	{
40
41
		pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
		pProbAngle.ConstAngle = logpro;
42
43
44
45
	}

	if (GPUAlgo != 2)
	{
46
47
		pProbMap.Total += exp(logpro - pProbMap.Constoadd);
		pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
48
49
	}

David Rohr's avatar
David Rohr committed
50
	// ********** Getting parameters that maximize the probability ***********
51
	if(pProbMap.max_prob < logpro)
52
	{
53
		pProbMap.max_prob = logpro;
54
55
56
57
58
59
60
61

		if (GPUAlgo == 2)
		{
			bufint[0] = 1;
			buf3[1] = logpro;
		}
		else
		{
62
63
			pProbMap.max_prob_cent_x = cent_x;
			pProbMap.max_prob_cent_y = cent_y;
64
		}
65
66
		pProbMap.max_prob_orient = iOrient;
		pProbMap.max_prob_conv = iConv;
67
68
69
70
71
72
73
74
75
	}
}

__device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, const myfloat_t sum, const myfloat_t sumsquare, const myfloat_t crossproMapConv, const myfloat_t sumref, const myfloat_t sumsquareref)
{
	// Related to Reference calculated Projection
	const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum);

	// Products of different cross-correlations (first element in formula)
76
77
	const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare - crossproMapConv * crossproMapConv) +
							   2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare;
78

David Rohr's avatar
David Rohr committed
79
	/// ******* Calculating log of Prob*********
80
81
82
83
84
	// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
	const myfloat_t logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb);
	return(logpro);
}

85
__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
86
{
David Rohr's avatar
David Rohr committed
87
88
89
	// ********************************************************
	// *********** Calculates the BioEM probability ***********
	// ********************************************************
90
91
92

	const myfloat_t logpro = calc_logpro(param, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);

93
94
95
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
	bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

96
	//GCC is too stupid to inline properly, so the code is copied here
97
98
	//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb);
	if(pProbMap.Constoadd < logpro)
99
	{
100
101
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
102
	}
103
	pProbMap.Total += exp(logpro - pProbMap.Constoadd);
104

105
	if(pProbAngle.ConstAngle < logpro)
106
	{
107
108
		pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
		pProbAngle.ConstAngle = logpro;
109
	}
110
	pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
111

112
	if(pProbMap.max_prob < logpro)
113
	{
114
115
116
117
118
		pProbMap.max_prob = logpro;
		pProbMap.max_prob_cent_x = disx;
		pProbMap.max_prob_cent_y = disy;
		pProbMap.max_prob_orient = iOrient;
		pProbMap.max_prob_conv = iConv;
119
120
121
	}
}

122
__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
123
{
124
	for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
125
	{
126
		for (int cent_y = 0; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
127
		{
128
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y, pProb, param, RefMap);
129
		}
130
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
131
		{
David Rohr's avatar
David Rohr committed
132
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y - param.NumberPixels, pProb, param, RefMap);
133
134
		}
	}
135
	for (int cent_x = param.NumberPixels - param.maxDisplaceCenter; cent_x < param.NumberPixels; cent_x = cent_x + param.GridSpaceCenter)
136
	{
137
		for (int cent_y = 0; cent_y < param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
138
		{
David Rohr's avatar
David Rohr committed
139
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y, pProb, param, RefMap);
140
		}
141
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y <= param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
142
		{
David Rohr's avatar
David Rohr committed
143
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y - param.NumberPixels, pProb, param, RefMap);
144
145
146
147
		}
	}
}

qon's avatar
qon committed
148
template <int GPUAlgo, class RefT>
149
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap,
150
		const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true)
qon's avatar
qon committed
151
{
152
153
154

	// **********************  Calculating BioEM Probability ********************************
	// ************************* Loop of center displacement here ***************************
qon's avatar
qon committed
155

156
	// Taking into account the center displacement
qon's avatar
qon committed
157

158
	// Inizialzing crosscorrelations of calculated projected convolutions
159
160
161
162
#ifdef SSECODE
	myfloat_t sum, sumsquare, crossproMapConv;
	__m128 sum_v = _mm_setzero_ps(), sumsquare_v = _mm_setzero_ps(), cross_v = _mm_setzero_ps(), d1, d2;
#else
163
164
165
	myfloat_t sum = 0.0;
	myfloat_t sumsquare = 0.0;
	myfloat_t crossproMapConv = 0.0;
166
#endif
167
	// Loop over Pixels to calculate dot product and cross-correlations of displaced Ref Conv. Map
168
	myfloat_t logpro;
David Rohr's avatar
David Rohr committed
169
	if (GPUAlgo != 2 || threadActive)
qon's avatar
qon committed
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
	{
		int iStart, jStart, iEnd, jEnd;
		if (cent_x < 0)
		{
			iStart = -cent_x;
			iEnd = param.NumberPixels;
		}
		else
		{
			iStart = 0;
			iEnd = param.NumberPixels - cent_x;
		}
		if (cent_y < 0)
		{
			jStart = -cent_y;
			jEnd = param.NumberPixels;
		}
		else
		{
			jStart = 0;
			jEnd = param.NumberPixels - cent_y;
		}

		for (int i = iStart; i < iEnd; i += 1)
		{
195
196
197
198
199
#ifdef SSECODE
			const float* ptr1 = &Mapconv.points[i + cent_x][jStart + cent_y];
			const float* ptr2 = RefMap.getp(iRefMap, i, jStart);
			int j;
			const int count = jEnd - jStart;
200
			for (j = 0; j <= count - 4; j += 4)
201
202
203
204
205
206
207
208
209
210
			{
				d1 = _mm_loadu_ps(ptr1);
				d2 = _mm_loadu_ps(ptr2);
				sum_v = _mm_add_ps(sum_v, d1);
				sumsquare_v = _mm_add_ps(sumsquare_v, _mm_mul_ps(d1, d1));
				cross_v = _mm_add_ps(cross_v, _mm_mul_ps(d1, d2));
				ptr1 += 4;
				ptr2 += 4;
			}
#else
qon's avatar
qon committed
211
212
			for (int j = jStart; j < jEnd; j += 1)
			{
213
				const myfloat_t pointMap = Mapconv[(i + cent_x) * param.NumberPixels + j + cent_y];
qon's avatar
qon committed
214
215
216
217
218
				const myfloat_t pointRefMap = RefMap.get(iRefMap, i, j);
				crossproMapConv += pointMap * pointRefMap;
				// Crosscorrelation of calculated displaced map
				sum += pointMap;
				// Calculate Sum of pixels squared
219
				sumsquare += pointMap * pointMap;
qon's avatar
qon committed
220
			}
221
#endif
qon's avatar
qon committed
222
		}
223
224
225
226
227
228
229
230
231
232
233
#ifdef SSECODE
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum = _mm_cvtss_f32(sum_v);
		sumsquare = _mm_cvtss_f32(sumsquare_v);
		crossproMapConv = _mm_cvtss_f32(cross_v);
#endif
David Rohr's avatar
David Rohr committed
234

235
		// Calculating elements in BioEM Probability formula
236
		logpro = calc_logpro(param, sum, sumsquare, crossproMapConv, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);
237
238
239
240
241
	}
	else
	{
		logpro = 0;
	}
qon's avatar
qon committed
242
243
244
245
246
247
248
249

#ifdef BIOEM_GPUCODE
	if (GPUAlgo == 2)
	{
		extern __shared__ myfloat_t buf[];
		myfloat_t* buf2 = &buf[myBlockDimX];
		myfloat_t* buf3 = &buf2[myBlockDimX + 4 * myRef];
		int* bufint = (int*) buf3;
David Rohr's avatar
David Rohr committed
250

qon's avatar
qon committed
251
252
253
254
255
256
		buf[myThreadIdxX] = logpro;
		if (myShift == 0)
		{
			bufint[0] = 0;
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
257

qon's avatar
qon committed
258
259
260
261
262
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512) if (buf[myThreadIdxX + 512] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 512];
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
263

qon's avatar
qon committed
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
		if (nShifts2 >= 512)
		{
			if (myShift < 256) if (buf[myThreadIdxX + 256] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 256];
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128) if (buf[myThreadIdxX + 128] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 128];
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64) if (buf[myThreadIdxX + 64] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 64];
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
284
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
qon's avatar
qon committed
285
286
287
288
289
290
291
292
293
			if (nShifts2 >= 64 && vbuf[myThreadIdxX + 32] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 32];
			if (nShifts2 >= 32 && vbuf[myThreadIdxX + 16] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 16];
			if (nShifts2 >= 16 && vbuf[myThreadIdxX + 8] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 8];
			if (nShifts2 >= 8 && vbuf[myThreadIdxX + 4] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 4];
			if (nShifts2 >= 4 && vbuf[myThreadIdxX + 2] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 2];
			if (nShifts2 >= 2 && vbuf[myThreadIdxX + 1] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 1];
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
				const myfloat_t logpro_max = vbuf[myThreadIdxX];
294
				update_prob<GPUAlgo>(logpro_max, iRefMap, iOrient, iConv, -1, -1, pProb, buf3, bufint);
qon's avatar
qon committed
295
296
			}
		}
David Rohr's avatar
David Rohr committed
297

qon's avatar
qon committed
298
		__syncthreads();
299
300
301
302

		bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
		bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

qon's avatar
qon committed
303
304
		if (bufint[0] == 1 && buf3[1] == logpro && iRefMap < RefMap.ntotRefMap && atomicAdd(&bufint[0], 1) == 1)
		{
305
306
			pProbMap.max_prob_cent_x = cent_x;
			pProbMap.max_prob_cent_y = cent_y;
qon's avatar
qon committed
307
		}
David Rohr's avatar
David Rohr committed
308

qon's avatar
qon committed
309
		__syncthreads();
David Rohr's avatar
David Rohr committed
310

qon's avatar
qon committed
311
312
		if (iRefMap < RefMap.ntotRefMap)
		{
313
314
			buf[myThreadIdxX] = exp(logpro - pProbMap.Constoadd);
			buf2[myThreadIdxX] = exp(logpro - pProbAngle.ConstAngle);
qon's avatar
qon committed
315
316
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
317

qon's avatar
qon committed
318
319
320
321
322
323
324
325
326
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 512];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 512];
			}
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
327

qon's avatar
qon committed
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
		if (nShifts2 >= 512)
		{
			if (myShift < 256)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 256];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 256];
			}
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 128];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 128];
			}
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 64];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 64];
			}
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
360
361
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
			volatile myfloat_t* vbuf2 = buf2;
qon's avatar
qon committed
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
			if (nShifts2 >= 64)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 32];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 32];
			}
			if (nShifts2 >= 32)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 16];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 16];
			}
			if (nShifts2 >= 16)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 8];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 8];
			}
			if (nShifts2 >= 8)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 4];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 4];
			}
			if (nShifts2 >= 4)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 2];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 2];
			}
			if (nShifts2 >= 2)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 1];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 1];
			}
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
394
395
				pProbMap.Total += vbuf[myThreadIdxX];
				pProbAngle.forAngles += vbuf2[myThreadIdxX];
qon's avatar
qon committed
396
397
398
399
400
			}
		}
	}
	else
#endif
401
	{
402
		update_prob < -1 > (logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb);
403
	}
qon's avatar
qon committed
404
405
406
}

template <int GPUAlgo, class RefT>
407
__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap)
qon's avatar
qon committed
408
{
409
	for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
410
	{
411
		for (int cent_y = -param.maxDisplaceCenter; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
412
		{
qon's avatar
qon committed
413
			compareRefMap<GPUAlgo>(iRefMap, iOrient, iConv, Mapconv, pProb, param, RefMap, cent_x, cent_y);
414
415
		}
	}
qon's avatar
qon committed
416
417
418
}

#endif