bioem_algorithm.h 14.1 KB
Newer Older
qon's avatar
qon committed
1
2
#ifndef BIOEM_ALGORITHM_H
#define BIOEM_ALGORITHM_H
3
4
5
//#include <boost/iterator/iterator_concepts.hpp>

#ifndef BIOEM_GPUCODE
6
//#define SSECODE //Explicit SSE code, not correct yet since loop counter is assumed multiple of 4, anyway not faster than autovectorized code, only implemented for float, not for double.
7
8
9
10
11
12
#endif

#ifdef SSECODE
#include <emmintrin.h>
#include <smmintrin.h>
#endif
qon's avatar
qon committed
13

14
template <int GPUAlgo>
15
__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability& pProb, myfloat_t* buf3 = NULL, int* bufint = NULL)
16
{
David Rohr's avatar
David Rohr committed
17
18
	// *******  Summing total Probabilities *************
	// ******* Need a constant because of numerical divergence*****
19
20
21
22
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
	bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

	if(pProbMap.Constoadd < logpro)
23
	{
24
25
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
26
27
28
	}

	//Summing probabilities for each orientation
29
	if(pProbAngle.ConstAngle < logpro)
30
	{
31
32
		pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
		pProbAngle.ConstAngle = logpro;
33
34
35
36
	}

	if (GPUAlgo != 2)
	{
37
38
		pProbMap.Total += exp(logpro - pProbMap.Constoadd);
		pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
39
40
	}

David Rohr's avatar
David Rohr committed
41
	// ********** Getting parameters that maximize the probability ***********
42
	if(pProbMap.max_prob < logpro)
43
	{
44
		pProbMap.max_prob = logpro;
45
46
47
48
49
50
51
52

		if (GPUAlgo == 2)
		{
			bufint[0] = 1;
			buf3[1] = logpro;
		}
		else
		{
53
54
			pProbMap.max_prob_cent_x = cent_x;
			pProbMap.max_prob_cent_y = cent_y;
55
		}
56
57
		pProbMap.max_prob_orient = iOrient;
		pProbMap.max_prob_conv = iConv;
58
59
60
61
62
63
64
65
66
	}
}

__device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, const myfloat_t sum, const myfloat_t sumsquare, const myfloat_t crossproMapConv, const myfloat_t sumref, const myfloat_t sumsquareref)
{
	// Related to Reference calculated Projection
	const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum);

	// Products of different cross-correlations (first element in formula)
67
68
	const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare - crossproMapConv * crossproMapConv) +
							   2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare;
69

David Rohr's avatar
David Rohr committed
70
	/// ******* Calculating log of Prob*********
71
72
73
74
75
	// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
	const myfloat_t logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb);
	return(logpro);
}

76
__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
77
{
David Rohr's avatar
David Rohr committed
78
79
80
	// ********************************************************
	// *********** Calculates the BioEM probability ***********
	// ********************************************************
81
82
83

	const myfloat_t logpro = calc_logpro(param, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);

84
85
86
	bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
	bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

87
	//GCC is too stupid to inline properly, so the code is copied here
88
89
	//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb);
	if(pProbMap.Constoadd < logpro)
90
	{
91
92
		pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
		pProbMap.Constoadd = logpro;
93
	}
94
	pProbMap.Total += exp(logpro - pProbMap.Constoadd);
95

96
	if(pProbAngle.ConstAngle < logpro)
97
	{
98
99
		pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
		pProbAngle.ConstAngle = logpro;
100
	}
101
	pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
102

103
	if(pProbMap.max_prob < logpro)
104
	{
105
106
107
108
109
		pProbMap.max_prob = logpro;
		pProbMap.max_prob_cent_x = disx;
		pProbMap.max_prob_cent_y = disy;
		pProbMap.max_prob_orient = iOrient;
		pProbMap.max_prob_conv = iConv;
110
111
112
	}
}

113
__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
114
{
115
	for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
116
	{
117
		for (int cent_y = 0; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
118
		{
119
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y, pProb, param, RefMap);
120
		}
121
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
122
		{
David Rohr's avatar
David Rohr committed
123
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y - param.NumberPixels, pProb, param, RefMap);
124
125
		}
	}
126
	for (int cent_x = param.NumberPixels - param.maxDisplaceCenter; cent_x < param.NumberPixels; cent_x = cent_x + param.GridSpaceCenter)
127
	{
128
		for (int cent_y = 0; cent_y < param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
129
		{
David Rohr's avatar
David Rohr committed
130
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y, pProb, param, RefMap);
131
		}
132
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y <= param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
133
		{
David Rohr's avatar
David Rohr committed
134
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y - param.NumberPixels, pProb, param, RefMap);
135
136
137
138
		}
	}
}

qon's avatar
qon committed
139
template <int GPUAlgo, class RefT>
140
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap,
141
		const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true)
qon's avatar
qon committed
142
{
143
144
145

	// **********************  Calculating BioEM Probability ********************************
	// ************************* Loop of center displacement here ***************************
qon's avatar
qon committed
146

147
	// Taking into account the center displacement
qon's avatar
qon committed
148

149
	// Inizialzing crosscorrelations of calculated projected convolutions
150
151
152
153
#ifdef SSECODE
	myfloat_t sum, sumsquare, crossproMapConv;
	__m128 sum_v = _mm_setzero_ps(), sumsquare_v = _mm_setzero_ps(), cross_v = _mm_setzero_ps(), d1, d2;
#else
154
155
156
	myfloat_t sum = 0.0;
	myfloat_t sumsquare = 0.0;
	myfloat_t crossproMapConv = 0.0;
157
#endif
158
	// Loop over Pixels to calculate dot product and cross-correlations of displaced Ref Conv. Map
159
	myfloat_t logpro;
David Rohr's avatar
David Rohr committed
160
	if (GPUAlgo != 2 || threadActive)
qon's avatar
qon committed
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
	{
		int iStart, jStart, iEnd, jEnd;
		if (cent_x < 0)
		{
			iStart = -cent_x;
			iEnd = param.NumberPixels;
		}
		else
		{
			iStart = 0;
			iEnd = param.NumberPixels - cent_x;
		}
		if (cent_y < 0)
		{
			jStart = -cent_y;
			jEnd = param.NumberPixels;
		}
		else
		{
			jStart = 0;
			jEnd = param.NumberPixels - cent_y;
		}

		for (int i = iStart; i < iEnd; i += 1)
		{
186
187
188
189
190
#ifdef SSECODE
			const float* ptr1 = &Mapconv.points[i + cent_x][jStart + cent_y];
			const float* ptr2 = RefMap.getp(iRefMap, i, jStart);
			int j;
			const int count = jEnd - jStart;
191
			for (j = 0; j <= count - 4; j += 4)
192
193
194
195
196
197
198
199
200
201
			{
				d1 = _mm_loadu_ps(ptr1);
				d2 = _mm_loadu_ps(ptr2);
				sum_v = _mm_add_ps(sum_v, d1);
				sumsquare_v = _mm_add_ps(sumsquare_v, _mm_mul_ps(d1, d1));
				cross_v = _mm_add_ps(cross_v, _mm_mul_ps(d1, d2));
				ptr1 += 4;
				ptr2 += 4;
			}
#else
qon's avatar
qon committed
202
203
			for (int j = jStart; j < jEnd; j += 1)
			{
204
				const myfloat_t pointMap = Mapconv[(i + cent_x) * param.NumberPixels + j + cent_y];
qon's avatar
qon committed
205
206
207
208
209
				const myfloat_t pointRefMap = RefMap.get(iRefMap, i, j);
				crossproMapConv += pointMap * pointRefMap;
				// Crosscorrelation of calculated displaced map
				sum += pointMap;
				// Calculate Sum of pixels squared
210
				sumsquare += pointMap * pointMap;
qon's avatar
qon committed
211
			}
212
#endif
qon's avatar
qon committed
213
		}
214
215
216
217
218
219
220
221
222
223
224
#ifdef SSECODE
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum = _mm_cvtss_f32(sum_v);
		sumsquare = _mm_cvtss_f32(sumsquare_v);
		crossproMapConv = _mm_cvtss_f32(cross_v);
#endif
David Rohr's avatar
David Rohr committed
225

226
		// Calculating elements in BioEM Probability formula
227
		logpro = calc_logpro(param, sum, sumsquare, crossproMapConv, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);
228
229
230
231
232
	}
	else
	{
		logpro = 0;
	}
qon's avatar
qon committed
233
234
235
236
237
238
239
240

#ifdef BIOEM_GPUCODE
	if (GPUAlgo == 2)
	{
		extern __shared__ myfloat_t buf[];
		myfloat_t* buf2 = &buf[myBlockDimX];
		myfloat_t* buf3 = &buf2[myBlockDimX + 4 * myRef];
		int* bufint = (int*) buf3;
David Rohr's avatar
David Rohr committed
241

qon's avatar
qon committed
242
243
244
245
246
247
		buf[myThreadIdxX] = logpro;
		if (myShift == 0)
		{
			bufint[0] = 0;
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
248

qon's avatar
qon committed
249
250
251
252
253
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512) if (buf[myThreadIdxX + 512] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 512];
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
254

qon's avatar
qon committed
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
		if (nShifts2 >= 512)
		{
			if (myShift < 256) if (buf[myThreadIdxX + 256] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 256];
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128) if (buf[myThreadIdxX + 128] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 128];
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64) if (buf[myThreadIdxX + 64] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 64];
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
275
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
qon's avatar
qon committed
276
277
278
279
280
281
282
283
284
			if (nShifts2 >= 64 && vbuf[myThreadIdxX + 32] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 32];
			if (nShifts2 >= 32 && vbuf[myThreadIdxX + 16] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 16];
			if (nShifts2 >= 16 && vbuf[myThreadIdxX + 8] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 8];
			if (nShifts2 >= 8 && vbuf[myThreadIdxX + 4] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 4];
			if (nShifts2 >= 4 && vbuf[myThreadIdxX + 2] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 2];
			if (nShifts2 >= 2 && vbuf[myThreadIdxX + 1] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 1];
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
				const myfloat_t logpro_max = vbuf[myThreadIdxX];
285
				update_prob<GPUAlgo>(logpro_max, iRefMap, iOrient, iConv, -1, -1, pProb, buf3, bufint);
qon's avatar
qon committed
286
287
			}
		}
David Rohr's avatar
David Rohr committed
288

qon's avatar
qon committed
289
		__syncthreads();
290
291
292
293

		bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
		bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);

qon's avatar
qon committed
294
295
		if (bufint[0] == 1 && buf3[1] == logpro && iRefMap < RefMap.ntotRefMap && atomicAdd(&bufint[0], 1) == 1)
		{
296
297
			pProbMap.max_prob_cent_x = cent_x;
			pProbMap.max_prob_cent_y = cent_y;
qon's avatar
qon committed
298
		}
David Rohr's avatar
David Rohr committed
299

qon's avatar
qon committed
300
		__syncthreads();
David Rohr's avatar
David Rohr committed
301

qon's avatar
qon committed
302
303
		if (iRefMap < RefMap.ntotRefMap)
		{
304
305
			buf[myThreadIdxX] = exp(logpro - pProbMap.Constoadd);
			buf2[myThreadIdxX] = exp(logpro - pProbAngle.ConstAngle);
qon's avatar
qon committed
306
307
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
308

qon's avatar
qon committed
309
310
311
312
313
314
315
316
317
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 512];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 512];
			}
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
318

qon's avatar
qon committed
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
		if (nShifts2 >= 512)
		{
			if (myShift < 256)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 256];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 256];
			}
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 128];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 128];
			}
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 64];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 64];
			}
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
351
352
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
			volatile myfloat_t* vbuf2 = buf2;
qon's avatar
qon committed
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
			if (nShifts2 >= 64)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 32];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 32];
			}
			if (nShifts2 >= 32)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 16];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 16];
			}
			if (nShifts2 >= 16)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 8];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 8];
			}
			if (nShifts2 >= 8)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 4];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 4];
			}
			if (nShifts2 >= 4)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 2];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 2];
			}
			if (nShifts2 >= 2)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 1];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 1];
			}
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
385
386
				pProbMap.Total += vbuf[myThreadIdxX];
				pProbAngle.forAngles += vbuf2[myThreadIdxX];
qon's avatar
qon committed
387
388
389
390
391
			}
		}
	}
	else
#endif
392
	{
393
		update_prob < -1 > (logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb);
394
	}
qon's avatar
qon committed
395
396
397
}

template <int GPUAlgo, class RefT>
398
__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap)
qon's avatar
qon committed
399
{
400
	for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
401
	{
402
		for (int cent_y = -param.maxDisplaceCenter; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
403
		{
qon's avatar
qon committed
404
			compareRefMap<GPUAlgo>(iRefMap, iOrient, iConv, Mapconv, pProb, param, RefMap, cent_x, cent_y);
405
406
		}
	}
qon's avatar
qon committed
407
408
409
}

#endif