bioem_algorithm.h 14.1 KB
Newer Older
qon's avatar
qon committed
1
2
#ifndef BIOEM_ALGORITHM_H
#define BIOEM_ALGORITHM_H
3
4
5
//#include <boost/iterator/iterator_concepts.hpp>

#ifndef BIOEM_GPUCODE
6
//#define SSECODE //Explicit SSE code, not correct yet since loop counter is assumed multiple of 4, anyway not faster than autovectorized code, only implemented for float, not for double.
7
8
9
10
11
12
#endif

#ifdef SSECODE
#include <emmintrin.h>
#include <smmintrin.h>
#endif
qon's avatar
qon committed
13

14
15
16
template <int GPUAlgo>
__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability* pProb, myfloat_t* buf3 = NULL, int* bufint = NULL)
{
David Rohr's avatar
David Rohr committed
17
18
	// *******  Summing total Probabilities *************
	// ******* Need a constant because of numerical divergence*****
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
	if(pProb[iRefMap].Constoadd < logpro)
	{
		pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd);
		pProb[iRefMap].Constoadd = logpro;
	}

	//Summing probabilities for each orientation
	if(pProb[iRefMap].ConstAngle[iOrient] < logpro)
	{
		pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]);
		pProb[iRefMap].ConstAngle[iOrient] = logpro;
	}

	if (GPUAlgo != 2)
	{
		pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd);
		pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);
	}

David Rohr's avatar
David Rohr committed
38
	// ********** Getting parameters that maximize the probability ***********
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
	if(pProb[iRefMap].max_prob < logpro)
	{
		pProb[iRefMap].max_prob = logpro;

		if (GPUAlgo == 2)
		{
			bufint[0] = 1;
			buf3[1] = logpro;
		}
		else
		{
			pProb[iRefMap].max_prob_cent_x = cent_x;
			pProb[iRefMap].max_prob_cent_y = cent_y;
		}
		pProb[iRefMap].max_prob_orient = iOrient;
		pProb[iRefMap].max_prob_conv = iConv;
	}
}

__device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, const myfloat_t sum, const myfloat_t sumsquare, const myfloat_t crossproMapConv, const myfloat_t sumref, const myfloat_t sumsquareref)
{
	// Related to Reference calculated Projection
	const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum);

	// Products of different cross-correlations (first element in formula)
64
65
	const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare - crossproMapConv * crossproMapConv) +
							   2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare;
66

David Rohr's avatar
David Rohr committed
67
	/// ******* Calculating log of Prob*********
68
69
70
71
72
	// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
	const myfloat_t logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb);
	return(logpro);
}

David Rohr's avatar
David Rohr committed
73
__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
74
{
David Rohr's avatar
David Rohr committed
75
76
77
	// ********************************************************
	// *********** Calculates the BioEM probability ***********
	// ********************************************************
78
79
80
81
82

	const myfloat_t logpro = calc_logpro(param, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);

	//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb);
	//GCC is too stupid to inline properly, so the code is copied here
83
84
	if(pProb[iRefMap].Constoadd < logpro)
	{
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
		pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd);
		pProb[iRefMap].Constoadd = logpro;
	}
	pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd);

	if(pProb[iRefMap].ConstAngle[iOrient] < logpro)
	{
		pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]);
		pProb[iRefMap].ConstAngle[iOrient] = logpro;
	}
	pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);

	if(pProb[iRefMap].max_prob < logpro)
	{
		pProb[iRefMap].max_prob = logpro;
		pProb[iRefMap].max_prob_cent_x = disx;
		pProb[iRefMap].max_prob_cent_y = disy;
		pProb[iRefMap].max_prob_orient = iOrient;
		pProb[iRefMap].max_prob_conv = iConv;
	}
}

__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
{
109
	for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
110
	{
111
		for (int cent_y = 0; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
112
		{
113
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y, pProb, param, RefMap);
114
		}
115
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y < param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
116
		{
David Rohr's avatar
David Rohr committed
117
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x, cent_y - param.NumberPixels, pProb, param, RefMap);
118
119
		}
	}
120
	for (int cent_x = param.NumberPixels - param.maxDisplaceCenter; cent_x < param.NumberPixels; cent_x = cent_x + param.GridSpaceCenter)
121
	{
122
		for (int cent_y = 0; cent_y < param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
123
		{
David Rohr's avatar
David Rohr committed
124
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y, pProb, param, RefMap);
125
		}
126
		for (int cent_y = param.NumberPixels - param.maxDisplaceCenter; cent_y <= param.NumberPixels; cent_y = cent_y + param.GridSpaceCenter)
127
		{
David Rohr's avatar
David Rohr committed
128
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * param.NumberPixels + cent_y] / (myfloat_t) (param.NumberPixels * param.NumberPixels), cent_x - param.NumberPixels, cent_y - param.NumberPixels, pProb, param, RefMap);
129
130
131
132
		}
	}
}

qon's avatar
qon committed
133
template <int GPUAlgo, class RefT>
134
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap,
135
		const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true)
qon's avatar
qon committed
136
{
137
138
139

	// **********************  Calculating BioEM Probability ********************************
	// ************************* Loop of center displacement here ***************************
qon's avatar
qon committed
140

141
	// Taking into account the center displacement
qon's avatar
qon committed
142

143
	// Inizialzing crosscorrelations of calculated projected convolutions
144
145
146
147
#ifdef SSECODE
	myfloat_t sum, sumsquare, crossproMapConv;
	__m128 sum_v = _mm_setzero_ps(), sumsquare_v = _mm_setzero_ps(), cross_v = _mm_setzero_ps(), d1, d2;
#else
148
149
150
	myfloat_t sum = 0.0;
	myfloat_t sumsquare = 0.0;
	myfloat_t crossproMapConv = 0.0;
151
#endif
152
	// Loop over Pixels to calculate dot product and cross-correlations of displaced Ref Conv. Map
153
	myfloat_t logpro;
David Rohr's avatar
David Rohr committed
154
	if (GPUAlgo != 2 || threadActive)
qon's avatar
qon committed
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
	{
		int iStart, jStart, iEnd, jEnd;
		if (cent_x < 0)
		{
			iStart = -cent_x;
			iEnd = param.NumberPixels;
		}
		else
		{
			iStart = 0;
			iEnd = param.NumberPixels - cent_x;
		}
		if (cent_y < 0)
		{
			jStart = -cent_y;
			jEnd = param.NumberPixels;
		}
		else
		{
			jStart = 0;
			jEnd = param.NumberPixels - cent_y;
		}

		for (int i = iStart; i < iEnd; i += 1)
		{
180
181
182
183
184
#ifdef SSECODE
			const float* ptr1 = &Mapconv.points[i + cent_x][jStart + cent_y];
			const float* ptr2 = RefMap.getp(iRefMap, i, jStart);
			int j;
			const int count = jEnd - jStart;
185
			for (j = 0; j <= count - 4; j += 4)
186
187
188
189
190
191
192
193
194
195
			{
				d1 = _mm_loadu_ps(ptr1);
				d2 = _mm_loadu_ps(ptr2);
				sum_v = _mm_add_ps(sum_v, d1);
				sumsquare_v = _mm_add_ps(sumsquare_v, _mm_mul_ps(d1, d1));
				cross_v = _mm_add_ps(cross_v, _mm_mul_ps(d1, d2));
				ptr1 += 4;
				ptr2 += 4;
			}
#else
qon's avatar
qon committed
196
197
			for (int j = jStart; j < jEnd; j += 1)
			{
198
				const myfloat_t pointMap = Mapconv[(i + cent_x) * param.NumberPixels + j + cent_y];
qon's avatar
qon committed
199
200
201
202
203
				const myfloat_t pointRefMap = RefMap.get(iRefMap, i, j);
				crossproMapConv += pointMap * pointRefMap;
				// Crosscorrelation of calculated displaced map
				sum += pointMap;
				// Calculate Sum of pixels squared
204
				sumsquare += pointMap * pointMap;
qon's avatar
qon committed
205
			}
206
#endif
qon's avatar
qon committed
207
		}
208
209
210
211
212
213
214
215
216
217
218
#ifdef SSECODE
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum_v = _mm_hadd_ps(sum_v, sum_v);
		sumsquare_v = _mm_hadd_ps(sumsquare_v, sumsquare_v);
		cross_v = _mm_hadd_ps(cross_v, cross_v);
		sum = _mm_cvtss_f32(sum_v);
		sumsquare = _mm_cvtss_f32(sumsquare_v);
		crossproMapConv = _mm_cvtss_f32(cross_v);
#endif
David Rohr's avatar
David Rohr committed
219

220
		// Calculating elements in BioEM Probability formula
221
		logpro = calc_logpro(param, sum, sumsquare, crossproMapConv, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);
222
223
224
225
226
	}
	else
	{
		logpro = 0;
	}
qon's avatar
qon committed
227
228
229
230
231
232
233
234

#ifdef BIOEM_GPUCODE
	if (GPUAlgo == 2)
	{
		extern __shared__ myfloat_t buf[];
		myfloat_t* buf2 = &buf[myBlockDimX];
		myfloat_t* buf3 = &buf2[myBlockDimX + 4 * myRef];
		int* bufint = (int*) buf3;
David Rohr's avatar
David Rohr committed
235

qon's avatar
qon committed
236
237
238
239
240
241
		buf[myThreadIdxX] = logpro;
		if (myShift == 0)
		{
			bufint[0] = 0;
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
242

qon's avatar
qon committed
243
244
245
246
247
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512) if (buf[myThreadIdxX + 512] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 512];
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
248

qon's avatar
qon committed
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
		if (nShifts2 >= 512)
		{
			if (myShift < 256) if (buf[myThreadIdxX + 256] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 256];
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128) if (buf[myThreadIdxX + 128] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 128];
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64) if (buf[myThreadIdxX + 64] > buf[myThreadIdxX]) buf[myThreadIdxX] = buf[myThreadIdxX + 64];
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
269
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
qon's avatar
qon committed
270
271
272
273
274
275
276
277
278
			if (nShifts2 >= 64 && vbuf[myThreadIdxX + 32] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 32];
			if (nShifts2 >= 32 && vbuf[myThreadIdxX + 16] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 16];
			if (nShifts2 >= 16 && vbuf[myThreadIdxX + 8] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 8];
			if (nShifts2 >= 8 && vbuf[myThreadIdxX + 4] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 4];
			if (nShifts2 >= 4 && vbuf[myThreadIdxX + 2] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 2];
			if (nShifts2 >= 2 && vbuf[myThreadIdxX + 1] > vbuf[myThreadIdxX]) vbuf[myThreadIdxX] = vbuf[myThreadIdxX + 1];
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
				const myfloat_t logpro_max = vbuf[myThreadIdxX];
279
				update_prob<GPUAlgo>(logpro_max, iRefMap, iOrient, iConv, -1, -1, pProb, buf3, bufint);
qon's avatar
qon committed
280
281
			}
		}
David Rohr's avatar
David Rohr committed
282

qon's avatar
qon committed
283
284
285
286
287
288
		__syncthreads();
		if (bufint[0] == 1 && buf3[1] == logpro && iRefMap < RefMap.ntotRefMap && atomicAdd(&bufint[0], 1) == 1)
		{
			pProb[iRefMap].max_prob_cent_x = cent_x;
			pProb[iRefMap].max_prob_cent_y = cent_y;
		}
David Rohr's avatar
David Rohr committed
289

qon's avatar
qon committed
290
		__syncthreads();
David Rohr's avatar
David Rohr committed
291

qon's avatar
qon committed
292
293
294
295
296
297
		if (iRefMap < RefMap.ntotRefMap)
		{
			buf[myThreadIdxX] = exp(logpro - pProb[iRefMap].Constoadd);
			buf2[myThreadIdxX] = exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);
		}
		__syncthreads();
David Rohr's avatar
David Rohr committed
298

qon's avatar
qon committed
299
300
301
302
303
304
305
306
307
		if (nShifts2 == CUDA_MAX_SHIFT_REDUCE) // 1024
		{
			if (myShift < 512)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 512];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 512];
			}
			__syncthreads();
		}
David Rohr's avatar
David Rohr committed
308

qon's avatar
qon committed
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
		if (nShifts2 >= 512)
		{
			if (myShift < 256)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 256];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 256];
			}
			__syncthreads();
		}

		if (nShifts2 >= 256)
		{
			if (myShift < 128)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 128];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 128];
			}
			__syncthreads();
		}

		if (nShifts2 >= 128)
		{
			if (myShift < 64)
			{
				buf[myThreadIdxX] += buf[myThreadIdxX + 64];
				buf2[myThreadIdxX] += buf2[myThreadIdxX + 64];
			}
			__syncthreads();
		}

		if (myShift < 32) //Warp Size is 32, threads are synched automatically
		{
341
342
			volatile myfloat_t* vbuf = buf; //Mem must be volatile such that memory access is not reordered
			volatile myfloat_t* vbuf2 = buf2;
qon's avatar
qon committed
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
			if (nShifts2 >= 64)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 32];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 32];
			}
			if (nShifts2 >= 32)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 16];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 16];
			}
			if (nShifts2 >= 16)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 8];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 8];
			}
			if (nShifts2 >= 8)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 4];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 4];
			}
			if (nShifts2 >= 4)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 2];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 2];
			}
			if (nShifts2 >= 2)
			{
				vbuf[myThreadIdxX] += vbuf[myThreadIdxX + 1];
				vbuf2[myThreadIdxX] += vbuf2[myThreadIdxX + 1];
			}
			if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
			{
				pProb[iRefMap].Total += vbuf[myThreadIdxX];
				pProb[iRefMap].forAngles[iOrient] += vbuf2[myThreadIdxX];
			}
		}
	}
	else
#endif
382
	{
383
		update_prob < -1 > (logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb);
384
	}
qon's avatar
qon committed
385
386
387
}

template <int GPUAlgo, class RefT>
388
__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap)
qon's avatar
qon committed
389
{
390
	for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
391
	{
392
		for (int cent_y = -param.maxDisplaceCenter; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
393
		{
qon's avatar
qon committed
394
			compareRefMap<GPUAlgo>(iRefMap, iOrient, iConv, Mapconv, pProb, param, RefMap, cent_x, cent_y);
395
396
		}
	}
qon's avatar
qon committed
397
398
399
}

#endif